author:    tvayer <titouan.vayer@gmail.com>  2019-05-29 14:24:05 +0200
committer: tvayer <titouan.vayer@gmail.com>  2019-05-29 14:24:05 +0200
commit:    63bbeb34e48f02c97a762dab5232158d90a5cffc (patch)
tree:      853026b5854b6e4b01fdf750db139985b3dd596f /ot
parent:    f70aabfcc11f92181e0dc987b341bad8ec030d75 (diff)
parent:    f66ab58c7c895011fd37bafd3e848828399c56c4 (diff)
Merge remote-tracking branch 'rflamary/master'
merge pot
Diffstat (limited to 'ot')
-rw-r--r--  ot/__init__.py              4
-rw-r--r--  ot/bregman.py             611
-rw-r--r--  ot/da.py                  407
-rw-r--r--  ot/datasets.py              4
-rw-r--r--  ot/externals/funcsigs.py   46
-rw-r--r--  ot/gpu/__init__.py         33
-rw-r--r--  ot/gpu/bregman.py         148
-rw-r--r--  ot/gpu/da.py              213
-rw-r--r--  ot/gpu/utils.py           101
-rw-r--r--  ot/gromov.py                2
-rw-r--r--  ot/lp/__init__.py          95
-rw-r--r--  ot/lp/cvx.py                3
-rw-r--r--  ot/stochastic.py          736
-rw-r--r--  ot/utils.py                31
14 files changed, 1822 insertions, 612 deletions
diff --git a/ot/__init__.py b/ot/__init__.py
index 995ce33..b74b924 100644
--- a/ot/__init__.py
+++ b/ot/__init__.py
@@ -19,7 +19,7 @@ from . import datasets
from . import da
from . import gromov
from . import smooth
-
+from . import stochastic
# OT functions
from .lp import emd, emd2
@@ -29,7 +29,7 @@ from .da import sinkhorn_lpl1_mm
# utils functions
from .utils import dist, unif, tic, toc, toq
-__version__ = "0.4.0"
+__version__ = "0.5.1"
__all__ = ["emd", "emd2", "sinkhorn", "sinkhorn2", "utils", 'datasets',
'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov',
diff --git a/ot/bregman.py b/ot/bregman.py
index ffa6202..321712b 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -11,6 +11,7 @@ Bregman projections for regularized OT
# License: MIT License
import numpy as np
+from .utils import unif, dist
def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
@@ -49,7 +50,7 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
reg : float
Regularization term >0
method : str
- method used for the solver either 'sinkhorn', 'sinkhorn_stabilized' or
+ method used for the solver either 'sinkhorn', 'greenkhorn', 'sinkhorn_stabilized' or
'sinkhorn_epsilon_scaling', see those function for specific parameters
numItermax : int, optional
Max number of iterations
@@ -105,6 +106,10 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
def sink():
return sinkhorn_knopp(a, b, M, reg, numItermax=numItermax,
stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ elif method.lower() == 'greenkhorn':
+ def sink():
+ return greenkhorn(a, b, M, reg, numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose, log=log)
elif method.lower() == 'sinkhorn_stabilized':
def sink():
return sinkhorn_stabilized(a, b, M, reg, numItermax=numItermax,
@@ -118,7 +123,8 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
print('Warning : unknown method using classic Sinkhorn Knopp')
def sink():
- return sinkhorn_knopp(a, b, M, reg, **kwargs)
+ return sinkhorn_knopp(a, b, M, reg, numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose, log=log, **kwargs)
return sink()
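With this dispatch in place, the new greedy solver is reachable through the generic front end. A minimal sketch on toy data, assuming the POT API shown in this diff:

    import numpy as np
    import ot

    a = np.array([.5, .5])              # source weights
    b = np.array([.5, .5])              # target weights
    M = np.array([[0., 1.], [1., 0.]])  # cost matrix

    # same problem, two solvers selected by the `method` string
    G_classic = ot.sinkhorn(a, b, M, reg=1., method='sinkhorn')
    G_greedy = ot.sinkhorn(a, b, M, reg=1., method='greenkhorn')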
@@ -199,6 +205,8 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000,
.. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
+ .. [22] Altschuler J., Weed J., Rigollet P. : Near-linear time approximation algorithms for optimal transport via Sinkhorn iteration, Advances in Neural Information Processing Systems (NIPS) 31, 2017
+
See Also
@@ -206,6 +214,7 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000,
ot.lp.emd : Unregularized OT
ot.optim.cg : General regularized OT
ot.bregman.sinkhorn_knopp : Classic Sinkhorn [2]
+ ot.bregman.greenkhorn : Greenkhorn [22]
ot.bregman.sinkhorn_stabilized: Stabilized sinkhorn [9][10]
ot.bregman.sinkhorn_epsilon_scaling: Sinkhorn with epsilon scaling [9][10]
@@ -346,8 +355,13 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
# print(reg)
- K = np.exp(-M / reg)
+ # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute
+ K = np.empty(M.shape, dtype=M.dtype)
+ np.divide(M, -reg, out=K)
+ np.exp(K, out=K)
+
# print(np.min(K))
+ tmp2 = np.empty(b.shape, dtype=M.dtype)
Kp = (1 / a).reshape(-1, 1) * K
cpt = 0
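The three in-place calls above build the Gibbs kernel without allocating a temporary for -M/reg. A quick equivalence check, a standalone sketch using standard numpy only:

    import numpy as np

    M = np.random.rand(4, 5)
    reg = 0.1

    K = np.empty(M.shape, dtype=M.dtype)
    np.divide(M, -reg, out=K)  # K = -M / reg, written in place
    np.exp(K, out=K)           # K = exp(-M / reg), no extra temporary

    assert np.allclose(K, np.exp(-M / reg))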
@@ -355,13 +369,14 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
while (err > stopThr and cpt < numItermax):
uprev = u
vprev = v
+
KtransposeU = np.dot(K.T, u)
v = np.divide(b, KtransposeU)
u = 1. / np.dot(Kp, v)
- if (np.any(KtransposeU == 0) or
- np.any(np.isnan(u)) or np.any(np.isnan(v)) or
- np.any(np.isinf(u)) or np.any(np.isinf(v))):
+ if (np.any(KtransposeU == 0)
+ or np.any(np.isnan(u)) or np.any(np.isnan(v))
+ or np.any(np.isinf(u)) or np.any(np.isinf(v))):
# we have reached the machine precision
# come back to previous solution and quit loop
print('Warning: numerical errors at iteration', cpt)
@@ -375,8 +390,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
err = np.sum((u - uprev)**2) / np.sum((u)**2) + \
np.sum((v - vprev)**2) / np.sum((v)**2)
else:
- transp = u.reshape(-1, 1) * (K * v)
- err = np.linalg.norm((np.sum(transp, axis=0) - b))**2
+ # compute right marginal tmp2= (diag(u)Kdiag(v))^T1
+ np.einsum('i,ij,j->j', u, K, v, out=tmp2)
+ err = np.linalg.norm(tmp2 - b)**2 # violation of marginal
if log:
log['err'].append(err)
@@ -391,10 +407,7 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
log['v'] = v
if nbb: # return only loss
- res = np.zeros((nbb))
- for i in range(nbb):
- res[i] = np.sum(
- u[:, i].reshape((-1, 1)) * K * v[:, i].reshape((1, -1)) * M)
+ res = np.einsum('ik,ij,jk,ij->k', u, K, v, M)
if log:
return res, log
else:
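The einsum contraction above computes all nbb losses in one call instead of a Python loop over columns. The two forms agree, as this sketch with random data checks:

    import numpy as np

    ns, nt, nbb = 3, 4, 2
    u = np.random.rand(ns, nbb)
    K = np.random.rand(ns, nt)
    v = np.random.rand(nt, nbb)
    M = np.random.rand(ns, nt)

    res = np.einsum('ik,ij,jk,ij->k', u, K, v, M)
    loop = np.array([np.sum(u[:, i].reshape((-1, 1)) * K
                            * v[:, i].reshape((1, -1)) * M)
                     for i in range(nbb)])
    assert np.allclose(res, loop)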
@@ -408,6 +421,159 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
return u.reshape((-1, 1)) * K * v.reshape((1, -1))
+def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log=False):
+ """
+ Solve the entropic regularization optimal transport problem and return the OT matrix
+
+ The algorithm used is based on the paper
+
+ Near-linear time approximation algorithms for optimal transport via Sinkhorn iteration
+ by Jason Altschuler, Jonathan Weed, Philippe Rigollet
+ appeared at NIPS 2017
+
+ which is a greedy coordinate-descent variant of the Sinkhorn-Knopp algorithm [2].
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+ s.t. \gamma 1 = a
+
+ \gamma^T 1= b
+
+ \gamma\geq 0
+ where :
+
+ - M is the (ns,nt) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target weights (sum to 1)
+
+
+
+ Parameters
+ ----------
+ a : np.ndarray (ns,)
+ samples weights in the source domain
+ b : np.ndarray (nt,)
+ samples weights in the target domain
+ M : np.ndarray (ns,nt)
+ loss matrix
+ reg : float
+ Regularization term >0
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+ Stop threshold on error (>0)
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ gamma : (ns x nt) ndarray
+ Optimal transportation matrix for the given parameters
+ log : dict
+ log dictionary returned only if log==True in parameters
+
+ Examples
+ --------
+
+ >>> import ot
+ >>> a=[.5,.5]
+ >>> b=[.5,.5]
+ >>> M=[[0.,1.],[1.,0.]]
+ >>> ot.bregman.greenkhorn(a,b,M,1)
+ array([[ 0.36552929, 0.13447071],
+ [ 0.13447071, 0.36552929]])
+
+
+ References
+ ----------
+
+ .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
+ .. [22] J. Altschuler, J. Weed, P. Rigollet : Near-linear time approximation algorithms for optimal transport via Sinkhorn iteration, Advances in Neural Information Processing Systems (NIPS) 31, 2017
+
+
+ See Also
+ --------
+ ot.lp.emd : Unregularized OT
+ ot.optim.cg : General regularized OT
+
+ """
+
+ a = np.asarray(a, dtype=np.float64)
+ b = np.asarray(b, dtype=np.float64)
+ M = np.asarray(M, dtype=np.float64)
+
+ if len(a) == 0:
+ a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
+ if len(b) == 0:
+ b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
+
+ n = a.shape[0]
+ m = b.shape[0]
+
+ # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute
+ K = np.empty_like(M)
+ np.divide(M, -reg, out=K)
+ np.exp(K, out=K)
+
+ u = np.full(n, 1. / n)
+ v = np.full(m, 1. / m)
+ G = u[:, np.newaxis] * K * v[np.newaxis, :]
+
+ viol = G.sum(1) - a
+ viol_2 = G.sum(0) - b
+ stopThr_val = 1
+
+ if log:
+ log = dict()
+ log['u'] = u
+ log['v'] = v
+
+ for i in range(numItermax):
+ i_1 = np.argmax(np.abs(viol))
+ i_2 = np.argmax(np.abs(viol_2))
+ m_viol_1 = np.abs(viol[i_1])
+ m_viol_2 = np.abs(viol_2[i_2])
+ stopThr_val = np.maximum(m_viol_1, m_viol_2)
+
+ if m_viol_1 > m_viol_2:
+ old_u = u[i_1]
+ u[i_1] = a[i_1] / (K[i_1, :].dot(v))
+ G[i_1, :] = u[i_1] * K[i_1, :] * v
+
+ viol[i_1] = u[i_1] * K[i_1, :].dot(v) - a[i_1]
+ viol_2 += (K[i_1, :].T * (u[i_1] - old_u) * v)
+
+ else:
+ old_v = v[i_2]
+ v[i_2] = b[i_2] / (K[:, i_2].T.dot(u))
+ G[:, i_2] = u * K[:, i_2] * v[i_2]
+ #aviol = (G@one_m - a)
+ #aviol_2 = (G.T@one_n - b)
+ viol += (-old_v + v[i_2]) * K[:, i_2] * u
+ viol_2[i_2] = v[i_2] * K[:, i_2].dot(u) - b[i_2]
+
+ #print('b',np.max(abs(aviol -viol)),np.max(abs(aviol_2 - viol_2)))
+
+ if stopThr_val <= stopThr:
+ break
+ else:
+ print('Warning: Algorithm did not converge')
+
+ if log:
+ log['u'] = u
+ log['v'] = v
+
+ if log:
+ return G, log
+ else:
+ return G
+
+
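Each Greenkhorn iteration rescales only the row or column with the largest marginal violation, so one update costs O(ns + nt) instead of the O(ns * nt) of a full Sinkhorn sweep. A sketch of the invariant the row step enforces, on hypothetical standalone data with names mirroring the code above:

    import numpy as np

    rng = np.random.default_rng(0)
    n, m = 4, 5
    K = rng.random((n, m))
    a = np.full(n, 1. / n)
    u = np.full(n, 1. / n)
    v = np.full(m, 1. / m)

    i_1 = 2                                # row picked by the greedy rule
    u[i_1] = a[i_1] / K[i_1, :].dot(v)     # single row rescale
    row = u[i_1] * K[i_1, :] * v           # updated row of the plan G

    assert np.isclose(row.sum(), a[i_1])   # its marginal is now exact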
def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
warmstart=None, verbose=False, print_period=20, log=False, **kwargs):
"""
@@ -532,13 +698,13 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
def get_K(alpha, beta):
"""log space computation"""
- return np.exp(-(M - alpha.reshape((na, 1)) -
- beta.reshape((1, nb))) / reg)
+ return np.exp(-(M - alpha.reshape((na, 1))
+ - beta.reshape((1, nb))) / reg)
def get_Gamma(alpha, beta, u, v):
"""log space gamma computation"""
- return np.exp(-(M - alpha.reshape((na, 1)) - beta.reshape((1, nb))) /
- reg + np.log(u.reshape((na, 1))) + np.log(v.reshape((1, nb))))
+ return np.exp(-(M - alpha.reshape((na, 1)) - beta.reshape((1, nb)))
+ / reg + np.log(u.reshape((na, 1))) + np.log(v.reshape((1, nb))))
# print(np.min(K))
@@ -748,8 +914,8 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne
def get_K(alpha, beta):
"""log space computation"""
- return np.exp(-(M - alpha.reshape((na, 1)) -
- beta.reshape((1, nb))) / reg)
+ return np.exp(-(M - alpha.reshape((na, 1))
+ - beta.reshape((1, nb))) / reg)
# print(np.min(K))
def get_reg(n): # exponential decreasing
@@ -917,6 +1083,116 @@ def barycenter(A, M, reg, weights=None, numItermax=1000,
return geometricBar(weights, UKv)
+def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1e-9, stabThr=1e-30, verbose=False, log=False):
+ """Compute the entropic regularized wasserstein barycenter of distributions A
+ where A is a collection of 2D images.
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \mathbf{a} = arg\min_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
+
+ where :
+
+ - :math:`W_{reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance (see ot.bregman.sinkhorn)
+ - :math:`\mathbf{a}_i` are training distributions (2D images) in the last two dimensions of matrix :math:`\mathbf{A}`
+ - reg is the regularization strength scalar value
+
+ The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [21]_
+
+ Parameters
+ ----------
+ A : np.ndarray (n,w,h)
+ n distributions (2D images) of size w x h
+ reg : float
+ Regularization term >0
+ weights : np.ndarray (n,)
+ Weights of each image on the simplex (barycentric coordinates)
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+ Stop threshold on error (>0)
+ stabThr : float, optional
+ Stabilization threshold to avoid numerical precision issue
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ a : (w,h) ndarray
+ 2D Wasserstein barycenter
+ log : dict
+ log dictionary returned only if log==True in parameters
+
+
+ References
+ ----------
+
+ .. [21] Solomon, J., De Goes, F., Peyré, G., Cuturi, M., Butscher, A., Nguyen, A. & Guibas, L. (2015).
+ Convolutional Wasserstein distances: Efficient optimal transportation on geometric domains
+ ACM Transactions on Graphics (TOG), 34(4), 66
+
+
+ """
+
+ if weights is None:
+ weights = np.ones(A.shape[0]) / A.shape[0]
+ else:
+ assert(len(weights) == A.shape[0])
+
+ if log:
+ log = {'err': []}
+
+ b = np.zeros_like(A[0, :, :])
+ U = np.ones_like(A)
+ KV = np.ones_like(A)
+
+ cpt = 0
+ err = 1
+
+ # build the convolution operator
+ t = np.linspace(0, 1, A.shape[1])
+ [Y, X] = np.meshgrid(t, t)
+ xi1 = np.exp(-(X - Y)**2 / reg)
+
+ def K(x):
+ return np.dot(np.dot(xi1, x), xi1)
+
+ while (err > stopThr and cpt < numItermax):
+
+ bold = b
+ cpt = cpt + 1
+
+ b = np.zeros_like(A[0, :, :])
+ for r in range(A.shape[0]):
+ KV[r, :, :] = K(A[r, :, :] / np.maximum(stabThr, K(U[r, :, :])))
+ b += weights[r] * np.log(np.maximum(stabThr, U[r, :, :] * KV[r, :, :]))
+ b = np.exp(b)
+ for r in range(A.shape[0]):
+ U[r, :, :] = b / np.maximum(stabThr, KV[r, :, :])
+
+ if cpt % 10 == 1:
+ err = np.sum(np.abs(bold - b))
+ # log and verbose print
+ if log:
+ log['err'].append(err)
+
+ if verbose:
+ if cpt % 200 == 0:
+ print('{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err))
+
+ if log:
+ log['niter'] = cpt
+ log['U'] = U
+ return b, log
+ else:
+ return b
+
+
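A minimal usage sketch for the new 2D barycenter, on hypothetical toy images not taken from this diff:

    import numpy as np
    import ot

    # two normalized 20x20 'images'
    A = np.random.rand(2, 20, 20)
    A /= A.sum(axis=(1, 2), keepdims=True)

    bary = ot.bregman.convolutional_barycenter2d(A, reg=0.004)
    print(bary.shape)  # (20, 20)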
def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000,
stopThr=1e-3, verbose=False, log=False):
"""
@@ -1023,3 +1299,302 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000,
return np.sum(K0, axis=1), log
else:
return np.sum(K0, axis=1)
+
+
+def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs):
+ '''
+ Solve the entropic regularization optimal transport problem and return the
+ OT matrix from empirical data
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+ s.t. \gamma 1 = a
+
+ \gamma^T 1= b
+
+ \gamma\geq 0
+ where :
+
+ - :math:`M` is the (ns,nt) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`a` and :math:`b` are source and target weights (sum to 1)
+
+
+ Parameters
+ ----------
+ X_s : np.ndarray (ns, d)
+ samples in the source domain
+ X_t : np.ndarray (nt, d)
+ samples in the target domain
+ reg : float
+ Regularization term >0
+ a : np.ndarray (ns,)
+ samples weights in the source domain
+ b : np.ndarray (nt,)
+ samples weights in the target domain
+ numIterMax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+ Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ gamma : (ns x nt) ndarray
+ Regularized optimal transportation matrix for the given parameters
+ log : dict
+ log dictionary returned only if log==True in parameters
+
+ Examples
+ --------
+
+ >>> import numpy as np
+ >>> from ot.bregman import empirical_sinkhorn
+ >>> n_s = 2
+ >>> n_t = 2
+ >>> reg = 0.1
+ >>> X_s = np.reshape(np.arange(n_s), (n_s, 1))
+ >>> X_t = np.reshape(np.arange(0, n_t), (n_t, 1))
+ >>> emp_sinkhorn = empirical_sinkhorn(X_s, X_t, reg, verbose=False)
+ >>> print(emp_sinkhorn)
+ [[4.99977301e-01 2.26989344e-05]
+ [2.26989344e-05 4.99977301e-01]]
+
+
+ References
+ ----------
+
+ .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
+
+ .. [9] Schmitzer, B. (2016). Stabilized Sparse Scaling Algorithms for Entropy Regularized Transport Problems. arXiv preprint arXiv:1610.06519.
+
+ .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
+ '''
+
+ if a is None:
+ a = unif(np.shape(X_s)[0])
+ if b is None:
+ b = unif(np.shape(X_t)[0])
+
+ M = dist(X_s, X_t, metric=metric)
+
+ if log:
+ pi, log = sinkhorn(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=True, **kwargs)
+ return pi, log
+ else:
+ pi = sinkhorn(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=False, **kwargs)
+ return pi
+
+
+def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs):
+ '''
+ Solve the entropic regularization optimal transport problem from empirical
+ data and return the OT loss
+
+
+ The function solves the following optimization problem:
+
+ .. math::
+ W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+ s.t. \gamma 1 = a
+
+ \gamma^T 1= b
+
+ \gamma\geq 0
+ where :
+
+ - :math:`M` is the (ns,nt) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`a` and :math:`b` are source and target weights (sum to 1)
+
+
+ Parameters
+ ----------
+ X_s : np.ndarray (ns, d)
+ samples in the source domain
+ X_t : np.ndarray (nt, d)
+ samples in the target domain
+ reg : float
+ Regularization term >0
+ a : np.ndarray (ns,)
+ samples weights in the source domain
+ b : np.ndarray (nt,)
+ samples weights in the target domain
+ numIterMax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+ Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ W : float
+ Regularized optimal transport loss for the given parameters
+ log : dict
+ log dictionary returned only if log==True in parameters
+
+ Examples
+ --------
+
+ >>> import numpy as np
+ >>> from ot.bregman import empirical_sinkhorn2
+ >>> n_s = 2
+ >>> n_t = 2
+ >>> reg = 0.1
+ >>> X_s = np.reshape(np.arange(n_s), (n_s, 1))
+ >>> X_t = np.reshape(np.arange(0, n_t), (n_t, 1))
+ >>> loss_sinkhorn = empirical_sinkhorn2(X_s, X_t, reg, verbose=False)
+ >>> print(loss_sinkhorn)
+ [4.53978687e-05]
+
+
+ References
+ ----------
+
+ .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
+
+ .. [9] Schmitzer, B. (2016). Stabilized Sparse Scaling Algorithms for Entropy Regularized Transport Problems. arXiv preprint arXiv:1610.06519.
+
+ .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
+ '''
+
+ if a is None:
+ a = unif(np.shape(X_s)[0])
+ if b is None:
+ b = unif(np.shape(X_t)[0])
+
+ M = dist(X_s, X_t, metric=metric)
+
+ if log:
+ sinkhorn_loss, log = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ return sinkhorn_loss, log
+ else:
+ sinkhorn_loss = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ return sinkhorn_loss
+
+
+def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs):
+ '''
+ Compute the sinkhorn divergence loss from empirical data
+
+ The function solves the following optimization problems and return the
+ sinkhorn divergence :math:`S`:
+
+ .. math::
+
+ W &= \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+ W_a &= \min_{\gamma_a} <\gamma_a,M_a>_F + reg\cdot\Omega(\gamma_a)
+
+ W_b &= \min_{\gamma_b} <\gamma_b,M_b>_F + reg\cdot\Omega(\gamma_b)
+
+ S &= W - 1/2 * (W_a + W_b)
+
+ .. math::
+ s.t. \gamma 1 = a
+
+ \gamma^T 1= b
+
+ \gamma\geq 0
+
+ \gamma_a 1 = a
+
+ \gamma_a^T 1= a
+
+ \gamma_a\geq 0
+
+ \gamma_b 1 = b
+
+ \gamma_b^T 1= b
+
+ \gamma_b\geq 0
+ where :
+
+ - :math:`M` (resp. :math:`M_a, M_b`) is the (ns,nt) metric cost matrix (resp (ns, ns) and (nt, nt))
+ - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`a` and :math:`b` are source and target weights (sum to 1)
+
+
+ Parameters
+ ----------
+ X_s : np.ndarray (ns, d)
+ samples in the source domain
+ X_t : np.ndarray (nt, d)
+ samples in the target domain
+ reg : float
+ Regularization term >0
+ a : np.ndarray (ns,)
+ samples weights in the source domain
+ b : np.ndarray (nt,)
+ samples weights in the target domain
+ numIterMax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+ Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ W : float
+ Sinkhorn divergence for the given parameters
+ log : dict
+ log dictionary returned only if log==True in parameters
+
+ Examples
+ --------
+
+ >>> import numpy as np
+ >>> from ot.bregman import empirical_sinkhorn_divergence
+ >>> n_s = 2
+ >>> n_t = 4
+ >>> reg = 0.1
+ >>> X_s = np.reshape(np.arange(n_s), (n_s, 1))
+ >>> X_t = np.reshape(np.arange(0, n_t), (n_t, 1))
+ >>> emp_sinkhorn_div = empirical_sinkhorn_divergence(X_s, X_t, reg)
+ >>> print(emp_sinkhorn_div)
+ [2.99977435]
+
+
+ References
+ ----------
+
+ .. [23] Aude Genevay, Gabriel Peyré, Marco Cuturi, Learning Generative Models with Sinkhorn Divergences, Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics, (AISTATS) 21, 2018
+ '''
+ if log:
+ sinkhorn_loss_ab, log_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+
+ sinkhorn_loss_a, log_a = empirical_sinkhorn2(X_s, X_s, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+
+ sinkhorn_loss_b, log_b = empirical_sinkhorn2(X_t, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+
+ sinkhorn_div = sinkhorn_loss_ab - 1 / 2 * (sinkhorn_loss_a + sinkhorn_loss_b)
+
+ log = {}
+ log['sinkhorn_loss_ab'] = sinkhorn_loss_ab
+ log['sinkhorn_loss_a'] = sinkhorn_loss_a
+ log['sinkhorn_loss_b'] = sinkhorn_loss_b
+ log['log_sinkhorn_ab'] = log_ab
+ log['log_sinkhorn_a'] = log_a
+ log['log_sinkhorn_b'] = log_b
+
+ return max(0, sinkhorn_div), log
+
+ else:
+ sinkhorn_loss_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+
+ sinkhorn_loss_a = empirical_sinkhorn2(X_s, X_s, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+
+ sinkhorn_loss_b = empirical_sinkhorn2(X_t, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+
+ sinkhorn_div = sinkhorn_loss_ab - 1 / 2 * (sinkhorn_loss_a + sinkhorn_loss_b)
+ return max(0, sinkhorn_div)
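Because the two self-transport terms are subtracted, the divergence of a distribution with itself vanishes (up to the final max(0, .) clipping). A quick check, as a sketch:

    import numpy as np
    from ot.bregman import empirical_sinkhorn_divergence

    X = np.random.rand(10, 2)
    div = empirical_sinkhorn_divergence(X, X, reg=1.)
    print(div)  # ~0: the ab, a and b terms coincide when X_s == X_t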
diff --git a/ot/da.py b/ot/da.py
index 48b418f..479e698 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -15,7 +15,7 @@ import scipy.linalg as linalg
from .bregman import sinkhorn
from .lp import emd
from .utils import unif, dist, kernel, cost_normalization
-from .utils import check_params, deprecated, BaseEstimator
+from .utils import check_params, BaseEstimator
from .optim import cg
from .optim import gcg
@@ -473,22 +473,24 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
Weight for the linear OT loss (>0)
eta : float, optional
Regularization term for the linear mapping L (>0)
- bias : bool,optional
- Estimate linear mapping with constant bias
kerneltype : str,optional
kernel used by calling function ot.utils.kernel (gaussian by default)
sigma : float, optional
Gaussian kernel bandwidth.
+ bias : bool,optional
+ Estimate linear mapping with constant bias
+ verbose : bool, optional
+ Print information along iterations
+ verbose2 : bool, optional
+ Print information along iterations
numItermax : int, optional
Max number of BCD iterations
- stopThr : float, optional
- Stop threshold on relative loss decrease (>0)
numInnerItermax : int, optional
Max number of iterations (inner CG solver)
stopInnerThr : float, optional
Stop threshold on error (inner CG solver) (>0)
- verbose : bool, optional
- Print information along iterations
+ stopThr : float, optional
+ Stop threshold on relative loss decrease (>0)
log : bool, optional
record log if True
@@ -643,7 +645,8 @@ def OT_mapping_linear(xs, xt, reg=1e-6, ws=None,
The function estimates the optimal linear operator that aligns the two
empirical distributions. This is equivalent to estimating the closed
form mapping between two Gaussian distributions :math:`N(\mu_s,\Sigma_s)`
- and :math:`N(\mu_t,\Sigma_t)` as proposed in [14] and discussed in remark 2.29 in [15].
+ and :math:`N(\mu_t,\Sigma_t)` as proposed in [14] and discussed in remark
+ 2.29 in [15].
The linear operator from source to target :math:`M`
@@ -740,288 +743,6 @@ def OT_mapping_linear(xs, xt, reg=1e-6, ws=None,
return A, b
-@deprecated("The class OTDA is deprecated in 0.3.1 and will be "
- "removed in 0.5"
- "\n\tfor standard transport use class EMDTransport instead.")
-class OTDA(object):
-
- """Class for domain adaptation with optimal transport as proposed in [5]
-
-
- References
- ----------
-
- .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
- "Optimal Transport for Domain Adaptation," in IEEE Transactions on
- Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
-
- """
-
- def __init__(self, metric='sqeuclidean', norm=None):
- """ Class initialization"""
- self.xs = 0
- self.xt = 0
- self.G = 0
- self.metric = metric
- self.norm = norm
- self.computed = False
-
- def fit(self, xs, xt, ws=None, wt=None, max_iter=100000):
- """Fit domain adaptation between samples is xs and xt
- (with optional weights)"""
- self.xs = xs
- self.xt = xt
-
- if wt is None:
- wt = unif(xt.shape[0])
- if ws is None:
- ws = unif(xs.shape[0])
-
- self.ws = ws
- self.wt = wt
-
- self.M = dist(xs, xt, metric=self.metric)
- self.M = cost_normalization(self.M, self.norm)
- self.G = emd(ws, wt, self.M, max_iter)
- self.computed = True
-
- def interp(self, direction=1):
- """Barycentric interpolation for the source (1) or target (-1) samples
-
- This Barycentric interpolation solves for each source (resp target)
- sample xs (resp xt) the following optimization problem:
-
- .. math::
- arg\min_x \sum_i \gamma_{k,i} c(x,x_i^t)
-
- where k is the index of the sample in xs
-
- For the moment only squared euclidean distance is provided but more
- metric could be used in the future.
-
- """
- if direction > 0: # >0 then source to target
- G = self.G
- w = self.ws.reshape((self.xs.shape[0], 1))
- x = self.xt
- else:
- G = self.G.T
- w = self.wt.reshape((self.xt.shape[0], 1))
- x = self.xs
-
- if self.computed:
- if self.metric == 'sqeuclidean':
- return np.dot(G / w, x) # weighted mean
- else:
- print(
- "Warning, metric not handled yet, using weighted average")
- return np.dot(G / w, x) # weighted mean
- return None
- else:
- print("Warning, model not fitted yet, returning None")
- return None
-
- def predict(self, x, direction=1):
- """ Out of sample mapping using the formulation from [6]
-
- For each sample x to map, it finds the nearest source sample xs and
- map the samle x to the position xst+(x-xs) wher xst is the barycentric
- interpolation of source sample xs.
-
- References
- ----------
-
- .. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014).
- Regularized discrete optimal transport. SIAM Journal on Imaging
- Sciences, 7(3), 1853-1882.
-
- """
- if direction > 0: # >0 then source to target
- xf = self.xt
- x0 = self.xs
- else:
- xf = self.xs
- x0 = self.xt
-
- D0 = dist(x, x0) # dist netween new samples an source
- idx = np.argmin(D0, 1) # closest one
- xf = self.interp(direction) # interp the source samples
- # aply the delta to the interpolation
- return xf[idx, :] + x - x0[idx, :]
-
-
-@deprecated("The class OTDA_sinkhorn is deprecated in 0.3.1 and will be"
- " removed in 0.5 \nUse class SinkhornTransport instead.")
-class OTDA_sinkhorn(OTDA):
-
- """Class for domain adaptation with optimal transport with entropic
- regularization
-
-
- """
-
- def fit(self, xs, xt, reg=1, ws=None, wt=None, **kwargs):
- """Fit regularized domain adaptation between samples is xs and xt
- (with optional weights)"""
- self.xs = xs
- self.xt = xt
-
- if wt is None:
- wt = unif(xt.shape[0])
- if ws is None:
- ws = unif(xs.shape[0])
-
- self.ws = ws
- self.wt = wt
-
- self.M = dist(xs, xt, metric=self.metric)
- self.M = cost_normalization(self.M, self.norm)
- self.G = sinkhorn(ws, wt, self.M, reg, **kwargs)
- self.computed = True
-
-
-@deprecated("The class OTDA_lpl1 is deprecated in 0.3.1 and will be"
- " removed in 0.5 \nUse class SinkhornLpl1Transport instead.")
-class OTDA_lpl1(OTDA):
-
- """Class for domain adaptation with optimal transport with entropic and
- group regularization"""
-
- def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, **kwargs):
- """Fit regularized domain adaptation between samples is xs and xt
- (with optional weights), See ot.da.sinkhorn_lpl1_mm for fit
- parameters"""
- self.xs = xs
- self.xt = xt
-
- if wt is None:
- wt = unif(xt.shape[0])
- if ws is None:
- ws = unif(xs.shape[0])
-
- self.ws = ws
- self.wt = wt
-
- self.M = dist(xs, xt, metric=self.metric)
- self.M = cost_normalization(self.M, self.norm)
- self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M, reg, eta, **kwargs)
- self.computed = True
-
-
-@deprecated("The class OTDA_l1L2 is deprecated in 0.3.1 and will be"
- " removed in 0.5 \nUse class SinkhornL1l2Transport instead.")
-class OTDA_l1l2(OTDA):
-
- """Class for domain adaptation with optimal transport with entropic
- and group lasso regularization"""
-
- def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, **kwargs):
- """Fit regularized domain adaptation between samples is xs and xt
- (with optional weights), See ot.da.sinkhorn_lpl1_gl for fit
- parameters"""
- self.xs = xs
- self.xt = xt
-
- if wt is None:
- wt = unif(xt.shape[0])
- if ws is None:
- ws = unif(xs.shape[0])
-
- self.ws = ws
- self.wt = wt
-
- self.M = dist(xs, xt, metric=self.metric)
- self.M = cost_normalization(self.M, self.norm)
- self.G = sinkhorn_l1l2_gl(ws, ys, wt, self.M, reg, eta, **kwargs)
- self.computed = True
-
-
-@deprecated("The class OTDA_mapping_linear is deprecated in 0.3.1 and will be"
- " removed in 0.5 \nUse class MappingTransport instead.")
-class OTDA_mapping_linear(OTDA):
-
- """Class for optimal transport with joint linear mapping estimation as in
- [8]
- """
-
- def __init__(self):
- """ Class initialization"""
-
- self.xs = 0
- self.xt = 0
- self.G = 0
- self.L = 0
- self.bias = False
- self.computed = False
- self.metric = 'sqeuclidean'
-
- def fit(self, xs, xt, mu=1, eta=1, bias=False, **kwargs):
- """ Fit domain adaptation between samples is xs and xt (with optional
- weights)"""
- self.xs = xs
- self.xt = xt
- self.bias = bias
-
- self.ws = unif(xs.shape[0])
- self.wt = unif(xt.shape[0])
-
- self.G, self.L = joint_OT_mapping_linear(
- xs, xt, mu=mu, eta=eta, bias=bias, **kwargs)
- self.computed = True
-
- def mapping(self):
- return lambda x: self.predict(x)
-
- def predict(self, x):
- """ Out of sample mapping estimated during the call to fit"""
- if self.computed:
- if self.bias:
- x = np.hstack((x, np.ones((x.shape[0], 1))))
- return x.dot(self.L) # aply the delta to the interpolation
- else:
- print("Warning, model not fitted yet, returning None")
- return None
-
-
-@deprecated("The class OTDA_mapping_kernel is deprecated in 0.3.1 and will be"
- " removed in 0.5 \nUse class MappingTransport instead.")
-class OTDA_mapping_kernel(OTDA_mapping_linear):
-
- """Class for optimal transport with joint nonlinear mapping
- estimation as in [8]"""
-
- def fit(self, xs, xt, mu=1, eta=1, bias=False, kerneltype='gaussian',
- sigma=1, **kwargs):
- """ Fit domain adaptation between samples is xs and xt """
- self.xs = xs
- self.xt = xt
- self.bias = bias
-
- self.ws = unif(xs.shape[0])
- self.wt = unif(xt.shape[0])
- self.kernel = kerneltype
- self.sigma = sigma
- self.kwargs = kwargs
-
- self.G, self.L = joint_OT_mapping_kernel(
- xs, xt, mu=mu, eta=eta, bias=bias, **kwargs)
- self.computed = True
-
- def predict(self, x):
- """ Out of sample mapping estimated during the call to fit"""
-
- if self.computed:
- K = kernel(
- x, self.xs, method=self.kernel, sigma=self.sigma,
- **self.kwargs)
- if self.bias:
- K = np.hstack((K, np.ones((x.shape[0], 1))))
- return K.dot(self.L)
- else:
- print("Warning, model not fitted yet, returning None")
- return None
-
-
def distribution_estimation_uniform(X):
"""estimates a uniform distribution from an array of samples X
@@ -1466,25 +1187,25 @@ class SinkhornTransport(BaseTransport):
algorithm if it has not converged
tol : float, optional (default=10e-9)
The precision required to stop the optimization algorithm.
- mapping : string, optional (default="barycentric")
- The kind of mapping to apply to transport samples from a domain into
- another one.
- if "barycentric" only the samples used to estimate the coupling can
- be transported from a domain to another one.
+ verbose : bool, optional (default=False)
+ Controls the verbosity of the optimization algorithm
+ log : bool, optional (default=False)
+ Controls the logs of the optimization algorithm
metric : string, optional (default="sqeuclidean")
The ground metric for the Wasserstein problem
norm : string, optional (default=None)
If given, normalize the ground metric to avoid numerical errors that
can occur with large metric values.
- distribution : string, optional (default="uniform")
+ distribution_estimation : callable, optional (defaults to the uniform)
The kind of distribution estimation to employ
- verbose : int, optional (default=0)
- Controls the verbosity of the optimization algorithm
- log : int, optional (default=0)
- Controls the logs of the optimization algorithm
+ out_of_sample_map : string, optional (default="ferradans")
+ The kind of out of sample mapping to apply to transport samples
+ from a domain into another one. Currently the only possible option is
+ "ferradans" which uses the method proposed in [6].
limit_max: float, optional (default=np.infty)
Controls the semi supervised mode. Transport between labeled source
- and target samples of different classes will exhibit an infinite cost
+ and target samples of different classes will exhibit a cost defined
+ by this variable
Attributes
----------
@@ -1569,22 +1290,19 @@ class EMDTransport(BaseTransport):
Parameters
----------
- mapping : string, optional (default="barycentric")
- The kind of mapping to apply to transport samples from a domain into
- another one.
- if "barycentric" only the samples used to estimate the coupling can
- be transported from a domain to another one.
metric : string, optional (default="sqeuclidean")
The ground metric for the Wasserstein problem
norm : string, optional (default=None)
If given, normalize the ground metric to avoid numerical errors that
can occur with large metric values.
- distribution : string, optional (default="uniform")
- The kind of distribution estimation to employ
- verbose : int, optional (default=0)
- Controls the verbosity of the optimization algorithm
- log : int, optional (default=0)
+ log : bool, optional (default=False)
Controls the logs of the optimization algorithm
+ distribution_estimation : callable, optional (defaults to the uniform)
+ The kind of distribution estimation to employ
+ out_of_sample_map : string, optional (default="ferradans")
+ The kind of out of sample mapping to apply to transport samples
+ from a domain into another one. Currently the only possible option is
+ "ferradans" which uses the method proposed in [6].
limit_max: float, optional (default=10)
Controls the semi supervised mode. Transport between labeled source
and target samples of different classes will exhibit an infinite cost
@@ -1669,28 +1387,32 @@ class SinkhornLpl1Transport(BaseTransport):
Entropic regularization parameter
reg_cl : float, optional (default=0.1)
Class regularization parameter
- mapping : string, optional (default="barycentric")
- The kind of mapping to apply to transport samples from a domain into
- another one.
- if "barycentric" only the samples used to estimate the coupling can
- be transported from a domain to another one.
- metric : string, optional (default="sqeuclidean")
- The ground metric for the Wasserstein problem
- norm : string, optional (default=None)
- If given, normalize the ground metric to avoid numerical errors that
- can occur with large metric values.
- distribution : string, optional (default="uniform")
- The kind of distribution estimation to employ
max_iter : int, float, optional (default=10)
The minimum number of iteration before stopping the optimization
algorithm if no it has not converged
max_inner_iter : int, float, optional (default=200)
The number of iteration in the inner loop
- verbose : int, optional (default=0)
+ log : bool, optional (default=False)
+ Controls the logs of the optimization algorithm
+ tol : float, optional (default=10e-9)
+ Stop threshold on error (inner sinkhorn solver) (>0)
+ verbose : bool, optional (default=False)
Controls the verbosity of the optimization algorithm
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ norm : string, optional (default=None)
+ If given, normalize the ground metric to avoid numerical errors that
+ can occur with large metric values.
+ distribution_estimation : callable, optional (defaults to the uniform)
+ The kind of distribution estimation to employ
+ out_of_sample_map : string, optional (default="ferradans")
+ The kind of out of sample mapping to apply to transport samples
+ from a domain into another one. Currently the only possible option is
+ "ferradans" which uses the method proposed in [6].
limit_max: float, optional (default=np.infty)
Controls the semi supervised mode. Transport between labeled source
- and target samples of different classes will exhibit an infinite cost
+ and target samples of different classes will exhibit a cost defined by
+ limit_max.
Attributes
----------
@@ -1786,27 +1508,28 @@ class SinkhornL1l2Transport(BaseTransport):
Entropic regularization parameter
reg_cl : float, optional (default=0.1)
Class regularization parameter
- mapping : string, optional (default="barycentric")
- The kind of mapping to apply to transport samples from a domain into
- another one.
- if "barycentric" only the samples used to estimate the coupling can
- be transported from a domain to another one.
- metric : string, optional (default="sqeuclidean")
- The ground metric for the Wasserstein problem
- norm : string, optional (default=None)
- If given, normalize the ground metric to avoid numerical errors that
- can occur with large metric values.
- distribution : string, optional (default="uniform")
- The kind of distribution estimation to employ
max_iter : int, float, optional (default=10)
The maximum number of iterations before stopping the optimization
algorithm if it has not converged
max_inner_iter : int, float, optional (default=200)
The number of iteration in the inner loop
- verbose : int, optional (default=0)
+ tol : float, optional (default=10e-9)
+ Stop threshold on error (inner sinkhorn solver) (>0)
+ verbose : bool, optional (default=False)
Controls the verbosity of the optimization algorithm
- log : int, optional (default=0)
+ log : bool, optional (default=False)
Controls the logs of the optimization algorithm
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ norm : string, optional (default=None)
+ If given, normalize the ground metric to avoid numerical errors that
+ can occur with large metric values.
+ distribution_estimation : callable, optional (defaults to the uniform)
+ The kind of distribution estimation to employ
+ out_of_sample_map : string, optional (default="ferradans")
+ The kind of out of sample mapping to apply to transport samples
+ from a domain into another one. Currently the only possible option is
+ "ferradans" which uses the method proposed in [6].
limit_max: float, optional (default=10)
Controls the semi supervised mode. Transport between labeled source
and target samples of different classes will exhibit an infinite cost
@@ -1928,10 +1651,12 @@ class MappingTransport(BaseEstimator):
Max number of iterations (inner CG solver)
inner_tol : float, optional (default=1e-6)
Stop threshold on error (inner CG solver) (>0)
- verbose : bool, optional (default=False)
- Print information along iterations
log : bool, optional (default=False)
record log if True
+ verbose : bool, optional (default=False)
+ Print information along iterations
+ verbose2 : bool, optional (default=False)
+ Print information along iterations
Attributes
----------
diff --git a/ot/datasets.py b/ot/datasets.py
index 362a89b..e76e75d 100644
--- a/ot/datasets.py
+++ b/ot/datasets.py
@@ -38,9 +38,9 @@ def make_1D_gauss(n, m, s):
@deprecated()
-def get_1D_gauss(n, m, sigma, random_state=None):
+def get_1D_gauss(n, m, sigma):
""" Deprecated see make_1D_gauss """
- return make_1D_gauss(n, m, sigma, random_state=None)
+ return make_1D_gauss(n, m, sigma)
def make_2D_samples_gauss(n, m, sigma, random_state=None):
diff --git a/ot/externals/funcsigs.py b/ot/externals/funcsigs.py
index c73fdc9..106bde7 100644
--- a/ot/externals/funcsigs.py
+++ b/ot/externals/funcsigs.py
@@ -126,8 +126,8 @@ def signature(obj):
new_params[arg_name] = param.replace(default=arg_value,
_partial_kwarg=True)
- elif (param.kind not in (_VAR_KEYWORD, _VAR_POSITIONAL) and
- not param._partial_kwarg):
+ elif (param.kind not in (_VAR_KEYWORD, _VAR_POSITIONAL)
+ and not param._partial_kwarg):
new_params.pop(arg_name)
return sig.replace(parameters=new_params.values())
@@ -333,11 +333,11 @@ class Parameter(object):
raise TypeError(msg)
def __eq__(self, other):
- return (issubclass(other.__class__, Parameter) and
- self._name == other._name and
- self._kind == other._kind and
- self._default == other._default and
- self._annotation == other._annotation)
+ return (issubclass(other.__class__, Parameter)
+ and self._name == other._name
+ and self._kind == other._kind
+ and self._default == other._default
+ and self._annotation == other._annotation)
def __ne__(self, other):
return not self.__eq__(other)
@@ -372,8 +372,8 @@ class BoundArguments(object):
def args(self):
args = []
for param_name, param in self._signature.parameters.items():
- if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) or
- param._partial_kwarg):
+ if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY)
+ or param._partial_kwarg):
# Keyword arguments mapped by 'functools.partial'
# (Parameter._partial_kwarg is True) are mapped
# in 'BoundArguments.kwargs', along with VAR_KEYWORD &
@@ -402,8 +402,8 @@ class BoundArguments(object):
kwargs_started = False
for param_name, param in self._signature.parameters.items():
if not kwargs_started:
- if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) or
- param._partial_kwarg):
+ if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY)
+ or param._partial_kwarg):
kwargs_started = True
else:
if param_name not in self.arguments:
@@ -432,9 +432,9 @@ class BoundArguments(object):
raise TypeError(msg)
def __eq__(self, other):
- return (issubclass(other.__class__, BoundArguments) and
- self.signature == other.signature and
- self.arguments == other.arguments)
+ return (issubclass(other.__class__, BoundArguments)
+ and self.signature == other.signature
+ and self.arguments == other.arguments)
def __ne__(self, other):
return not self.__eq__(other)
@@ -612,9 +612,9 @@ class Signature(object):
raise TypeError(msg)
def __eq__(self, other):
- if (not issubclass(type(other), Signature) or
- self.return_annotation != other.return_annotation or
- len(self.parameters) != len(other.parameters)):
+ if (not issubclass(type(other), Signature)
+ or self.return_annotation != other.return_annotation
+ or len(self.parameters) != len(other.parameters)):
return False
other_positions = dict((param, idx)
@@ -635,8 +635,8 @@ class Signature(object):
except KeyError:
return False
else:
- if (idx != other_idx or
- param != other.parameters[param_name]):
+ if (idx != other_idx
+ or param != other.parameters[param_name]):
return False
return True
@@ -688,8 +688,8 @@ class Signature(object):
raise TypeError(msg)
parameters_ex = (param,)
break
- elif (param.kind == _VAR_KEYWORD or
- param.default is not _empty):
+ elif (param.kind == _VAR_KEYWORD
+ or param.default is not _empty):
# That's fine too - we have a default value for this
# parameter. So, lets start parsing `kwargs`, starting
# with the current parameter
@@ -755,8 +755,8 @@ class Signature(object):
# if it has a default value, or it is an '*args'-like
# parameter, left alone by the processing of positional
# arguments.
- if (not partial and param.kind != _VAR_POSITIONAL and
- param.default is _empty):
+ if (not partial and param.kind != _VAR_POSITIONAL
+ and param.default is _empty):
raise TypeError('{arg!r} parameter lacking default value'.
format(arg=param_name))
diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py
index a2fdd3d..deda6b1 100644
--- a/ot/gpu/__init__.py
+++ b/ot/gpu/__init__.py
@@ -1,12 +1,37 @@
# -*- coding: utf-8 -*-
+"""
-from . import bregman
-from . import da
-from .bregman import sinkhorn
+This module provides GPU implementations of several OT solvers and utility
+functions. The GPU backend is handled by `cupy
+<https://cupy.chainer.org/>`_.
+
+By default, the functions in this module accept and return numpy arrays
+in order to provide drop-in replacements for the other POT functions, but
+the transfer between CPU and GPU comes with a significant overhead.
+
+To get the best performance, we recommend giving only cupy arrays to
+the functions and deactivating the conversion of the result back to
+numpy with the parameter ``to_numpy=False``.
+
+"""
# Author: Remi Flamary <remi.flamary@unice.fr>
# Leo Gautheron <https://github.com/aje>
#
# License: MIT License
-__all__ = ["bregman", "da", "sinkhorn"]
+from . import bregman
+from . import da
+from .bregman import sinkhorn
+from .da import sinkhorn_lpl1_mm
+
+from . import utils
+from .utils import dist, to_gpu, to_np
+
+
+
+
+
+__all__ = ["utils", "dist", "sinkhorn",
+ "sinkhorn_lpl1_mm", 'bregman', 'da', 'to_gpu', 'to_np']
+
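Following the module docstring, the CPU/GPU transfer overhead can be avoided by passing cupy arrays and keeping results on device. A sketch assuming a working cupy install, and assuming `dist` accepts `to_numpy` like the solvers:

    import cupy as cp
    import ot.gpu

    xs = cp.random.rand(1000, 2)
    xt = cp.random.rand(1000, 2)
    a = cp.ones(1000) / 1000
    b = cp.ones(1000) / 1000

    M = ot.gpu.dist(xs, xt, to_numpy=False)           # stays on the GPU
    G = ot.gpu.sinkhorn(a, b, M, 1., to_numpy=False)  # cupy array result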
diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py
index 47939c4..978b307 100644
--- a/ot/gpu/bregman.py
+++ b/ot/gpu/bregman.py
@@ -8,14 +8,18 @@ Bregman projections for regularized OT with GPU
#
# License: MIT License
-import numpy as np
-import cudamat
+import cupy as np # np used for matrix computation
+import cupy as cp # cp used for cupy specific operations
+from . import utils
-def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
- log=False, returnAsGPU=False):
- r"""
- Solve the entropic regularization optimal transport problem on GPU
+def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9,
+ verbose=False, log=False, to_numpy=True, **kwargs):
+ """
+ Solve the entropic regularization optimal transport on GPU
+
+ If the input matrices are in numpy format, they will be uploaded to the
+ GPU first, which can incur a significant time overhead.
The function solves the following optimization problem:
@@ -40,9 +44,10 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
----------
a : np.ndarray (ns,)
samples weights in the source domain
- b : np.ndarray (nt,)
- samples in the target domain
- M_GPU : cudamat.CUDAMatrix (ns,nt)
+ b : np.ndarray (nt,) or np.ndarray (nt,nbb)
+ samples in the target domain, compute sinkhorn with multiple targets
+ and fixed M if b is a matrix (return OT loss + dual variables in log)
+ M : np.ndarray (ns,nt)
loss matrix
reg : float
Regularization term >0
@@ -54,8 +59,9 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
Print information along iterations
log : bool, optional
record log if True
- returnAsGPU : bool, optional
- return the OT matrix as a cudamat.CUDAMatrix
+ to_numpy : boolean, optional (default True)
+ If True, convert the GPU array result back to numpy format.
+
Returns
-------
@@ -88,60 +94,78 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
ot.optim.cg : General regularized OT
"""
+
+ a = cp.asarray(a)
+ b = cp.asarray(b)
+ M = cp.asarray(M)
+
+ if len(a) == 0:
+ a = np.ones((M.shape[0],)) / M.shape[0]
+ if len(b) == 0:
+ b = np.ones((M.shape[1],)) / M.shape[1]
+
# init data
Nini = len(a)
Nfin = len(b)
+ if len(b.shape) > 1:
+ nbb = b.shape[1]
+ else:
+ nbb = 0
+
if log:
log = {'err': []}
# we assume that no distances are null except those of the diagonal of
# distances
- u = (np.ones(Nini) / Nini).reshape((Nini, 1))
- u_GPU = cudamat.CUDAMatrix(u)
- a_GPU = cudamat.CUDAMatrix(a.reshape((Nini, 1)))
- ones_GPU = cudamat.empty(u_GPU.shape).assign(1)
- v = (np.ones(Nfin) / Nfin).reshape((Nfin, 1))
- v_GPU = cudamat.CUDAMatrix(v)
- b_GPU = cudamat.CUDAMatrix(b.reshape((Nfin, 1)))
-
- M_GPU.divide(-reg)
+ if nbb:
+ u = np.ones((Nini, nbb)) / Nini
+ v = np.ones((Nfin, nbb)) / Nfin
+ else:
+ u = np.ones(Nini) / Nini
+ v = np.ones(Nfin) / Nfin
- K_GPU = cudamat.exp(M_GPU)
+ # print(reg)
- ones_GPU.divide(a_GPU, target=a_GPU)
- Kp_GPU = cudamat.empty(K_GPU.shape)
- K_GPU.mult_by_col(a_GPU, target=Kp_GPU)
+ # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute
+ K = np.empty(M.shape, dtype=M.dtype)
+ np.divide(M, -reg, out=K)
+ np.exp(K, out=K)
- tmp_GPU = cudamat.empty(K_GPU.shape)
+ # print(np.min(K))
+ tmp2 = np.empty(b.shape, dtype=M.dtype)
+ Kp = (1 / a).reshape(-1, 1) * K
cpt = 0
err = 1
while (err > stopThr and cpt < numItermax):
- uprev_GPU = u_GPU.copy()
- vprev_GPU = v_GPU.copy()
+ uprev = u
+ vprev = v
- KtransposeU_GPU = K_GPU.transpose().dot(u_GPU)
- b_GPU.divide(KtransposeU_GPU, target=v_GPU)
- ones_GPU.divide(Kp_GPU.dot(v_GPU), target=u_GPU)
+ KtransposeU = np.dot(K.T, u)
+ v = np.divide(b, KtransposeU)
+ u = 1. / np.dot(Kp, v)
- if (np.any(KtransposeU_GPU.asarray() == 0) or
- not u_GPU.allfinite() or not v_GPU.allfinite()):
+ if (np.any(KtransposeU == 0) or
+ np.any(np.isnan(u)) or np.any(np.isnan(v)) or
+ np.any(np.isinf(u)) or np.any(np.isinf(v))):
# we have reached the machine precision
# come back to previous solution and quit loop
print('Warning: numerical errors at iteration', cpt)
- u_GPU = uprev_GPU.copy()
- v_GPU = vprev_GPU.copy()
+ u = uprev
+ v = vprev
break
if cpt % 10 == 0:
# we can speed up the process by checking for the error only all
# the 10th iterations
- K_GPU.mult_by_col(u_GPU, target=tmp_GPU)
- tmp_GPU.mult_by_row(v_GPU.transpose(), target=tmp_GPU)
-
- bcopy_GPU = b_GPU.copy().transpose()
- bcopy_GPU.add_sums(tmp_GPU, axis=0, beta=-1)
- err = bcopy_GPU.euclid_norm()**2
+ if nbb:
+ err = np.sum((u - uprev)**2) / np.sum((u)**2) + \
+ np.sum((v - vprev)**2) / np.sum((v)**2)
+ else:
+ # compute right marginal tmp2= (diag(u)Kdiag(v))^T1
+ tmp2 = np.sum(u[:, None] * K * v[None, :], 0)
+ #tmp2=np.einsum('i,ij,j->j', u, K, v)
+ err = np.linalg.norm(tmp2 - b)**2 # violation of marginal
if log:
log['err'].append(err)
@@ -150,20 +174,32 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
print(
'{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
print('{:5d}|{:8e}|'.format(cpt, err))
- cpt += 1
- if log:
- log['u'] = u_GPU.asarray()
- log['v'] = v_GPU.asarray()
-
- K_GPU.mult_by_col(u_GPU, target=K_GPU)
- K_GPU.mult_by_row(v_GPU.transpose(), target=K_GPU)
-
- if returnAsGPU:
- res = K_GPU
- else:
- res = K_GPU.asarray()
-
+ cpt = cpt + 1
if log:
- return res, log
- else:
- return res
+ log['u'] = u
+ log['v'] = v
+
+ if nbb: # return only loss
+ #res = np.einsum('ik,ij,jk,ij->k', u, K, v, M) (explodes cupy memory)
+ res = np.empty(nbb)
+ for i in range(nbb):
+ res[i] = np.sum(u[:, None, i] * (K * M) * v[None, :, i])
+ if to_numpy:
+ res = utils.to_np(res)
+ if log:
+ return res, log
+ else:
+ return res
+
+ else: # return OT matrix
+ res = u.reshape((-1, 1)) * K * v.reshape((1, -1))
+ if to_numpy:
+ res = utils.to_np(res)
+ if log:
+ return res, log
+ else:
+ return res
+
+
+# define sinkhorn as sinkhorn_knopp
+sinkhorn = sinkhorn_knopp
diff --git a/ot/gpu/da.py b/ot/gpu/da.py
index 71a485a..4a98038 100644
--- a/ot/gpu/da.py
+++ b/ot/gpu/da.py
@@ -11,80 +11,30 @@ Domain adaptation with optimal transport with GPU implementation
# License: MIT License
-import numpy as np
-from ..utils import unif
-from ..da import OTDA
+import cupy as np # np used for matrix computation
+import cupy as cp # cp used for cupy specific operations
+import numpy as npp
+from . import utils
+
from .bregman import sinkhorn
-import cudamat
-def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False):
+def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
+ numInnerItermax=200, stopInnerThr=1e-9, verbose=False,
+ log=False, to_numpy=True):
"""
- Compute the pairwise euclidean distance between matrices a and b.
-
-
- Parameters
- ----------
- a : np.ndarray (n, f)
- first matrice
- b : np.ndarray (m, f)
- second matrice
- returnAsGPU : boolean, optional (default False)
- if True, returns cudamat matrix still on GPU, else return np.ndarray
- squared : boolean, optional (default False)
- if True, return squared euclidean distance matrice
+ Solve the entropic regularization optimal transport problem with nonconvex
+ group lasso regularization on GPU
+ If the input matrices are in numpy format, they will be uploaded to the
+ GPU first, which can incur a significant time overhead.
- Returns
- -------
- c : (n x m) np.ndarray or cudamat.CUDAMatrix
- pairwise euclidean distance distance matrix
- """
- # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m).
- # First compute in c_GPU the squared euclidean distance. And return its
- # square root. At each cell [i,j] of c, we want to have
- # sum{k in range(f)} ( (a[i,k] - b[j,k])^2 ). We know that
- # (a-b)^2 = a^2 -2ab +b^2. Thus we want to have in each cell of c:
- # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2).
-
- a_GPU = cudamat.CUDAMatrix(a)
- b_GPU = cudamat.CUDAMatrix(b)
-
- # Multiply a by b transpose to obtain in each cell [i,j] of c the
- # value sum{k in range(f)} ( a[i,k]b[j,k] )
- c_GPU = cudamat.dot(a_GPU, b_GPU.transpose())
- # multiply by -2 to have sum{k in range(f)} ( -2a[i,k]b[j,k] )
- c_GPU.mult(-2)
-
- # Compute the vectors of the sum of squared elements.
- a_GPU = cudamat.pow(a_GPU, 2).sum(axis=1)
- b_GPU = cudamat.pow(b_GPU, 2).sum(axis=1)
-
- # Add the vectors in each columns (respectivly rows) of c.
- # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] )
- c_GPU.add_col_vec(a_GPU)
- # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2)
- c_GPU.add_row_vec(b_GPU.transpose())
-
- if not squared:
- c_GPU = cudamat.sqrt(c_GPU)
-
- if returnAsGPU:
- return c_GPU
- else:
- return c_GPU.asarray()
-
-
-def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
- numInnerItermax=200, stopInnerThr=1e-9,
- verbose=False, log=False):
- """
- Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)+ \eta \Omega_g(\gamma)
+ \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)
+ + \eta \Omega_g(\gamma)
s.t. \gamma 1 = a
@@ -94,11 +44,16 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
where :
- M is the (ns,nt) metric cost matrix
- - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - :math:`\Omega_g` is the group lasso regulaization term :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` where :math:`\mathcal{I}_c` are the index of samples from class c in the source domain.
+ - :math:`\Omega_e` is the entropic regularization term
+ :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`\Omega_g` is the group lasso regularization term
+ :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1`
+ where :math:`\mathcal{I}_c` are the index of samples from class c
+ in the source domain.
- a and b are source and target weights (sum to 1)
- The algorithm used for solving the problem is the generalised conditional gradient as proposed in [5]_ [7]_
+ The algorithm used for solving the problem is the generalised conditional
+ gradient as proposed in [5]_ [7]_
Parameters
@@ -109,7 +64,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
labels of samples in the source domain
b : np.ndarray (nt,)
samples weights in the target domain
- M_GPU : cudamat.CUDAMatrix (ns,nt)
+ M : np.ndarray (ns,nt)
loss matrix
reg : float
Regularization term for entropic regularization >0
@@ -125,6 +80,8 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
Print information along iterations
log : bool, optional
record log if True
+ to_numpy : boolean, optional (default True)
+ If True, convert the GPU array result back to numpy format.
Returns
@@ -138,8 +95,13 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
References
----------
- .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
- .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized conditional gradient: analysis of convergence and applications. arXiv preprint arXiv:1510.06567.
+ .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+ "Optimal Transport for Domain Adaptation," in IEEE
+ Transactions on Pattern Analysis and Machine Intelligence ,
+ vol.PP, no.99, pp.1-1
+ .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
+ Generalized conditional gradient: analysis of convergence
+ and applications. arXiv preprint arXiv:1510.06567.
See Also
--------
@@ -148,108 +110,35 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
ot.optim.cg : General regularized OT
"""
+
+ a, labels_a, b, M = utils.to_gpu(a, labels_a, b, M)
+
p = 0.5
epsilon = 1e-3
- Nfin = len(b)
indices_labels = []
- classes = np.unique(labels_a)
+ labels_a2 = cp.asnumpy(labels_a)
+ classes = npp.unique(labels_a2)
for c in classes:
- idxc, = np.where(labels_a == c)
- indices_labels.append(cudamat.CUDAMatrix(idxc.reshape(1, -1)))
+ idxc, = utils.to_gpu(npp.where(labels_a2 == c))
+ indices_labels.append(idxc)
- Mreg_GPU = cudamat.empty(M_GPU.shape)
- W_GPU = cudamat.empty(M_GPU.shape).assign(0)
+ W = np.zeros(M.shape)
for cpt in range(numItermax):
- Mreg_GPU.assign(M_GPU)
- Mreg_GPU.add_mult(W_GPU, eta)
- transp_GPU = sinkhorn(a, b, Mreg_GPU, reg, numItermax=numInnerItermax,
- stopThr=stopInnerThr, returnAsGPU=True)
+ Mreg = M + eta * W
+ transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax,
+ stopThr=stopInnerThr, to_numpy=False)
# the transport has been computed. Check if classes are really
# separated
- W_GPU.assign(1)
- W_GPU = W_GPU.transpose()
+ W = np.ones(M.shape)
for (i, c) in enumerate(classes):
- (_, nbRow) = indices_labels[i].shape
- tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0)
- transp_GPU.transpose().select_columns(indices_labels[i], tmpC_GPU)
- majs_GPU = tmpC_GPU.sum(axis=1).add(epsilon)
- cudamat.pow(majs_GPU, (p - 1))
- majs_GPU.mult(p)
-
- tmpC_GPU.assign(0)
- tmpC_GPU.add_col_vec(majs_GPU)
- W_GPU.set_selected_columns(indices_labels[i], tmpC_GPU)
-
- W_GPU = W_GPU.transpose()
-
- return transp_GPU.asarray()
+ majs = np.sum(transp[indices_labels[i]], axis=0)
+ majs = p * ((majs + epsilon)**(p - 1))
+ W[indices_labels[i]] = majs
-class OTDA_GPU(OTDA):
-
- def normalizeM(self, norm):
- if norm == "median":
- self.M_GPU.divide(float(np.median(self.M_GPU.asarray())))
- elif norm == "max":
- self.M_GPU.divide(float(np.max(self.M_GPU.asarray())))
- elif norm == "log":
- self.M_GPU.add(1)
- cudamat.log(self.M_GPU)
- elif norm == "loglog":
- self.M_GPU.add(1)
- cudamat.log(self.M_GPU)
- self.M_GPU.add(1)
- cudamat.log(self.M_GPU)
-
-
-class OTDA_sinkhorn(OTDA_GPU):
-
- def fit(self, xs, xt, reg=1, ws=None, wt=None, norm=None, **kwargs):
- cudamat.init()
- xs = np.asarray(xs, dtype=np.float64)
- xt = np.asarray(xt, dtype=np.float64)
-
- self.xs = xs
- self.xt = xt
-
- if wt is None:
- wt = unif(xt.shape[0])
- if ws is None:
- ws = unif(xs.shape[0])
-
- self.ws = ws
- self.wt = wt
-
- self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True,
- squared=True)
- self.normalizeM(norm)
- self.G = sinkhorn(ws, wt, self.M_GPU, reg, **kwargs)
- self.computed = True
-
-
-class OTDA_lpl1(OTDA_GPU):
-
- def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None,
- **kwargs):
- cudamat.init()
- xs = np.asarray(xs, dtype=np.float64)
- xt = np.asarray(xt, dtype=np.float64)
-
- self.xs = xs
- self.xt = xt
-
- if wt is None:
- wt = unif(xt.shape[0])
- if ws is None:
- ws = unif(xs.shape[0])
-
- self.ws = ws
- self.wt = wt
-
- self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True,
- squared=True)
- self.normalizeM(norm)
- self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M_GPU, reg, eta, **kwargs)
- self.computed = True
+ if to_numpy:
+ return utils.to_np(transp)
+ else:
+ return transp
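For context, a minimal CPU sketch of the weight update performed in the loop above (illustration only, assuming plain numpy; update_lpl1_weights is a hypothetical helper name, not part of the diff):

    import numpy as np

    def update_lpl1_weights(transp, indices_labels, p=0.5, epsilon=1e-3):
        # Majorize the group-lasso term: the rows of each class c share the
        # per-column weight p * (column mass of class c + eps)^(p - 1).
        W = np.ones(transp.shape)
        for idxc in indices_labels:
            majs = np.sum(transp[idxc], axis=0)          # column mass of class c
            W[idxc] = p * ((majs + epsilon) ** (p - 1))  # broadcast over the rows
        return W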
diff --git a/ot/gpu/utils.py b/ot/gpu/utils.py
new file mode 100644
index 0000000..41e168a
--- /dev/null
+++ b/ot/gpu/utils.py
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+"""
+Utility functions for GPU
+"""
+
+# Author: Remi Flamary <remi.flamary@unice.fr>
+# Nicolas Courty <ncourty@irisa.fr>
+# Leo Gautheron <https://github.com/aje>
+#
+# License: MIT License
+
+import cupy as np # np used for matrix computation
+import cupy as cp # cp used for cupy specific operations
+
+
+def euclidean_distances(a, b, squared=False, to_numpy=True):
+ """
+ Compute the pairwise euclidean distance between matrices a and b.
+
+    If the input matrices are in numpy format, they will be uploaded to the
+    GPU first, which can incur significant time overhead.
+
+ Parameters
+ ----------
+ a : np.ndarray (n, f)
+ first matrix
+ b : np.ndarray (m, f)
+ second matrix
+ to_numpy : boolean, optional (default True)
+        If True, convert the GPU array result back to numpy format.
+ squared : boolean, optional (default False)
+ if True, return squared euclidean distance matrix
+
+ Returns
+ -------
+ c : (n x m) np.ndarray or cupy.ndarray
+ pairwise euclidean distance distance matrix
+ """
+
+ a, b = to_gpu(a, b)
+
+ a2 = np.sum(np.square(a), 1)
+ b2 = np.sum(np.square(b), 1)
+
+ c = -2 * np.dot(a, b.T)
+ c += a2[:, None]
+ c += b2[None, :]
+
+ if not squared:
+ np.sqrt(c, out=c)
+ if to_numpy:
+ return to_np(c)
+ else:
+ return c
+
+
+def dist(x1, x2=None, metric='sqeuclidean', to_numpy=True):
+ """Compute distance between samples in x1 and x2 on gpu
+
+ Parameters
+ ----------
+
+ x1 : np.array (n1,d)
+ matrix with n1 samples of size d
+ x2 : np.array (n2,d), optional
+ matrix with n2 samples of size d (if None then x2=x1)
+ metric : str
+        Metric, either 'sqeuclidean' or 'euclidean'
+    to_numpy : boolean, optional (default True)
+        If True, convert the GPU array result back to numpy format.
+
+
+ Returns
+ -------
+
+ M : np.array (n1,n2)
+ distance matrix computed with given metric
+
+ """
+ if x2 is None:
+ x2 = x1
+ if metric == "sqeuclidean":
+ return euclidean_distances(x1, x2, squared=True, to_numpy=to_numpy)
+ elif metric == "euclidean":
+ return euclidean_distances(x1, x2, squared=False, to_numpy=to_numpy)
+ else:
+ raise NotImplementedError
+
+
+def to_gpu(*args):
+ """ Upload numpy arrays to GPU and return them"""
+ if len(args) > 1:
+ return (cp.asarray(x) for x in args)
+ else:
+ return cp.asarray(args[0])
+
+
+def to_np(*args):
+    """Convert GPU arrays to numpy and return them"""
+ if len(args) > 1:
+ return (cp.asnumpy(x) for x in args)
+ else:
+ return cp.asnumpy(args[0])
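As a sanity check, the expansion used by euclidean_distances above can be verified on CPU against scipy (a minimal sketch, assuming numpy and scipy are available):

    import numpy as np
    from scipy.spatial.distance import cdist

    rng = np.random.RandomState(0)
    a, b = rng.randn(5, 3), rng.randn(4, 3)

    # ||a_i - b_j||^2 = ||a_i||^2 - 2 <a_i, b_j> + ||b_j||^2
    c = -2 * a.dot(b.T)
    c += np.sum(a ** 2, axis=1)[:, None]
    c += np.sum(b ** 2, axis=1)[None, :]

    # Clamp tiny negatives from floating point before the square root.
    assert np.allclose(np.sqrt(np.maximum(c, 0)), cdist(a, b))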
diff --git a/ot/gromov.py b/ot/gromov.py
index fe4fc15..33134a2 100644
--- a/ot/gromov.py
+++ b/ot/gromov.py
@@ -40,7 +40,7 @@ def init_matrix(C1, C2, p, q, loss_fun='square_loss'):
* h1(a)=a
* h2(b)=b
- The kl-loss function L(a,b)=(1/2)*|a-b|^2 is read as :
+ The kl-loss function L(a,b)=a*log(a/b)-a+b is read as :
L(a,b) = f1(a)+f2(b)-h1(a)*h2(b) with :
* f1(a)=a*log(a)-a
* f2(b)=b
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 4c0d170..02cbd8c 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -17,8 +17,9 @@ from .import cvx
from .emd_wrap import emd_c, check_result
from ..utils import parmap
from .cvx import barycenter
+from ..utils import dist
-__all__=['emd', 'emd2', 'barycenter', 'cvx']
+__all__=['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx']
def emd(a, b, M, numItermax=100000, log=False):
@@ -216,3 +217,95 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
res = parmap(f, [b[:, i] for i in range(nb)], processes)
return res
+
+
+
+def free_support_barycenter(measures_locations, measures_weights, X_init, b=None, weights=None, numItermax=100, stopThr=1e-7, verbose=False, log=None):
+ """
+    Solves the free support Wasserstein barycenter problem (the locations of the barycenter atoms are optimized, not their weights), i.e. the weighted Fréchet mean for the 2-Wasserstein distance.
+
+    The function solves the Wasserstein barycenter problem when the barycenter measure is constrained to be supported on k atoms.
+    This problem is considered in [1] (Algorithm 2). There are two differences with that algorithm:
+    - we do not optimize over the weights
+    - we do not perform a line search for the location updates; we use a fixed step theta = 1 as in [1] (Algorithm 2). This can be seen as a discrete implementation of the fixed-point algorithm of [2], proposed in the continuous setting.
+
+ Parameters
+ ----------
+ measures_locations : list of (k_i,d) np.ndarray
+ The discrete support of a measure supported on k_i locations of a d-dimensional space (k_i can be different for each element of the list)
+ measures_weights : list of (k_i,) np.ndarray
+ Numpy arrays where each numpy array has k_i non-negatives values summing to one representing the weights of each discrete input measure
+
+ X_init : (k,d) np.ndarray
+ Initialization of the support locations (on k atoms) of the barycenter
+    b : (k,) np.ndarray
+        Weights of the barycenter atoms (non-negative, sum to 1); they are kept fixed during the iterations
+    weights : (N,) np.ndarray
+        Weight of each input measure in the barycenter objective (non-negative, sum to 1)
+
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+        Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+ Returns
+ -------
+ X : (k,d) np.ndarray
+ Support locations (on k atoms) of the barycenter
+
+ References
+ ----------
+
+ .. [1] Cuturi, Marco, and Arnaud Doucet. "Fast computation of Wasserstein barycenters." International Conference on Machine Learning. 2014.
+
+ .. [2] Álvarez-Esteban, Pedro C., et al. "A fixed-point approach to barycenters in Wasserstein space." Journal of Mathematical Analysis and Applications 441.2 (2016): 744-762.
+
+ """
+
+ iter_count = 0
+
+ N = len(measures_locations)
+ k = X_init.shape[0]
+ d = X_init.shape[1]
+ if b is None:
+ b = np.ones((k,))/k
+ if weights is None:
+ weights = np.ones((N,)) / N
+
+ X = X_init
+
+ log_dict = {}
+ displacement_square_norms = []
+
+ displacement_square_norm = stopThr + 1.
+
+ while ( displacement_square_norm > stopThr and iter_count < numItermax ):
+
+ T_sum = np.zeros((k, d))
+
+ for (measure_locations_i, measure_weights_i, weight_i) in zip(measures_locations, measures_weights, weights.tolist()):
+
+ M_i = dist(X, measure_locations_i)
+ T_i = emd(b, measure_weights_i, M_i)
+ T_sum = T_sum + weight_i * np.reshape(1. / b, (-1, 1)) * np.matmul(T_i, measure_locations_i)
+
+ displacement_square_norm = np.sum(np.square(T_sum-X))
+ if log:
+ displacement_square_norms.append(displacement_square_norm)
+
+ X = T_sum
+
+ if verbose:
+            print('iteration %d, displacement_square_norm=%f' % (iter_count, displacement_square_norm))
+
+ iter_count += 1
+
+ if log:
+ log_dict['displacement_square_norms'] = displacement_square_norms
+ return X, log_dict
+ else:
+        return X
\ No newline at end of file
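A quick usage sketch of the new free-support solver (illustration only; the function is added to __all__ above, so it should be reachable as ot.lp.free_support_barycenter):

    import numpy as np
    import ot

    rng = np.random.RandomState(42)
    measures_locations = [rng.randn(30, 2), rng.randn(40, 2) + 4.]
    measures_weights = [ot.unif(30), ot.unif(40)]
    X_init = rng.randn(10, 2)  # barycenter supported on k = 10 free atoms

    X = ot.lp.free_support_barycenter(measures_locations, measures_weights, X_init)
    print(X.shape)  # (10, 2)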
diff --git a/ot/lp/cvx.py b/ot/lp/cvx.py
index c8c75bc..8e763be 100644
--- a/ot/lp/cvx.py
+++ b/ot/lp/cvx.py
@@ -11,6 +11,7 @@ import numpy as np
import scipy as sp
import scipy.sparse as sps
+
try:
import cvxopt
from cvxopt import solvers, matrix, spmatrix
@@ -26,7 +27,7 @@ def scipy_sparse_to_spmatrix(A):
def barycenter(A, M, weights=None, verbose=False, log=False, solver='interior-point'):
- """Compute the entropic regularized wasserstein barycenter of distributions A
+ """Compute the Wasserstein barycenter of distributions A
The function solves the following optimization problem [16]:
diff --git a/ot/stochastic.py b/ot/stochastic.py
new file mode 100644
index 0000000..85c4230
--- /dev/null
+++ b/ot/stochastic.py
@@ -0,0 +1,736 @@
+# Author: Kilian Fatras <kilian.fatras@gmail.com>
+#
+# License: MIT License
+
+import numpy as np
+
+
+##############################################################################
+# Optimization toolbox for SEMI - DUAL problems
+##############################################################################
+
+
+def coordinate_grad_semi_dual(b, M, reg, beta, i):
+ '''
+    Compute the coordinate gradient update of the semi-dual problem for the picked row index i, i.e. for coordinates (i, :)
+
+ The function computes the gradient of the semi dual problem:
+
+ .. math::
+        \max_v \sum_i \left( \sum_j v_j b_j - reg \cdot \log \left( \sum_j b_j \exp \left( \frac{v_j - M_{i,j}}{reg} \right) \right) \right) a_i
+
+ Where :
+
+ - M is the (ns,nt) metric cost matrix
+ - v is a dual variable in R^J
+ - reg is the regularization term
+ - a and b are source and target weights (sum to 1)
+
+ The algorithm used for solving the problem is the ASGD & SAG algorithms
+ as proposed in [18]_ [alg.1 & alg.2]
+
+
+ Parameters
+ ----------
+
+ b : np.ndarray(nt,)
+ target measure
+ M : np.ndarray(ns, nt)
+ cost matrix
+    reg : float
+        Regularization term > 0
+    beta : np.ndarray(nt,)
+        dual variable
+    i : int
+        picked row index i
+
+ Returns
+ -------
+
+ coordinate gradient : np.ndarray(nt,)
+
+ Examples
+ --------
+
+ >>> n_source = 7
+ >>> n_target = 4
+ >>> reg = 1
+ >>> numItermax = 300000
+ >>> a = ot.utils.unif(n_source)
+ >>> b = ot.utils.unif(n_target)
+ >>> rng = np.random.RandomState(0)
+ >>> X_source = rng.randn(n_source, 2)
+ >>> Y_target = rng.randn(n_target, 2)
+ >>> M = ot.dist(X_source, Y_target)
+ >>> method = "ASGD"
+ >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+ method, numItermax)
+ >>> print(asgd_pi)
+
+ References
+ ----------
+
+    .. [18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. :
+        Stochastic Optimization for Large-scale Optimal Transport,
+        Advances in Neural Information Processing Systems (2016),
+        arXiv preprint arXiv:1605.08527.
+
+ '''
+
+ r = M[i, :] - beta
+ exp_beta = np.exp(-r / reg) * b
+ khi = exp_beta / (np.sum(exp_beta))
+ return b - khi
+
+
+def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None):
+ '''
+    Run the SAG algorithm to solve the regularized discrete measures
+    optimal transport max problem
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+ s.t. \gamma 1 = a
+
+ \gamma^T 1 = b
+
+ \gamma \geq 0
+
+ Where :
+
+ - M is the (ns,nt) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term with :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target weights (sum to 1)
+
+ The algorithm used for solving the problem is the SAG algorithm
+ as proposed in [18]_ [alg.1]
+
+
+ Parameters
+ ----------
+
+ a : np.ndarray(ns,),
+ source measure
+ b : np.ndarray(nt,),
+ target measure
+ M : np.ndarray(ns, nt),
+ cost matrix
+ reg : float number,
+ Regularization term > 0
+ numItermax : int number
+ number of iteration
+ lr : float number
+ learning rate
+
+ Returns
+ -------
+
+    beta : np.ndarray(nt,)
+ dual variable
+
+ Examples
+ --------
+
+ >>> n_source = 7
+ >>> n_target = 4
+ >>> reg = 1
+ >>> numItermax = 300000
+ >>> a = ot.utils.unif(n_source)
+ >>> b = ot.utils.unif(n_target)
+ >>> rng = np.random.RandomState(0)
+ >>> X_source = rng.randn(n_source, 2)
+ >>> Y_target = rng.randn(n_target, 2)
+ >>> M = ot.dist(X_source, Y_target)
+ >>> method = "ASGD"
+ >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+ method, numItermax)
+ >>> print(asgd_pi)
+
+ References
+ ----------
+
+    .. [18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. :
+        Stochastic Optimization for Large-scale Optimal Transport,
+        Advances in Neural Information Processing Systems (2016),
+        arXiv preprint arXiv:1605.08527.
+ '''
+
+ if lr is None:
+ lr = 1. / max(a / reg)
+ n_source = np.shape(M)[0]
+ n_target = np.shape(M)[1]
+ cur_beta = np.zeros(n_target)
+ stored_gradient = np.zeros((n_source, n_target))
+ sum_stored_gradient = np.zeros(n_target)
+ for _ in range(numItermax):
+ i = np.random.randint(n_source)
+ cur_coord_grad = a[i] * coordinate_grad_semi_dual(b, M, reg,
+ cur_beta, i)
+ sum_stored_gradient += (cur_coord_grad - stored_gradient[i])
+ stored_gradient[i] = cur_coord_grad
+ cur_beta += lr * (1. / n_source) * sum_stored_gradient
+ return cur_beta
+
+
+def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None):
+ '''
+    Run the averaged SGD (ASGD) algorithm to solve the regularized semi-continuous measures optimal transport max problem
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+ s.t. \gamma 1 = a
+
+ \gamma^T 1= b
+
+ \gamma \geq 0
+
+ Where :
+
+ - M is the (ns,nt) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term with :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target weights (sum to 1)
+
+ The algorithm used for solving the problem is the ASGD algorithm
+ as proposed in [18]_ [alg.2]
+
+
+ Parameters
+ ----------
+
+    a : np.ndarray(ns,)
+        source measure
+    b : np.ndarray(nt,)
+ target measure
+ M : np.ndarray(ns, nt)
+ cost matrix
+ reg : float number
+ Regularization term > 0
+ numItermax : int number
+ number of iteration
+ lr : float number
+ learning rate
+
+
+ Returns
+ -------
+
+    ave_beta : np.ndarray(nt,)
+ dual variable
+
+ Examples
+ --------
+
+ >>> n_source = 7
+ >>> n_target = 4
+ >>> reg = 1
+ >>> numItermax = 300000
+ >>> a = ot.utils.unif(n_source)
+ >>> b = ot.utils.unif(n_target)
+ >>> rng = np.random.RandomState(0)
+ >>> X_source = rng.randn(n_source, 2)
+ >>> Y_target = rng.randn(n_target, 2)
+ >>> M = ot.dist(X_source, Y_target)
+ >>> method = "ASGD"
+ >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+ method, numItermax)
+ >>> print(asgd_pi)
+
+ References
+ ----------
+
+    .. [18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. :
+        Stochastic Optimization for Large-scale Optimal Transport,
+        Advances in Neural Information Processing Systems (2016),
+        arXiv preprint arXiv:1605.08527.
+ '''
+
+ if lr is None:
+ lr = 1. / max(a / reg)
+ n_source = np.shape(M)[0]
+ n_target = np.shape(M)[1]
+ cur_beta = np.zeros(n_target)
+ ave_beta = np.zeros(n_target)
+ for cur_iter in range(numItermax):
+ k = cur_iter + 1
+ i = np.random.randint(n_source)
+ cur_coord_grad = coordinate_grad_semi_dual(b, M, reg, cur_beta, i)
+ cur_beta += (lr / np.sqrt(k)) * cur_coord_grad
+ ave_beta = (1. / k) * cur_beta + (1 - 1. / k) * ave_beta
+ return ave_beta
+
+
+def c_transform_entropic(b, M, reg, beta):
+ '''
+ The goal is to recover u from the c-transform.
+
+ The function computes the c_transform of a dual variable from the other
+ dual variable:
+
+ .. math::
+        u_i = v_i^{c,reg} = -reg \log \left( \sum_j b_j \exp \left( \frac{v_j - M_{i,j}}{reg} \right) \right)
+
+ Where :
+
+ - M is the (ns,nt) metric cost matrix
+    - u, v are dual variables in R^{ns} x R^{nt}
+ - reg is the regularization term
+
+ It is used to recover an optimal u from optimal v solving the semi dual
+ problem, see Proposition 2.1 of [18]_
+
+
+ Parameters
+ ----------
+
+ b : np.ndarray(nt,)
+ target measure
+ M : np.ndarray(ns, nt)
+ cost matrix
+ reg : float
+ regularization term > 0
+    beta : np.ndarray(nt,)
+ dual variable
+
+ Returns
+ -------
+
+ u : np.ndarray(ns,)
+ dual variable
+
+ Examples
+ --------
+
+ >>> n_source = 7
+ >>> n_target = 4
+ >>> reg = 1
+ >>> numItermax = 300000
+ >>> a = ot.utils.unif(n_source)
+ >>> b = ot.utils.unif(n_target)
+ >>> rng = np.random.RandomState(0)
+ >>> X_source = rng.randn(n_source, 2)
+ >>> Y_target = rng.randn(n_target, 2)
+ >>> M = ot.dist(X_source, Y_target)
+ >>> method = "ASGD"
+ >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+ method, numItermax)
+ >>> print(asgd_pi)
+
+ References
+ ----------
+
+    .. [18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. :
+        Stochastic Optimization for Large-scale Optimal Transport,
+        Advances in Neural Information Processing Systems (2016),
+        arXiv preprint arXiv:1605.08527.
+ '''
+
+ n_source = np.shape(M)[0]
+ alpha = np.zeros(n_source)
+ for i in range(n_source):
+ r = M[i, :] - beta
+ min_r = np.min(r)
+ exp_beta = np.exp(-(r - min_r) / reg) * b
+ alpha[i] = min_r - reg * np.log(np.sum(exp_beta))
+ return alpha
+
+
+def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
+ log=False):
+ '''
+ Compute the transportation matrix to solve the regularized discrete
+ measures optimal transport max problem
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+ s.t. \gamma 1 = a
+
+ \gamma^T 1= b
+
+ \gamma \geq 0
+
+ Where :
+
+ - M is the (ns,nt) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term with :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target weights (sum to 1)
+ The algorithm used for solving the problem is the SAG or ASGD algorithms
+ as proposed in [18]_
+
+
+ Parameters
+ ----------
+
+ a : np.ndarray(ns,)
+ source measure
+ b : np.ndarray(nt,)
+ target measure
+ M : np.ndarray(ns, nt)
+ cost matrix
+ reg : float number
+ Regularization term > 0
+    method : str
+        used method (SAG or ASGD)
+    numItermax : int number
+        number of iteration
+    lr : float number
+        learning rate
+ log : bool, optional
+ record log if True
+
+ Returns
+ -------
+
+ pi : np.ndarray(ns, nt)
+ transportation matrix
+ log : dict
+ log dictionary return only if log==True in parameters
+
+ Examples
+ --------
+
+ >>> n_source = 7
+ >>> n_target = 4
+ >>> reg = 1
+ >>> numItermax = 300000
+ >>> a = ot.utils.unif(n_source)
+ >>> b = ot.utils.unif(n_target)
+ >>> rng = np.random.RandomState(0)
+ >>> X_source = rng.randn(n_source, 2)
+ >>> Y_target = rng.randn(n_target, 2)
+ >>> M = ot.dist(X_source, Y_target)
+ >>> method = "ASGD"
+ >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+ method, numItermax)
+ >>> print(asgd_pi)
+
+ References
+ ----------
+
+    .. [18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. :
+        Stochastic Optimization for Large-scale Optimal Transport,
+        Advances in Neural Information Processing Systems (2016),
+        arXiv preprint arXiv:1605.08527.
+ '''
+
+ if method.lower() == "sag":
+ opt_beta = sag_entropic_transport(a, b, M, reg, numItermax, lr)
+ elif method.lower() == "asgd":
+ opt_beta = averaged_sgd_entropic_transport(a, b, M, reg, numItermax, lr)
+ else:
+        print("Please select a method: 'SAG' or 'ASGD'")
+ return None
+
+ opt_alpha = c_transform_entropic(b, M, reg, opt_beta)
+ pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) *
+ a[:, None] * b[None, :])
+
+ if log:
+ log = {}
+ log['alpha'] = opt_alpha
+ log['beta'] = opt_beta
+ return pi, log
+ else:
+ return pi
+
+
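Putting the semi-dual pieces together (a minimal end-to-end sketch, illustration only, assuming plain numpy and the functions defined above in this file): solve for the dual variable with SAG, recover the other dual variable by the c-transform, and form the primal plan. By construction of the c-transform, the row marginal of the plan matches a exactly; the column marginal only approaches b as the number of iterations grows.

    import numpy as np

    rng = np.random.RandomState(0)
    ns, nt, reg = 7, 4, 1.
    a, b = np.full(ns, 1. / ns), np.full(nt, 1. / nt)
    X, Y = rng.randn(ns, 2), rng.randn(nt, 2)
    M = ((X[:, None, :] - Y[None, :, :]) ** 2).sum(-1)  # squared euclidean cost

    beta = sag_entropic_transport(a, b, M, reg, numItermax=100000)
    alpha = c_transform_entropic(b, M, reg, beta)
    pi = np.exp((alpha[:, None] + beta[None, :] - M) / reg) * a[:, None] * b[None, :]

    assert np.allclose(pi.sum(axis=1), a)  # row marginal holds exactly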
+##############################################################################
+# Optimization toolbox for DUAL problems
+##############################################################################
+
+
+def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
+ batch_beta):
+ '''
+ Computes the partial gradient of the dual optimal transport problem.
+
+ For each (i,j) in a batch of coordinates, the partial gradients are :
+
+ .. math::
+        \partial_{u_i} F = \frac{l_v}{n_t} a_i - \sum_{j \in B_v} \exp \left( \frac{u_i + v_j - M_{i,j}}{reg} \right) a_i b_j
+
+        \partial_{v_j} F = \frac{l_u}{n_s} b_j - \sum_{i \in B_u} \exp \left( \frac{u_i + v_j - M_{i,j}}{reg} \right) a_i b_j
+
+    Where :
+
+    - M is the (ns,nt) metric cost matrix
+    - u, v are dual variables in R^{ns} x R^{nt}
+    - reg is the regularization term
+    - :math:`B_u` and :math:`B_v` are lists of indices
+    - :math:`b_s` is the size of the batches :math:`B_u` and :math:`B_v`
+    - :math:`l_u` and :math:`l_v` are the lengths of :math:`B_u` and :math:`B_v`
+    - :math:`n_s` and :math:`n_t` are the numbers of source and target samples
+ - a and b are source and target weights (sum to 1)
+
+
+ The algorithm used for solving the dual problem is the SGD algorithm
+ as proposed in [19]_ [alg.1]
+
+
+ Parameters
+ ----------
+
+ a : np.ndarray(ns,)
+ source measure
+ b : np.ndarray(nt,)
+ target measure
+ M : np.ndarray(ns, nt)
+ cost matrix
+ reg : float number
+ Regularization term > 0
+ alpha : np.ndarray(ns,)
+ dual variable
+ beta : np.ndarray(nt,)
+ dual variable
+ batch_size : int number
+ size of the batch
+ batch_alpha : np.ndarray(bs,)
+ batch of index of alpha
+ batch_beta : np.ndarray(bs,)
+ batch of index of beta
+
+ Returns
+ -------
+
+    grad_alpha : np.ndarray(ns,)
+        partial grad F w.r.t. u (nonzero on batch_alpha)
+    grad_beta : np.ndarray(nt,)
+        partial grad F w.r.t. v (nonzero on batch_beta)
+
+ Examples
+ --------
+
+ >>> n_source = 7
+ >>> n_target = 4
+ >>> reg = 1
+ >>> numItermax = 20000
+ >>> lr = 0.1
+ >>> batch_size = 3
+ >>> log = True
+ >>> a = ot.utils.unif(n_source)
+ >>> b = ot.utils.unif(n_target)
+ >>> rng = np.random.RandomState(0)
+ >>> X_source = rng.randn(n_source, 2)
+ >>> Y_target = rng.randn(n_target, 2)
+ >>> M = ot.dist(X_source, Y_target)
+ >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+ batch_size,
+ numItermax, lr, log)
+ >>> print(log['alpha'], log['beta'])
+ >>> print(sgd_dual_pi)
+
+ References
+ ----------
+
+    .. [19] Seguy, V., Damodaran, B. B., Flamary, R., Courty, N., Rolet, A. & Blondel, M. :
+        Large-scale Optimal Transport and Mapping Estimation,
+        International Conference on Learning Representations (2018),
+        arXiv preprint arXiv:1711.02283.
+ '''
+
+ G = - (np.exp((alpha[batch_alpha, None] + beta[None, batch_beta] -
+ M[batch_alpha, :][:, batch_beta]) / reg) *
+ a[batch_alpha, None] * b[None, batch_beta])
+ grad_beta = np.zeros(np.shape(M)[1])
+ grad_alpha = np.zeros(np.shape(M)[0])
+ grad_beta[batch_beta] = (b[batch_beta] * len(batch_alpha) / np.shape(M)[0]
+ + G.sum(0))
+ grad_alpha[batch_alpha] = (a[batch_alpha] * len(batch_beta)
+ / np.shape(M)[1] + G.sum(1))
+
+ return grad_alpha, grad_beta
+
+
+def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
+ '''
+    Run the SGD algorithm to solve the regularized discrete measures
+    optimal transport dual problem
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+ s.t. \gamma 1 = a
+
+ \gamma^T 1= b
+
+ \gamma \geq 0
+
+ Where :
+
+ - M is the (ns,nt) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term with :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target weights (sum to 1)
+
+ Parameters
+ ----------
+
+ a : np.ndarray(ns,)
+ source measure
+ b : np.ndarray(nt,)
+ target measure
+ M : np.ndarray(ns, nt)
+ cost matrix
+ reg : float number
+ Regularization term > 0
+ batch_size : int number
+ size of the batch
+ numItermax : int number
+ number of iteration
+ lr : float number
+ learning rate
+
+ Returns
+ -------
+
+ alpha : np.ndarray(ns,)
+ dual variable
+ beta : np.ndarray(nt,)
+ dual variable
+
+ Examples
+ --------
+
+ >>> n_source = 7
+ >>> n_target = 4
+ >>> reg = 1
+ >>> numItermax = 20000
+ >>> lr = 0.1
+ >>> batch_size = 3
+ >>> log = True
+ >>> a = ot.utils.unif(n_source)
+ >>> b = ot.utils.unif(n_target)
+ >>> rng = np.random.RandomState(0)
+ >>> X_source = rng.randn(n_source, 2)
+ >>> Y_target = rng.randn(n_target, 2)
+ >>> M = ot.dist(X_source, Y_target)
+ >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+ batch_size,
+ numItermax, lr, log)
+ >>> print(log['alpha'], log['beta'])
+ >>> print(sgd_dual_pi)
+
+ References
+ ----------
+
+    .. [19] Seguy, V., Damodaran, B. B., Flamary, R., Courty, N., Rolet, A. & Blondel, M. :
+        Large-scale Optimal Transport and Mapping Estimation,
+        International Conference on Learning Representations (2018),
+        arXiv preprint arXiv:1711.02283.
+ '''
+
+ n_source = np.shape(M)[0]
+ n_target = np.shape(M)[1]
+ cur_alpha = np.zeros(n_source)
+ cur_beta = np.zeros(n_target)
+ for cur_iter in range(numItermax):
+ k = np.sqrt(cur_iter + 1)
+ batch_alpha = np.random.choice(n_source, batch_size, replace=False)
+ batch_beta = np.random.choice(n_target, batch_size, replace=False)
+ update_alpha, update_beta = batch_grad_dual(a, b, M, reg, cur_alpha,
+ cur_beta, batch_size,
+ batch_alpha, batch_beta)
+ cur_alpha[batch_alpha] += (lr / k) * update_alpha[batch_alpha]
+ cur_beta[batch_beta] += (lr / k) * update_beta[batch_beta]
+
+ return cur_alpha, cur_beta
+
+
+def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1,
+ log=False):
+ '''
+ Compute the transportation matrix to solve the regularized discrete measures
+ optimal transport dual problem
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+ s.t. \gamma 1 = a
+
+ \gamma^T 1= b
+
+ \gamma \geq 0
+
+ Where :
+
+ - M is the (ns,nt) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target weights (sum to 1)
+
+ Parameters
+ ----------
+
+ a : np.ndarray(ns,)
+ source measure
+ b : np.ndarray(nt,)
+ target measure
+ M : np.ndarray(ns, nt)
+ cost matrix
+ reg : float number
+ Regularization term > 0
+ batch_size : int number
+ size of the batch
+ numItermax : int number
+ number of iteration
+ lr : float number
+ learning rate
+ log : bool, optional
+ record log if True
+
+ Returns
+ -------
+
+ pi : np.ndarray(ns, nt)
+ transportation matrix
+ log : dict
+ log dictionary return only if log==True in parameters
+
+ Examples
+ --------
+
+ >>> n_source = 7
+ >>> n_target = 4
+ >>> reg = 1
+ >>> numItermax = 20000
+ >>> lr = 0.1
+ >>> batch_size = 3
+ >>> log = True
+ >>> a = ot.utils.unif(n_source)
+ >>> b = ot.utils.unif(n_target)
+ >>> rng = np.random.RandomState(0)
+ >>> X_source = rng.randn(n_source, 2)
+ >>> Y_target = rng.randn(n_target, 2)
+ >>> M = ot.dist(X_source, Y_target)
+ >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+ batch_size,
+ numItermax, lr, log)
+ >>> print(log['alpha'], log['beta'])
+ >>> print(sgd_dual_pi)
+
+ References
+ ----------
+
+    .. [19] Seguy, V., Damodaran, B. B., Flamary, R., Courty, N., Rolet, A. & Blondel, M. :
+        Large-scale Optimal Transport and Mapping Estimation,
+        International Conference on Learning Representations (2018),
+        arXiv preprint arXiv:1711.02283.
+ '''
+
+ opt_alpha, opt_beta = sgd_entropic_regularization(a, b, M, reg, batch_size,
+ numItermax, lr)
+ pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) *
+ a[:, None] * b[None, :])
+ if log:
+ log = {}
+ log['alpha'] = opt_alpha
+ log['beta'] = opt_beta
+ return pi, log
+ else:
+ return pi
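For reference, a minimal usage sketch of the dual SGD solver above (illustration only, assuming the package exposes this new file as ot.stochastic):

    import numpy as np
    import ot
    import ot.stochastic

    ns, nt, reg = 7, 4, 1.
    a, b = ot.unif(ns), ot.unif(nt)
    rng = np.random.RandomState(0)
    M = ot.dist(rng.randn(ns, 2), rng.randn(nt, 2))

    pi, log = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size=3,
                                                numItermax=20000, lr=0.1, log=True)
    # The stochastic solver matches both marginals only approximately.
    print(np.abs(pi.sum(axis=1) - a).max(), np.abs(pi.sum(axis=0) - b).max())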
diff --git a/ot/utils.py b/ot/utils.py
index 7dac283..bb21b38 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -77,6 +77,34 @@ def clean_zeros(a, b, M):
return a2, b2, M2
+def euclidean_distances(X, Y, squared=False):
+ """
+ Considering the rows of X (and Y=X) as vectors, compute the
+ distance matrix between each pair of vectors.
+ Parameters
+ ----------
+ X : {array-like}, shape (n_samples_1, n_features)
+ Y : {array-like}, shape (n_samples_2, n_features)
+ squared : boolean, optional
+ Return squared Euclidean distances.
+ Returns
+ -------
+ distances : {array}, shape (n_samples_1, n_samples_2)
+ """
+ XX = np.einsum('ij,ij->i', X, X)[:, np.newaxis]
+ YY = np.einsum('ij,ij->i', Y, Y)[np.newaxis, :]
+ distances = np.dot(X, Y.T)
+ distances *= -2
+ distances += XX
+ distances += YY
+ np.maximum(distances, 0, out=distances)
+ if X is Y:
+ # Ensure that distances between vectors and themselves are set to 0.0.
+ # This may not be the case due to floating point rounding errors.
+ distances.flat[::distances.shape[0] + 1] = 0.0
+ return distances if squared else np.sqrt(distances, out=distances)
+
+
def dist(x1, x2=None, metric='sqeuclidean'):
"""Compute distance between samples in x1 and x2 using function scipy.spatial.distance.cdist
@@ -104,7 +132,8 @@ def dist(x1, x2=None, metric='sqeuclidean'):
"""
if x2 is None:
x2 = x1
-
+ if metric == "sqeuclidean":
+ return euclidean_distances(x1, x2, squared=True)
return cdist(x1, x2, metric=metric)
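A small check that the new fast path agrees with cdist (illustration only, assuming numpy and scipy are available):

    import numpy as np
    from scipy.spatial.distance import cdist
    from ot.utils import dist, euclidean_distances

    rng = np.random.RandomState(0)
    x1, x2 = rng.randn(6, 3), rng.randn(5, 3)

    # 'sqeuclidean' now takes the einsum-based path instead of cdist.
    assert np.allclose(dist(x1, x2), cdist(x1, x2, 'sqeuclidean'))
    assert np.allclose(euclidean_distances(x1, x2), cdist(x1, x2))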