author     tvayer <titouan.vayer@gmail.com>    2019-05-29 14:24:05 +0200
committer  tvayer <titouan.vayer@gmail.com>    2019-05-29 14:24:05 +0200
commit     63bbeb34e48f02c97a762dab5232158d90a5cffc (patch)
tree       853026b5854b6e4b01fdf750db139985b3dd596f /ot
parent     f70aabfcc11f92181e0dc987b341bad8ec030d75 (diff)
parent     f66ab58c7c895011fd37bafd3e848828399c56c4 (diff)

Merge remote-tracking branch 'rflamary/master'

merge pot
Diffstat (limited to 'ot'):

-rw-r--r--  ot/__init__.py            |   4
-rw-r--r--  ot/bregman.py             | 611
-rw-r--r--  ot/da.py                  | 407
-rw-r--r--  ot/datasets.py            |   4
-rw-r--r--  ot/externals/funcsigs.py  |  46
-rw-r--r--  ot/gpu/__init__.py        |  33
-rw-r--r--  ot/gpu/bregman.py         | 148
-rw-r--r--  ot/gpu/da.py              | 213
-rw-r--r--  ot/gpu/utils.py           | 101
-rw-r--r--  ot/gromov.py              |   2
-rw-r--r--  ot/lp/__init__.py         |  95
-rw-r--r--  ot/lp/cvx.py              |   3
-rw-r--r--  ot/stochastic.py          | 736
-rw-r--r--  ot/utils.py               |  31

14 files changed, 1822 insertions, 612 deletions
diff --git a/ot/__init__.py b/ot/__init__.py index 995ce33..b74b924 100644 --- a/ot/__init__.py +++ b/ot/__init__.py @@ -19,7 +19,7 @@ from . import datasets from . import da from . import gromov from . import smooth - +from . import stochastic # OT functions from .lp import emd, emd2 @@ -29,7 +29,7 @@ from .da import sinkhorn_lpl1_mm # utils functions from .utils import dist, unif, tic, toc, toq -__version__ = "0.4.0" +__version__ = "0.5.1" __all__ = ["emd", "emd2", "sinkhorn", "sinkhorn2", "utils", 'datasets', 'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov', diff --git a/ot/bregman.py b/ot/bregman.py index ffa6202..321712b 100644 --- a/ot/bregman.py +++ b/ot/bregman.py @@ -11,6 +11,7 @@ Bregman projections for regularized OT # License: MIT License import numpy as np +from .utils import unif, dist def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, @@ -49,7 +50,7 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, reg : float Regularization term >0 method : str - method used for the solver either 'sinkhorn', 'sinkhorn_stabilized' or + method used for the solver either 'sinkhorn', 'greenkhorn', 'sinkhorn_stabilized' or 'sinkhorn_epsilon_scaling', see those function for specific parameters numItermax : int, optional Max number of iterations @@ -105,6 +106,10 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, def sink(): return sinkhorn_knopp(a, b, M, reg, numItermax=numItermax, stopThr=stopThr, verbose=verbose, log=log, **kwargs) + elif method.lower() == 'greenkhorn': + def sink(): + return greenkhorn(a, b, M, reg, numItermax=numItermax, + stopThr=stopThr, verbose=verbose, log=log) elif method.lower() == 'sinkhorn_stabilized': def sink(): return sinkhorn_stabilized(a, b, M, reg, numItermax=numItermax, @@ -118,7 +123,8 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, print('Warning : unknown method using classic Sinkhorn Knopp') def sink(): - return sinkhorn_knopp(a, b, M, reg, **kwargs) + return sinkhorn_knopp(a, b, M, reg, numItermax=numItermax, + stopThr=stopThr, verbose=verbose, log=log, **kwargs) return sink() @@ -199,6 +205,8 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000, .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816. + [21] Altschuler J., Weed J., Rigollet P. : Near-linear time approximation algorithms for optimal transport via Sinkhorn iteration, Advances in Neural Information Processing Systems (NIPS) 31, 2017 + See Also @@ -206,6 +214,7 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000, ot.lp.emd : Unregularized OT ot.optim.cg : General regularized OT ot.bregman.sinkhorn_knopp : Classic Sinkhorn [2] + ot.bregman.greenkhorn : Greenkhorn [21] ot.bregman.sinkhorn_stabilized: Stabilized sinkhorn [9][10] ot.bregman.sinkhorn_epsilon_scaling: Sinkhorn with epslilon scaling [9][10] @@ -346,8 +355,13 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, # print(reg) - K = np.exp(-M / reg) + # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute + K = np.empty(M.shape, dtype=M.dtype) + np.divide(M, -reg, out=K) + np.exp(K, out=K) + # print(np.min(K)) + tmp2 = np.empty(b.shape, dtype=M.dtype) Kp = (1 / a).reshape(-1, 1) * K cpt = 0 @@ -355,13 +369,14 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, while (err > stopThr and cpt < numItermax): uprev = u vprev = v + KtransposeU = np.dot(K.T, u) v = np.divide(b, KtransposeU) u = 1. 
/ np.dot(Kp, v) - if (np.any(KtransposeU == 0) or - np.any(np.isnan(u)) or np.any(np.isnan(v)) or - np.any(np.isinf(u)) or np.any(np.isinf(v))): + if (np.any(KtransposeU == 0) + or np.any(np.isnan(u)) or np.any(np.isnan(v)) + or np.any(np.isinf(u)) or np.any(np.isinf(v))): # we have reached the machine precision # come back to previous solution and quit loop print('Warning: numerical errors at iteration', cpt) @@ -375,8 +390,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, err = np.sum((u - uprev)**2) / np.sum((u)**2) + \ np.sum((v - vprev)**2) / np.sum((v)**2) else: - transp = u.reshape(-1, 1) * (K * v) - err = np.linalg.norm((np.sum(transp, axis=0) - b))**2 + # compute right marginal tmp2= (diag(u)Kdiag(v))^T1 + np.einsum('i,ij,j->j', u, K, v, out=tmp2) + err = np.linalg.norm(tmp2 - b)**2 # violation of marginal if log: log['err'].append(err) @@ -391,10 +407,7 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, log['v'] = v if nbb: # return only loss - res = np.zeros((nbb)) - for i in range(nbb): - res[i] = np.sum( - u[:, i].reshape((-1, 1)) * K * v[:, i].reshape((1, -1)) * M) + res = np.einsum('ik,ij,jk,ij->k', u, K, v, M) if log: return res, log else: @@ -408,6 +421,159 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, return u.reshape((-1, 1)) * K * v.reshape((1, -1)) +def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log=False): + """ + Solve the entropic regularization optimal transport problem and return the OT matrix + + The algorithm used is based on the paper + + Near-linear time approximation algorithms for optimal transport via Sinkhorn iteration + by Jason Altschuler, Jonathan Weed, Philippe Rigollet + appeared at NIPS 2017 + + which is a stochastic version of the Sinkhorn-Knopp algorithm [2]. + + The function solves the following optimization problem: + + .. math:: + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + + s.t. \gamma 1 = a + + \gamma^T 1= b + + \gamma\geq 0 + where : + + - M is the (ns,nt) metric cost matrix + - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - a and b are source and target weights (sum to 1) + + + + Parameters + ---------- + a : np.ndarray (ns,) + samples weights in the source domain + b : np.ndarray (nt,) or np.ndarray (nt,nbb) + samples in the target domain, compute sinkhorn with multiple targets + and fixed M if b is a matrix (return OT loss + dual variables in log) + M : np.ndarray (ns,nt) + loss matrix + reg : float + Regularization term >0 + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshol on error (>0) + log : bool, optional + record log if True + + + Returns + ------- + gamma : (ns x nt) ndarray + Optimal transportation matrix for the given parameters + log : dict + log dictionary return only if log==True in parameters + + Examples + -------- + + >>> import ot + >>> a=[.5,.5] + >>> b=[.5,.5] + >>> M=[[0.,1.],[1.,0.]] + >>> ot.bregman.greenkhorn(a,b,M,1) + array([[ 0.36552929, 0.13447071], + [ 0.13447071, 0.36552929]]) + + + References + ---------- + + .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013 + [22] J. Altschuler, J.Weed, P. 
Rigollet : Near-linear time approximation algorithms for optimal transport via Sinkhorn iteration, Advances in Neural Information Processing Systems (NIPS) 31, 2017 + + + See Also + -------- + ot.lp.emd : Unregularized OT + ot.optim.cg : General regularized OT + + """ + + a = np.asarray(a, dtype=np.float64) + b = np.asarray(b, dtype=np.float64) + M = np.asarray(M, dtype=np.float64) + + if len(a) == 0: + a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0] + if len(b) == 0: + b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] + + n = a.shape[0] + m = b.shape[0] + + # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute + K = np.empty_like(M) + np.divide(M, -reg, out=K) + np.exp(K, out=K) + + u = np.full(n, 1. / n) + v = np.full(m, 1. / m) + G = u[:, np.newaxis] * K * v[np.newaxis, :] + + viol = G.sum(1) - a + viol_2 = G.sum(0) - b + stopThr_val = 1 + + if log: + log = dict() + log['u'] = u + log['v'] = v + + for i in range(numItermax): + i_1 = np.argmax(np.abs(viol)) + i_2 = np.argmax(np.abs(viol_2)) + m_viol_1 = np.abs(viol[i_1]) + m_viol_2 = np.abs(viol_2[i_2]) + stopThr_val = np.maximum(m_viol_1, m_viol_2) + + if m_viol_1 > m_viol_2: + old_u = u[i_1] + u[i_1] = a[i_1] / (K[i_1, :].dot(v)) + G[i_1, :] = u[i_1] * K[i_1, :] * v + + viol[i_1] = u[i_1] * K[i_1, :].dot(v) - a[i_1] + viol_2 += (K[i_1, :].T * (u[i_1] - old_u) * v) + + else: + old_v = v[i_2] + v[i_2] = b[i_2] / (K[:, i_2].T.dot(u)) + G[:, i_2] = u * K[:, i_2] * v[i_2] + #aviol = (G@one_m - a) + #aviol_2 = (G.T@one_n - b) + viol += (-old_v + v[i_2]) * K[:, i_2] * u + viol_2[i_2] = v[i_2] * K[:, i_2].dot(u) - b[i_2] + + #print('b',np.max(abs(aviol -viol)),np.max(abs(aviol_2 - viol_2))) + + if stopThr_val <= stopThr: + break + else: + print('Warning: Algorithm did not converge') + + if log: + log['u'] = u + log['v'] = v + + if log: + return G, log + else: + return G + + def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, warmstart=None, verbose=False, print_period=20, log=False, **kwargs): """ @@ -532,13 +698,13 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, def get_K(alpha, beta): """log space computation""" - return np.exp(-(M - alpha.reshape((na, 1)) - - beta.reshape((1, nb))) / reg) + return np.exp(-(M - alpha.reshape((na, 1)) + - beta.reshape((1, nb))) / reg) def get_Gamma(alpha, beta, u, v): """log space gamma computation""" - return np.exp(-(M - alpha.reshape((na, 1)) - beta.reshape((1, nb))) / - reg + np.log(u.reshape((na, 1))) + np.log(v.reshape((1, nb)))) + return np.exp(-(M - alpha.reshape((na, 1)) - beta.reshape((1, nb))) + / reg + np.log(u.reshape((na, 1))) + np.log(v.reshape((1, nb)))) # print(np.min(K)) @@ -748,8 +914,8 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne def get_K(alpha, beta): """log space computation""" - return np.exp(-(M - alpha.reshape((na, 1)) - - beta.reshape((1, nb))) / reg) + return np.exp(-(M - alpha.reshape((na, 1)) + - beta.reshape((1, nb))) / reg) # print(np.min(K)) def get_reg(n): # exponential decreasing @@ -917,6 +1083,116 @@ def barycenter(A, M, reg, weights=None, numItermax=1000, return geometricBar(weights, UKv) +def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1e-9, stabThr=1e-30, verbose=False, log=False): + """Compute the entropic regularized wasserstein barycenter of distributions A + where A is a collection of 2D images. + + The function solves the following optimization problem: + + .. 
math:: + \mathbf{a} = arg\min_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i) + + where : + + - :math:`W_{reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance (see ot.bregman.sinkhorn) + - :math:`\mathbf{a}_i` are training distributions (2D images) in the mast two dimensions of matrix :math:`\mathbf{A}` + - reg is the regularization strength scalar value + + The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [21]_ + + Parameters + ---------- + A : np.ndarray (n,w,h) + n distributions (2D images) of size w x h + reg : float + Regularization term >0 + weights : np.ndarray (n,) + Weights of each image on the simplex (barycentric coodinates) + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshol on error (>0) + stabThr : float, optional + Stabilization threshold to avoid numerical precision issue + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + + + Returns + ------- + a : (w,h) ndarray + 2D Wasserstein barycenter + log : dict + log dictionary return only if log==True in parameters + + + References + ---------- + + .. [21] Solomon, J., De Goes, F., Peyré, G., Cuturi, M., Butscher, A., Nguyen, A. & Guibas, L. (2015). + Convolutional wasserstein distances: Efficient optimal transportation on geometric domains + ACM Transactions on Graphics (TOG), 34(4), 66 + + + """ + + if weights is None: + weights = np.ones(A.shape[0]) / A.shape[0] + else: + assert(len(weights) == A.shape[0]) + + if log: + log = {'err': []} + + b = np.zeros_like(A[0, :, :]) + U = np.ones_like(A) + KV = np.ones_like(A) + + cpt = 0 + err = 1 + + # build the convolution operator + t = np.linspace(0, 1, A.shape[1]) + [Y, X] = np.meshgrid(t, t) + xi1 = np.exp(-(X - Y)**2 / reg) + + def K(x): + return np.dot(np.dot(xi1, x), xi1) + + while (err > stopThr and cpt < numItermax): + + bold = b + cpt = cpt + 1 + + b = np.zeros_like(A[0, :, :]) + for r in range(A.shape[0]): + KV[r, :, :] = K(A[r, :, :] / np.maximum(stabThr, K(U[r, :, :]))) + b += weights[r] * np.log(np.maximum(stabThr, U[r, :, :] * KV[r, :, :])) + b = np.exp(b) + for r in range(A.shape[0]): + U[r, :, :] = b / np.maximum(stabThr, KV[r, :, :]) + + if cpt % 10 == 1: + err = np.sum(np.abs(bold - b)) + # log and verbose print + if log: + log['err'].append(err) + + if verbose: + if cpt % 200 == 0: + print('{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19) + print('{:5d}|{:8e}|'.format(cpt, err)) + + if log: + log['niter'] = cpt + log['U'] = U + return b, log + else: + return b + + def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000, stopThr=1e-3, verbose=False, log=False): """ @@ -1023,3 +1299,302 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000, return np.sum(K0, axis=1), log else: return np.sum(K0, axis=1) + + +def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs): + ''' + Solve the entropic regularization optimal transport problem and return the + OT matrix from empirical data + + The function solves the following optimization problem: + + .. math:: + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + + s.t. 
\gamma 1 = a + + \gamma^T 1= b + + \gamma\geq 0 + where : + + - :math:`M` is the (ns,nt) metric cost matrix + - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`a` and :math:`b` are source and target weights (sum to 1) + + + Parameters + ---------- + X_s : np.ndarray (ns, d) + samples in the source domain + X_t : np.ndarray (nt, d) + samples in the target domain + reg : float + Regularization term >0 + a : np.ndarray (ns,) + samples weights in the source domain + b : np.ndarray (nt,) + samples weights in the target domain + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshol on error (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + + + Returns + ------- + gamma : (ns x nt) ndarray + Regularized optimal transportation matrix for the given parameters + log : dict + log dictionary return only if log==True in parameters + + Examples + -------- + + >>> n_s = 2 + >>> n_t = 2 + >>> reg = 0.1 + >>> X_s = np.reshape(np.arange(n_s), (n_s, 1)) + >>> X_t = np.reshape(np.arange(0, n_t), (n_t, 1)) + >>> emp_sinkhorn = empirical_sinkhorn(X_s, X_t, reg, verbose=False) + >>> print(emp_sinkhorn) + >>> [[4.99977301e-01 2.26989344e-05] + [2.26989344e-05 4.99977301e-01]] + + + References + ---------- + + .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013 + + .. [9] Schmitzer, B. (2016). Stabilized Sparse Scaling Algorithms for Entropy Regularized Transport Problems. arXiv preprint arXiv:1610.06519. + + .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816. + ''' + + if a is None: + a = unif(np.shape(X_s)[0]) + if b is None: + b = unif(np.shape(X_t)[0]) + + M = dist(X_s, X_t, metric=metric) + + if log: + pi, log = sinkhorn(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=True, **kwargs) + return pi, log + else: + pi = sinkhorn(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=False, **kwargs) + return pi + + +def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs): + ''' + Solve the entropic regularization optimal transport problem from empirical + data and return the OT loss + + + The function solves the following optimization problem: + + .. math:: + W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + + s.t. 
\gamma 1 = a + + \gamma^T 1= b + + \gamma\geq 0 + where : + + - :math:`M` is the (ns,nt) metric cost matrix + - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`a` and :math:`b` are source and target weights (sum to 1) + + + Parameters + ---------- + X_s : np.ndarray (ns, d) + samples in the source domain + X_t : np.ndarray (nt, d) + samples in the target domain + reg : float + Regularization term >0 + a : np.ndarray (ns,) + samples weights in the source domain + b : np.ndarray (nt,) + samples weights in the target domain + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshol on error (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + + + Returns + ------- + gamma : (ns x nt) ndarray + Regularized optimal transportation matrix for the given parameters + log : dict + log dictionary return only if log==True in parameters + + Examples + -------- + + >>> n_s = 2 + >>> n_t = 2 + >>> reg = 0.1 + >>> X_s = np.reshape(np.arange(n_s), (n_s, 1)) + >>> X_t = np.reshape(np.arange(0, n_t), (n_t, 1)) + >>> loss_sinkhorn = empirical_sinkhorn2(X_s, X_t, reg, verbose=False) + >>> print(loss_sinkhorn) + >>> [4.53978687e-05] + + + References + ---------- + + .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013 + + .. [9] Schmitzer, B. (2016). Stabilized Sparse Scaling Algorithms for Entropy Regularized Transport Problems. arXiv preprint arXiv:1610.06519. + + .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816. + ''' + + if a is None: + a = unif(np.shape(X_s)[0]) + if b is None: + b = unif(np.shape(X_t)[0]) + + M = dist(X_s, X_t, metric=metric) + + if log: + sinkhorn_loss, log = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs) + return sinkhorn_loss, log + else: + sinkhorn_loss = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs) + return sinkhorn_loss + + +def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs): + ''' + Compute the sinkhorn divergence loss from empirical data + + The function solves the following optimization problems and return the + sinkhorn divergence :math:`S`: + + .. math:: + + W &= \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + + W_a &= \min_{\gamma_a} <\gamma_a,M_a>_F + reg\cdot\Omega(\gamma_a) + + W_b &= \min_{\gamma_b} <\gamma_b,M_b>_F + reg\cdot\Omega(\gamma_b) + + S &= W - 1/2 * (W_a + W_b) + + .. math:: + s.t. \gamma 1 = a + + \gamma^T 1= b + + \gamma\geq 0 + + \gamma_a 1 = a + + \gamma_a^T 1= a + + \gamma_a\geq 0 + + \gamma_b 1 = b + + \gamma_b^T 1= b + + \gamma_b\geq 0 + where : + + - :math:`M` (resp. 
:math:`M_a, M_b`) is the (ns,nt) metric cost matrix (resp (ns, ns) and (nt, nt)) + - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`a` and :math:`b` are source and target weights (sum to 1) + + + Parameters + ---------- + X_s : np.ndarray (ns, d) + samples in the source domain + X_t : np.ndarray (nt, d) + samples in the target domain + reg : float + Regularization term >0 + a : np.ndarray (ns,) + samples weights in the source domain + b : np.ndarray (nt,) + samples weights in the target domain + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshol on error (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + + + Returns + ------- + gamma : (ns x nt) ndarray + Regularized optimal transportation matrix for the given parameters + log : dict + log dictionary return only if log==True in parameters + + Examples + -------- + + >>> n_s = 2 + >>> n_t = 4 + >>> reg = 0.1 + >>> X_s = np.reshape(np.arange(n_s), (n_s, 1)) + >>> X_t = np.reshape(np.arange(0, n_t), (n_t, 1)) + >>> emp_sinkhorn_div = empirical_sinkhorn_divergence(X_s, X_t, reg) + >>> print(emp_sinkhorn_div) + >>> [2.99977435] + + + References + ---------- + + .. [23] Aude Genevay, Gabriel Peyré, Marco Cuturi, Learning Generative Models with Sinkhorn Divergences, Proceedings of the Twenty-First International Conference on Artficial Intelligence and Statistics, (AISTATS) 21, 2018 + ''' + if log: + sinkhorn_loss_ab, log_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs) + + sinkhorn_loss_a, log_a = empirical_sinkhorn2(X_s, X_s, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs) + + sinkhorn_loss_b, log_b = empirical_sinkhorn2(X_t, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs) + + sinkhorn_div = sinkhorn_loss_ab - 1 / 2 * (sinkhorn_loss_a + sinkhorn_loss_b) + + log = {} + log['sinkhorn_loss_ab'] = sinkhorn_loss_ab + log['sinkhorn_loss_a'] = sinkhorn_loss_a + log['sinkhorn_loss_b'] = sinkhorn_loss_b + log['log_sinkhorn_ab'] = log_ab + log['log_sinkhorn_a'] = log_a + log['log_sinkhorn_b'] = log_b + + return max(0, sinkhorn_div), log + + else: + sinkhorn_loss_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs) + + sinkhorn_loss_a = empirical_sinkhorn2(X_s, X_s, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs) + + sinkhorn_loss_b = empirical_sinkhorn2(X_t, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs) + + sinkhorn_div = sinkhorn_loss_ab - 1 / 2 * (sinkhorn_loss_a + sinkhorn_loss_b) + return max(0, sinkhorn_div) @@ -15,7 +15,7 @@ import scipy.linalg as linalg from .bregman import sinkhorn from .lp import emd from .utils import unif, dist, kernel, cost_normalization -from .utils import check_params, deprecated, BaseEstimator +from .utils import check_params, BaseEstimator from .optim import cg from .optim import gcg @@ -473,22 +473,24 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian', Weight for the linear OT loss (>0) eta : float, optional Regularization term for the linear mapping L (>0) - bias : bool,optional - Estimate linear mapping with constant bias 
kerneltype : str,optional kernel used by calling function ot.utils.kernel (gaussian by default) sigma : float, optional Gaussian kernel bandwidth. + bias : bool,optional + Estimate linear mapping with constant bias + verbose : bool, optional + Print information along iterations + verbose2 : bool, optional + Print information along iterations numItermax : int, optional Max number of BCD iterations - stopThr : float, optional - Stop threshold on relative loss decrease (>0) numInnerItermax : int, optional Max number of iterations (inner CG solver) stopInnerThr : float, optional Stop threshold on error (inner CG solver) (>0) - verbose : bool, optional - Print information along iterations + stopThr : float, optional + Stop threshold on relative loss decrease (>0) log : bool, optional record log if True @@ -643,7 +645,8 @@ def OT_mapping_linear(xs, xt, reg=1e-6, ws=None, The function estimates the optimal linear operator that aligns the two empirical distributions. This is equivalent to estimating the closed form mapping between two Gaussian distributions :math:`N(\mu_s,\Sigma_s)` - and :math:`N(\mu_t,\Sigma_t)` as proposed in [14] and discussed in remark 2.29 in [15]. + and :math:`N(\mu_t,\Sigma_t)` as proposed in [14] and discussed in remark + 2.29 in [15]. The linear operator from source to target :math:`M` @@ -740,288 +743,6 @@ def OT_mapping_linear(xs, xt, reg=1e-6, ws=None, return A, b -@deprecated("The class OTDA is deprecated in 0.3.1 and will be " - "removed in 0.5" - "\n\tfor standard transport use class EMDTransport instead.") -class OTDA(object): - - """Class for domain adaptation with optimal transport as proposed in [5] - - - References - ---------- - - .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, - "Optimal Transport for Domain Adaptation," in IEEE Transactions on - Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 - - """ - - def __init__(self, metric='sqeuclidean', norm=None): - """ Class initialization""" - self.xs = 0 - self.xt = 0 - self.G = 0 - self.metric = metric - self.norm = norm - self.computed = False - - def fit(self, xs, xt, ws=None, wt=None, max_iter=100000): - """Fit domain adaptation between samples is xs and xt - (with optional weights)""" - self.xs = xs - self.xt = xt - - if wt is None: - wt = unif(xt.shape[0]) - if ws is None: - ws = unif(xs.shape[0]) - - self.ws = ws - self.wt = wt - - self.M = dist(xs, xt, metric=self.metric) - self.M = cost_normalization(self.M, self.norm) - self.G = emd(ws, wt, self.M, max_iter) - self.computed = True - - def interp(self, direction=1): - """Barycentric interpolation for the source (1) or target (-1) samples - - This Barycentric interpolation solves for each source (resp target) - sample xs (resp xt) the following optimization problem: - - .. math:: - arg\min_x \sum_i \gamma_{k,i} c(x,x_i^t) - - where k is the index of the sample in xs - - For the moment only squared euclidean distance is provided but more - metric could be used in the future. 
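[Editor's note] The barycentric interpolation described in this (deprecated) `OTDA.interp` docstring reduces, for the squared euclidean metric, to a coupling-weighted mean of the target samples. A minimal numpy sketch of that step under a toy coupling `G` (illustrative data, not POT's API):

```python
import numpy as np

# Barycentric mapping: source sample i goes to the G-weighted mean of
# the target samples, i.e. argmin_x sum_j G[i, j] * ||x - xt[j]||^2.
rng = np.random.RandomState(0)
xt = rng.randn(4, 2)               # toy target samples
G = rng.rand(3, 4)
G /= G.sum()                       # toy coupling: 3 source / 4 target points
ws = G.sum(axis=1, keepdims=True)  # row marginals (source weights)

xs_mapped = np.dot(G / ws, xt)     # the weighted mean computed by interp()
```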
- - """ - if direction > 0: # >0 then source to target - G = self.G - w = self.ws.reshape((self.xs.shape[0], 1)) - x = self.xt - else: - G = self.G.T - w = self.wt.reshape((self.xt.shape[0], 1)) - x = self.xs - - if self.computed: - if self.metric == 'sqeuclidean': - return np.dot(G / w, x) # weighted mean - else: - print( - "Warning, metric not handled yet, using weighted average") - return np.dot(G / w, x) # weighted mean - return None - else: - print("Warning, model not fitted yet, returning None") - return None - - def predict(self, x, direction=1): - """ Out of sample mapping using the formulation from [6] - - For each sample x to map, it finds the nearest source sample xs and - map the samle x to the position xst+(x-xs) wher xst is the barycentric - interpolation of source sample xs. - - References - ---------- - - .. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014). - Regularized discrete optimal transport. SIAM Journal on Imaging - Sciences, 7(3), 1853-1882. - - """ - if direction > 0: # >0 then source to target - xf = self.xt - x0 = self.xs - else: - xf = self.xs - x0 = self.xt - - D0 = dist(x, x0) # dist netween new samples an source - idx = np.argmin(D0, 1) # closest one - xf = self.interp(direction) # interp the source samples - # aply the delta to the interpolation - return xf[idx, :] + x - x0[idx, :] - - -@deprecated("The class OTDA_sinkhorn is deprecated in 0.3.1 and will be" - " removed in 0.5 \nUse class SinkhornTransport instead.") -class OTDA_sinkhorn(OTDA): - - """Class for domain adaptation with optimal transport with entropic - regularization - - - """ - - def fit(self, xs, xt, reg=1, ws=None, wt=None, **kwargs): - """Fit regularized domain adaptation between samples is xs and xt - (with optional weights)""" - self.xs = xs - self.xt = xt - - if wt is None: - wt = unif(xt.shape[0]) - if ws is None: - ws = unif(xs.shape[0]) - - self.ws = ws - self.wt = wt - - self.M = dist(xs, xt, metric=self.metric) - self.M = cost_normalization(self.M, self.norm) - self.G = sinkhorn(ws, wt, self.M, reg, **kwargs) - self.computed = True - - -@deprecated("The class OTDA_lpl1 is deprecated in 0.3.1 and will be" - " removed in 0.5 \nUse class SinkhornLpl1Transport instead.") -class OTDA_lpl1(OTDA): - - """Class for domain adaptation with optimal transport with entropic and - group regularization""" - - def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, **kwargs): - """Fit regularized domain adaptation between samples is xs and xt - (with optional weights), See ot.da.sinkhorn_lpl1_mm for fit - parameters""" - self.xs = xs - self.xt = xt - - if wt is None: - wt = unif(xt.shape[0]) - if ws is None: - ws = unif(xs.shape[0]) - - self.ws = ws - self.wt = wt - - self.M = dist(xs, xt, metric=self.metric) - self.M = cost_normalization(self.M, self.norm) - self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M, reg, eta, **kwargs) - self.computed = True - - -@deprecated("The class OTDA_l1L2 is deprecated in 0.3.1 and will be" - " removed in 0.5 \nUse class SinkhornL1l2Transport instead.") -class OTDA_l1l2(OTDA): - - """Class for domain adaptation with optimal transport with entropic - and group lasso regularization""" - - def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, **kwargs): - """Fit regularized domain adaptation between samples is xs and xt - (with optional weights), See ot.da.sinkhorn_lpl1_gl for fit - parameters""" - self.xs = xs - self.xt = xt - - if wt is None: - wt = unif(xt.shape[0]) - if ws is None: - ws = unif(xs.shape[0]) - - self.ws = ws - self.wt = wt - - 
self.M = dist(xs, xt, metric=self.metric) - self.M = cost_normalization(self.M, self.norm) - self.G = sinkhorn_l1l2_gl(ws, ys, wt, self.M, reg, eta, **kwargs) - self.computed = True - - -@deprecated("The class OTDA_mapping_linear is deprecated in 0.3.1 and will be" - " removed in 0.5 \nUse class MappingTransport instead.") -class OTDA_mapping_linear(OTDA): - - """Class for optimal transport with joint linear mapping estimation as in - [8] - """ - - def __init__(self): - """ Class initialization""" - - self.xs = 0 - self.xt = 0 - self.G = 0 - self.L = 0 - self.bias = False - self.computed = False - self.metric = 'sqeuclidean' - - def fit(self, xs, xt, mu=1, eta=1, bias=False, **kwargs): - """ Fit domain adaptation between samples is xs and xt (with optional - weights)""" - self.xs = xs - self.xt = xt - self.bias = bias - - self.ws = unif(xs.shape[0]) - self.wt = unif(xt.shape[0]) - - self.G, self.L = joint_OT_mapping_linear( - xs, xt, mu=mu, eta=eta, bias=bias, **kwargs) - self.computed = True - - def mapping(self): - return lambda x: self.predict(x) - - def predict(self, x): - """ Out of sample mapping estimated during the call to fit""" - if self.computed: - if self.bias: - x = np.hstack((x, np.ones((x.shape[0], 1)))) - return x.dot(self.L) # aply the delta to the interpolation - else: - print("Warning, model not fitted yet, returning None") - return None - - -@deprecated("The class OTDA_mapping_kernel is deprecated in 0.3.1 and will be" - " removed in 0.5 \nUse class MappingTransport instead.") -class OTDA_mapping_kernel(OTDA_mapping_linear): - - """Class for optimal transport with joint nonlinear mapping - estimation as in [8]""" - - def fit(self, xs, xt, mu=1, eta=1, bias=False, kerneltype='gaussian', - sigma=1, **kwargs): - """ Fit domain adaptation between samples is xs and xt """ - self.xs = xs - self.xt = xt - self.bias = bias - - self.ws = unif(xs.shape[0]) - self.wt = unif(xt.shape[0]) - self.kernel = kerneltype - self.sigma = sigma - self.kwargs = kwargs - - self.G, self.L = joint_OT_mapping_kernel( - xs, xt, mu=mu, eta=eta, bias=bias, **kwargs) - self.computed = True - - def predict(self, x): - """ Out of sample mapping estimated during the call to fit""" - - if self.computed: - K = kernel( - x, self.xs, method=self.kernel, sigma=self.sigma, - **self.kwargs) - if self.bias: - K = np.hstack((K, np.ones((x.shape[0], 1)))) - return K.dot(self.L) - else: - print("Warning, model not fitted yet, returning None") - return None - - def distribution_estimation_uniform(X): """estimates a uniform distribution from an array of samples X @@ -1466,25 +1187,25 @@ class SinkhornTransport(BaseTransport): algorithm if no it has not converged tol : float, optional (default=10e-9) The precision required to stop the optimization algorithm. - mapping : string, optional (default="barycentric") - The kind of mapping to apply to transport samples from a domain into - another one. - if "barycentric" only the samples used to estimate the coupling can - be transported from a domain to another one. + verbose : bool, optional (default=False) + Controls the verbosity of the optimization algorithm + log : int, optional (default=False) + Controls the logs of the optimization algorithm metric : string, optional (default="sqeuclidean") The ground metric for the Wasserstein problem norm : string, optional (default=None) If given, normalize the ground metric to avoid numerical errors that can occur with large metric values. 
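[Editor's note] The `SinkhornTransport` estimator whose docstring is reworked in this hunk follows the usual fit/transform pattern. A hedged usage sketch on toy data (data is illustrative; the estimator API is as documented above):

```python
import numpy as np
import ot

# Fit an entropic-OT coupling between toy source and target clouds,
# then transport the source samples with the barycentric mapping.
rng = np.random.RandomState(42)
Xs = rng.randn(20, 2)
Xt = rng.randn(30, 2) + 3.

mapper = ot.da.SinkhornTransport(reg_e=1.0)
mapper.fit(Xs=Xs, Xt=Xt)
Xs_mapped = mapper.transform(Xs=Xs)
```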
- distribution : string, optional (default="uniform") + distribution_estimation : callable, optional (defaults to the uniform) The kind of distribution estimation to employ - verbose : int, optional (default=0) - Controls the verbosity of the optimization algorithm - log : int, optional (default=0) - Controls the logs of the optimization algorithm + out_of_sample_map : string, optional (default="ferradans") + The kind of out of sample mapping to apply to transport samples + from a domain into another one. Currently the only possible option is + "ferradans" which uses the method proposed in [6]. limit_max: float, optional (defaul=np.infty) Controls the semi supervised mode. Transport between labeled source - and target samples of different classes will exhibit an infinite cost + and target samples of different classes will exhibit an cost defined + by this variable Attributes ---------- @@ -1569,22 +1290,19 @@ class EMDTransport(BaseTransport): Parameters ---------- - mapping : string, optional (default="barycentric") - The kind of mapping to apply to transport samples from a domain into - another one. - if "barycentric" only the samples used to estimate the coupling can - be transported from a domain to another one. metric : string, optional (default="sqeuclidean") The ground metric for the Wasserstein problem norm : string, optional (default=None) If given, normalize the ground metric to avoid numerical errors that can occur with large metric values. - distribution : string, optional (default="uniform") - The kind of distribution estimation to employ - verbose : int, optional (default=0) - Controls the verbosity of the optimization algorithm - log : int, optional (default=0) + log : int, optional (default=False) Controls the logs of the optimization algorithm + distribution_estimation : callable, optional (defaults to the uniform) + The kind of distribution estimation to employ + out_of_sample_map : string, optional (default="ferradans") + The kind of out of sample mapping to apply to transport samples + from a domain into another one. Currently the only possible option is + "ferradans" which uses the method proposed in [6]. limit_max: float, optional (default=10) Controls the semi supervised mode. Transport between labeled source and target samples of different classes will exhibit an infinite cost @@ -1669,28 +1387,32 @@ class SinkhornLpl1Transport(BaseTransport): Entropic regularization parameter reg_cl : float, optional (default=0.1) Class regularization parameter - mapping : string, optional (default="barycentric") - The kind of mapping to apply to transport samples from a domain into - another one. - if "barycentric" only the samples used to estimate the coupling can - be transported from a domain to another one. - metric : string, optional (default="sqeuclidean") - The ground metric for the Wasserstein problem - norm : string, optional (default=None) - If given, normalize the ground metric to avoid numerical errors that - can occur with large metric values. 
- distribution : string, optional (default="uniform") - The kind of distribution estimation to employ max_iter : int, float, optional (default=10) The minimum number of iteration before stopping the optimization algorithm if no it has not converged max_inner_iter : int, float, optional (default=200) The number of iteration in the inner loop - verbose : int, optional (default=0) + log : bool, optional (default=False) + Controls the logs of the optimization algorithm + tol : float, optional (default=10e-9) + Stop threshold on error (inner sinkhorn solver) (>0) + verbose : bool, optional (default=False) Controls the verbosity of the optimization algorithm + metric : string, optional (default="sqeuclidean") + The ground metric for the Wasserstein problem + norm : string, optional (default=None) + If given, normalize the ground metric to avoid numerical errors that + can occur with large metric values. + distribution_estimation : callable, optional (defaults to the uniform) + The kind of distribution estimation to employ + out_of_sample_map : string, optional (default="ferradans") + The kind of out of sample mapping to apply to transport samples + from a domain into another one. Currently the only possible option is + "ferradans" which uses the method proposed in [6]. limit_max: float, optional (defaul=np.infty) Controls the semi supervised mode. Transport between labeled source - and target samples of different classes will exhibit an infinite cost + and target samples of different classes will exhibit a cost defined by + limit_max. Attributes ---------- @@ -1786,27 +1508,28 @@ class SinkhornL1l2Transport(BaseTransport): Entropic regularization parameter reg_cl : float, optional (default=0.1) Class regularization parameter - mapping : string, optional (default="barycentric") - The kind of mapping to apply to transport samples from a domain into - another one. - if "barycentric" only the samples used to estimate the coupling can - be transported from a domain to another one. - metric : string, optional (default="sqeuclidean") - The ground metric for the Wasserstein problem - norm : string, optional (default=None) - If given, normalize the ground metric to avoid numerical errors that - can occur with large metric values. - distribution : string, optional (default="uniform") - The kind of distribution estimation to employ max_iter : int, float, optional (default=10) The minimum number of iteration before stopping the optimization algorithm if no it has not converged max_inner_iter : int, float, optional (default=200) The number of iteration in the inner loop - verbose : int, optional (default=0) + tol : float, optional (default=10e-9) + Stop threshold on error (inner sinkhorn solver) (>0) + verbose : bool, optional (default=False) Controls the verbosity of the optimization algorithm - log : int, optional (default=0) + log : bool, optional (default=False) Controls the logs of the optimization algorithm + metric : string, optional (default="sqeuclidean") + The ground metric for the Wasserstein problem + norm : string, optional (default=None) + If given, normalize the ground metric to avoid numerical errors that + can occur with large metric values. + distribution_estimation : callable, optional (defaults to the uniform) + The kind of distribution estimation to employ + out_of_sample_map : string, optional (default="ferradans") + The kind of out of sample mapping to apply to transport samples + from a domain into another one. 
Currently the only possible option is + "ferradans" which uses the method proposed in [6]. limit_max: float, optional (default=10) Controls the semi supervised mode. Transport between labeled source and target samples of different classes will exhibit an infinite cost @@ -1928,10 +1651,12 @@ class MappingTransport(BaseEstimator): Max number of iterations (inner CG solver) inner_tol : float, optional (default=1e-6) Stop threshold on error (inner CG solver) (>0) - verbose : bool, optional (default=False) - Print information along iterations log : bool, optional (default=False) record log if True + verbose : bool, optional (default=False) + Print information along iterations + verbose2 : bool, optional (default=False) + Print information along iterations Attributes ---------- diff --git a/ot/datasets.py b/ot/datasets.py index 362a89b..e76e75d 100644 --- a/ot/datasets.py +++ b/ot/datasets.py @@ -38,9 +38,9 @@ def make_1D_gauss(n, m, s): @deprecated() -def get_1D_gauss(n, m, sigma, random_state=None): +def get_1D_gauss(n, m, sigma): """ Deprecated see make_1D_gauss """ - return make_1D_gauss(n, m, sigma, random_state=None) + return make_1D_gauss(n, m, sigma) def make_2D_samples_gauss(n, m, sigma, random_state=None): diff --git a/ot/externals/funcsigs.py b/ot/externals/funcsigs.py index c73fdc9..106bde7 100644 --- a/ot/externals/funcsigs.py +++ b/ot/externals/funcsigs.py @@ -126,8 +126,8 @@ def signature(obj): new_params[arg_name] = param.replace(default=arg_value, _partial_kwarg=True) - elif (param.kind not in (_VAR_KEYWORD, _VAR_POSITIONAL) and - not param._partial_kwarg): + elif (param.kind not in (_VAR_KEYWORD, _VAR_POSITIONAL) + and not param._partial_kwarg): new_params.pop(arg_name) return sig.replace(parameters=new_params.values()) @@ -333,11 +333,11 @@ class Parameter(object): raise TypeError(msg) def __eq__(self, other): - return (issubclass(other.__class__, Parameter) and - self._name == other._name and - self._kind == other._kind and - self._default == other._default and - self._annotation == other._annotation) + return (issubclass(other.__class__, Parameter) + and self._name == other._name + and self._kind == other._kind + and self._default == other._default + and self._annotation == other._annotation) def __ne__(self, other): return not self.__eq__(other) @@ -372,8 +372,8 @@ class BoundArguments(object): def args(self): args = [] for param_name, param in self._signature.parameters.items(): - if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) or - param._partial_kwarg): + if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) + or param._partial_kwarg): # Keyword arguments mapped by 'functools.partial' # (Parameter._partial_kwarg is True) are mapped # in 'BoundArguments.kwargs', along with VAR_KEYWORD & @@ -402,8 +402,8 @@ class BoundArguments(object): kwargs_started = False for param_name, param in self._signature.parameters.items(): if not kwargs_started: - if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) or - param._partial_kwarg): + if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) + or param._partial_kwarg): kwargs_started = True else: if param_name not in self.arguments: @@ -432,9 +432,9 @@ class BoundArguments(object): raise TypeError(msg) def __eq__(self, other): - return (issubclass(other.__class__, BoundArguments) and - self.signature == other.signature and - self.arguments == other.arguments) + return (issubclass(other.__class__, BoundArguments) + and self.signature == other.signature + and self.arguments == other.arguments) def __ne__(self, other): return not self.__eq__(other) @@ 
-612,9 +612,9 @@ class Signature(object): raise TypeError(msg) def __eq__(self, other): - if (not issubclass(type(other), Signature) or - self.return_annotation != other.return_annotation or - len(self.parameters) != len(other.parameters)): + if (not issubclass(type(other), Signature) + or self.return_annotation != other.return_annotation + or len(self.parameters) != len(other.parameters)): return False other_positions = dict((param, idx) @@ -635,8 +635,8 @@ class Signature(object): except KeyError: return False else: - if (idx != other_idx or - param != other.parameters[param_name]): + if (idx != other_idx + or param != other.parameters[param_name]): return False return True @@ -688,8 +688,8 @@ class Signature(object): raise TypeError(msg) parameters_ex = (param,) break - elif (param.kind == _VAR_KEYWORD or - param.default is not _empty): + elif (param.kind == _VAR_KEYWORD + or param.default is not _empty): # That's fine too - we have a default value for this # parameter. So, lets start parsing `kwargs`, starting # with the current parameter @@ -755,8 +755,8 @@ class Signature(object): # if it has a default value, or it is an '*args'-like # parameter, left alone by the processing of positional # arguments. - if (not partial and param.kind != _VAR_POSITIONAL and - param.default is _empty): + if (not partial and param.kind != _VAR_POSITIONAL + and param.default is _empty): raise TypeError('{arg!r} parameter lacking default value'. format(arg=param_name)) diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py index a2fdd3d..deda6b1 100644 --- a/ot/gpu/__init__.py +++ b/ot/gpu/__init__.py @@ -1,12 +1,37 @@ # -*- coding: utf-8 -*- +""" -from . import bregman -from . import da -from .bregman import sinkhorn +This module provides GPU implementation for several OT solvers and utility +functions. The GPU backend in handled by `cupy +<https://cupy.chainer.org/>`_. + +By default, the functions in this module accept and return numpy arrays +in order to proide drop-in replacement for the other POT function but +the transfer between CPU en GPU comes with a significant overhead. + +In order to get the best erformances, we recommend to give only cupy +arrays to the functions and desactivate the conversion to numpy of the +result of the function with parameter ``to_numpy=False``. + +""" # Author: Remi Flamary <remi.flamary@unice.fr> # Leo Gautheron <https://github.com/aje> # # License: MIT License -__all__ = ["bregman", "da", "sinkhorn"] +from . import bregman +from . import da +from .bregman import sinkhorn +from .da import sinkhorn_lpl1_mm + +from . import utils +from .utils import dist, to_gpu, to_np + + + + + +__all__ = ["utils", "dist", "sinkhorn", + "sinkhorn_lpl1_mm", 'bregman', 'da', 'to_gpu', 'to_np'] + diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py index 47939c4..978b307 100644 --- a/ot/gpu/bregman.py +++ b/ot/gpu/bregman.py @@ -8,14 +8,18 @@ Bregman projections for regularized OT with GPU # # License: MIT License -import numpy as np -import cudamat +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations +from . 
import utils -def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, - log=False, returnAsGPU=False): - r""" - Solve the entropic regularization optimal transport problem on GPU +def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, + verbose=False, log=False, to_numpy=True, **kwargs): + """ + Solve the entropic regularization optimal transport on GPU + + If the input matrix are in numpy format, they will be uploaded to the + GPU first which can incur significant time overhead. The function solves the following optimization problem: @@ -40,9 +44,10 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, ---------- a : np.ndarray (ns,) samples weights in the source domain - b : np.ndarray (nt,) - samples in the target domain - M_GPU : cudamat.CUDAMatrix (ns,nt) + b : np.ndarray (nt,) or np.ndarray (nt,nbb) + samples in the target domain, compute sinkhorn with multiple targets + and fixed M if b is a matrix (return OT loss + dual variables in log) + M : np.ndarray (ns,nt) loss matrix reg : float Regularization term >0 @@ -54,8 +59,9 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, Print information along iterations log : bool, optional record log if True - returnAsGPU : bool, optional - return the OT matrix as a cudamat.CUDAMatrix + to_numpy : boolean, optional (default True) + If true convert back the GPU array result to numpy format. + Returns ------- @@ -88,60 +94,78 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, ot.optim.cg : General regularized OT """ + + a = cp.asarray(a) + b = cp.asarray(b) + M = cp.asarray(M) + + if len(a) == 0: + a = np.ones((M.shape[0],)) / M.shape[0] + if len(b) == 0: + b = np.ones((M.shape[1],)) / M.shape[1] + # init data Nini = len(a) Nfin = len(b) + if len(b.shape) > 1: + nbb = b.shape[1] + else: + nbb = 0 + if log: log = {'err': []} # we assume that no distances are null except those of the diagonal of # distances - u = (np.ones(Nini) / Nini).reshape((Nini, 1)) - u_GPU = cudamat.CUDAMatrix(u) - a_GPU = cudamat.CUDAMatrix(a.reshape((Nini, 1))) - ones_GPU = cudamat.empty(u_GPU.shape).assign(1) - v = (np.ones(Nfin) / Nfin).reshape((Nfin, 1)) - v_GPU = cudamat.CUDAMatrix(v) - b_GPU = cudamat.CUDAMatrix(b.reshape((Nfin, 1))) - - M_GPU.divide(-reg) + if nbb: + u = np.ones((Nini, nbb)) / Nini + v = np.ones((Nfin, nbb)) / Nfin + else: + u = np.ones(Nini) / Nini + v = np.ones(Nfin) / Nfin - K_GPU = cudamat.exp(M_GPU) + # print(reg) - ones_GPU.divide(a_GPU, target=a_GPU) - Kp_GPU = cudamat.empty(K_GPU.shape) - K_GPU.mult_by_col(a_GPU, target=Kp_GPU) + # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute + K = np.empty(M.shape, dtype=M.dtype) + np.divide(M, -reg, out=K) + np.exp(K, out=K) - tmp_GPU = cudamat.empty(K_GPU.shape) + # print(np.min(K)) + tmp2 = np.empty(b.shape, dtype=M.dtype) + Kp = (1 / a).reshape(-1, 1) * K cpt = 0 err = 1 while (err > stopThr and cpt < numItermax): - uprev_GPU = u_GPU.copy() - vprev_GPU = v_GPU.copy() + uprev = u + vprev = v - KtransposeU_GPU = K_GPU.transpose().dot(u_GPU) - b_GPU.divide(KtransposeU_GPU, target=v_GPU) - ones_GPU.divide(Kp_GPU.dot(v_GPU), target=u_GPU) + KtransposeU = np.dot(K.T, u) + v = np.divide(b, KtransposeU) + u = 1. 
/ np.dot(Kp, v) - if (np.any(KtransposeU_GPU.asarray() == 0) or - not u_GPU.allfinite() or not v_GPU.allfinite()): + if (np.any(KtransposeU == 0) or + np.any(np.isnan(u)) or np.any(np.isnan(v)) or + np.any(np.isinf(u)) or np.any(np.isinf(v))): # we have reached the machine precision # come back to previous solution and quit loop print('Warning: numerical errors at iteration', cpt) - u_GPU = uprev_GPU.copy() - v_GPU = vprev_GPU.copy() + u = uprev + v = vprev break if cpt % 10 == 0: # we can speed up the process by checking for the error only all # the 10th iterations - K_GPU.mult_by_col(u_GPU, target=tmp_GPU) - tmp_GPU.mult_by_row(v_GPU.transpose(), target=tmp_GPU) - - bcopy_GPU = b_GPU.copy().transpose() - bcopy_GPU.add_sums(tmp_GPU, axis=0, beta=-1) - err = bcopy_GPU.euclid_norm()**2 + if nbb: + err = np.sum((u - uprev)**2) / np.sum((u)**2) + \ + np.sum((v - vprev)**2) / np.sum((v)**2) + else: + # compute right marginal tmp2= (diag(u)Kdiag(v))^T1 + tmp2 = np.sum(u[:, None] * K * v[None, :], 0) + #tmp2=np.einsum('i,ij,j->j', u, K, v) + err = np.linalg.norm(tmp2 - b)**2 # violation of marginal if log: log['err'].append(err) @@ -150,20 +174,32 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, print( '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19) print('{:5d}|{:8e}|'.format(cpt, err)) - cpt += 1 - if log: - log['u'] = u_GPU.asarray() - log['v'] = v_GPU.asarray() - - K_GPU.mult_by_col(u_GPU, target=K_GPU) - K_GPU.mult_by_row(v_GPU.transpose(), target=K_GPU) - - if returnAsGPU: - res = K_GPU - else: - res = K_GPU.asarray() - + cpt = cpt + 1 if log: - return res, log - else: - return res + log['u'] = u + log['v'] = v + + if nbb: # return only loss + #res = np.einsum('ik,ij,jk,ij->k', u, K, v, M) (explodes cupy memory) + res = np.empty(nbb) + for i in range(nbb): + res[i] = np.sum(u[:, None, i] * (K * M) * v[None, :, i]) + if to_numpy: + res = utils.to_np(res) + if log: + return res, log + else: + return res + + else: # return OT matrix + res = u.reshape((-1, 1)) * K * v.reshape((1, -1)) + if to_numpy: + res = utils.to_np(res) + if log: + return res, log + else: + return res + + +# define sinkhorn as sinkhorn_knopp +sinkhorn = sinkhorn_knopp diff --git a/ot/gpu/da.py b/ot/gpu/da.py index 71a485a..4a98038 100644 --- a/ot/gpu/da.py +++ b/ot/gpu/da.py @@ -11,80 +11,30 @@ Domain adaptation with optimal transport with GPU implementation # License: MIT License -import numpy as np -from ..utils import unif -from ..da import OTDA +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations +import numpy as npp +from . import utils + from .bregman import sinkhorn -import cudamat -def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False): +def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, + numInnerItermax=200, stopInnerThr=1e-9, verbose=False, + log=False, to_numpy=True): """ - Compute the pairwise euclidean distance between matrices a and b. 
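[Editor's note] Both the removed cudamat helper and its cupy replacement in `ot/gpu/utils.py` compute pairwise squared distances through the expansion ||a_i - b_j||^2 = ||a_i||^2 - 2<a_i, b_j> + ||b_j||^2, which turns the whole distance matrix into one matrix product plus two broadcast additions. A CPU numpy sketch of the same trick (names are illustrative):

```python
import numpy as np

def pairwise_sqeuclidean(a, b):
    # ||a_i - b_j||^2 = ||a_i||^2 - 2 <a_i, b_j> + ||b_j||^2
    a2 = np.sum(a ** 2, axis=1)   # (n,) row norms
    b2 = np.sum(b ** 2, axis=1)   # (m,) column norms
    c = -2. * np.dot(a, b.T)      # (n, m) cross terms
    c += a2[:, None]
    c += b2[None, :]
    return c

a = np.random.RandomState(0).randn(5, 3)
b = np.random.RandomState(1).randn(4, 3)
brute = ((a[:, None, :] - b[None, :, :]) ** 2).sum(-1)
assert np.allclose(pairwise_sqeuclidean(a, b), brute)
```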
- - - Parameters - ---------- - a : np.ndarray (n, f) - first matrice - b : np.ndarray (m, f) - second matrice - returnAsGPU : boolean, optional (default False) - if True, returns cudamat matrix still on GPU, else return np.ndarray - squared : boolean, optional (default False) - if True, return squared euclidean distance matrice + Solve the entropic regularization optimal transport problem with nonconvex + group lasso regularization on GPU + If the input matrix are in numpy format, they will be uploaded to the + GPU first which can incur significant time overhead. - Returns - ------- - c : (n x m) np.ndarray or cudamat.CUDAMatrix - pairwise euclidean distance distance matrix - """ - # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m). - # First compute in c_GPU the squared euclidean distance. And return its - # square root. At each cell [i,j] of c, we want to have - # sum{k in range(f)} ( (a[i,k] - b[j,k])^2 ). We know that - # (a-b)^2 = a^2 -2ab +b^2. Thus we want to have in each cell of c: - # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2). - - a_GPU = cudamat.CUDAMatrix(a) - b_GPU = cudamat.CUDAMatrix(b) - - # Multiply a by b transpose to obtain in each cell [i,j] of c the - # value sum{k in range(f)} ( a[i,k]b[j,k] ) - c_GPU = cudamat.dot(a_GPU, b_GPU.transpose()) - # multiply by -2 to have sum{k in range(f)} ( -2a[i,k]b[j,k] ) - c_GPU.mult(-2) - - # Compute the vectors of the sum of squared elements. - a_GPU = cudamat.pow(a_GPU, 2).sum(axis=1) - b_GPU = cudamat.pow(b_GPU, 2).sum(axis=1) - - # Add the vectors in each columns (respectivly rows) of c. - # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] ) - c_GPU.add_col_vec(a_GPU) - # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2) - c_GPU.add_row_vec(b_GPU.transpose()) - - if not squared: - c_GPU = cudamat.sqrt(c_GPU) - - if returnAsGPU: - return c_GPU - else: - return c_GPU.asarray() - - -def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, - numInnerItermax=200, stopInnerThr=1e-9, - verbose=False, log=False): - """ - Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization The function solves the following optimization problem: .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)+ \eta \Omega_g(\gamma) + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma) + + \eta \Omega_g(\gamma) s.t. \gamma 1 = a @@ -94,11 +44,16 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, where : - M is the (ns,nt) metric cost matrix - - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - :math:`\Omega_g` is the group lasso regulaization term :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` where :math:`\mathcal{I}_c` are the index of samples from class c in the source domain. + - :math:`\Omega_e` is the entropic regularization term + :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`\Omega_g` is the group lasso regulaization term + :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` + where :math:`\mathcal{I}_c` are the index of samples from class c + in the source domain. 
- a and b are source and target weights (sum to 1) - The algorithm used for solving the problem is the generalised conditional gradient as proposed in [5]_ [7]_ + The algorithm used for solving the problem is the generalised conditional + gradient as proposed in [5]_ [7]_ Parameters @@ -109,7 +64,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, labels of samples in the source domain b : np.ndarray (nt,) samples weights in the target domain - M_GPU : cudamat.CUDAMatrix (ns,nt) + M : np.ndarray (ns,nt) loss matrix reg : float Regularization term for entropic regularization >0 @@ -125,6 +80,8 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, Print information along iterations log : bool, optional record log if True + to_numpy : boolean, optional (default True) + If true convert back the GPU array result to numpy format. Returns @@ -138,8 +95,13 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, References ---------- - .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 - .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized conditional gradient: analysis of convergence and applications. arXiv preprint arXiv:1510.06567. + .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE + Transactions on Pattern Analysis and Machine Intelligence , + vol.PP, no.99, pp.1-1 + .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). + Generalized conditional gradient: analysis of convergence + and applications. arXiv preprint arXiv:1510.06567. See Also -------- @@ -148,108 +110,35 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, ot.optim.cg : General regularized OT """ + + a, labels_a, b, M = utils.to_gpu(a, labels_a, b, M) + p = 0.5 epsilon = 1e-3 - Nfin = len(b) indices_labels = [] - classes = np.unique(labels_a) + labels_a2 = cp.asnumpy(labels_a) + classes = npp.unique(labels_a2) for c in classes: - idxc, = np.where(labels_a == c) - indices_labels.append(cudamat.CUDAMatrix(idxc.reshape(1, -1))) + idxc, = utils.to_gpu(npp.where(labels_a2 == c)) + indices_labels.append(idxc) - Mreg_GPU = cudamat.empty(M_GPU.shape) - W_GPU = cudamat.empty(M_GPU.shape).assign(0) + W = np.zeros(M.shape) for cpt in range(numItermax): - Mreg_GPU.assign(M_GPU) - Mreg_GPU.add_mult(W_GPU, eta) - transp_GPU = sinkhorn(a, b, Mreg_GPU, reg, numItermax=numInnerItermax, - stopThr=stopInnerThr, returnAsGPU=True) + Mreg = M + eta * W + transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax, + stopThr=stopInnerThr, to_numpy=False) # the transport has been computed. 
+        # the transport has been computed; check if classes are really
+        # separated
-        W_GPU.assign(1)
-        W_GPU = W_GPU.transpose()
+        W = np.ones(M.shape)
         for (i, c) in enumerate(classes):
-            (_, nbRow) = indices_labels[i].shape
-            tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0)
-            transp_GPU.transpose().select_columns(indices_labels[i], tmpC_GPU)
-            majs_GPU = tmpC_GPU.sum(axis=1).add(epsilon)
-            cudamat.pow(majs_GPU, (p - 1))
-            majs_GPU.mult(p)
-
-            tmpC_GPU.assign(0)
-            tmpC_GPU.add_col_vec(majs_GPU)
-            W_GPU.set_selected_columns(indices_labels[i], tmpC_GPU)
-
-        W_GPU = W_GPU.transpose()
-
-    return transp_GPU.asarray()
+            majs = np.sum(transp[indices_labels[i]], axis=0)
+            majs = p * ((majs + epsilon)**(p - 1))
+            W[indices_labels[i]] = majs

-
-class OTDA_GPU(OTDA):
-
-    def normalizeM(self, norm):
-        if norm == "median":
-            self.M_GPU.divide(float(np.median(self.M_GPU.asarray())))
-        elif norm == "max":
-            self.M_GPU.divide(float(np.max(self.M_GPU.asarray())))
-        elif norm == "log":
-            self.M_GPU.add(1)
-            cudamat.log(self.M_GPU)
-        elif norm == "loglog":
-            self.M_GPU.add(1)
-            cudamat.log(self.M_GPU)
-            self.M_GPU.add(1)
-            cudamat.log(self.M_GPU)
-
-
-class OTDA_sinkhorn(OTDA_GPU):
-
-    def fit(self, xs, xt, reg=1, ws=None, wt=None, norm=None, **kwargs):
-        cudamat.init()
-        xs = np.asarray(xs, dtype=np.float64)
-        xt = np.asarray(xt, dtype=np.float64)
-
-        self.xs = xs
-        self.xt = xt
-
-        if wt is None:
-            wt = unif(xt.shape[0])
-        if ws is None:
-            ws = unif(xs.shape[0])
-
-        self.ws = ws
-        self.wt = wt
-
-        self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True,
-                                          squared=True)
-        self.normalizeM(norm)
-        self.G = sinkhorn(ws, wt, self.M_GPU, reg, **kwargs)
-        self.computed = True
-
-
-class OTDA_lpl1(OTDA_GPU):
-
-    def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None,
-            **kwargs):
-        cudamat.init()
-        xs = np.asarray(xs, dtype=np.float64)
-        xt = np.asarray(xt, dtype=np.float64)
-
-        self.xs = xs
-        self.xt = xt
-
-        if wt is None:
-            wt = unif(xt.shape[0])
-        if ws is None:
-            ws = unif(xs.shape[0])
-
-        self.ws = ws
-        self.wt = wt
-
-        self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True,
-                                          squared=True)
-        self.normalizeM(norm)
-        self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M_GPU, reg, eta, **kwargs)
-        self.computed = True
+    if to_numpy:
+        return utils.to_np(transp)
+    else:
+        return transp
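With the cudamat classes removed, the cupy backend is exercised through plain functions. A minimal usage sketch (hypothetical data; assumes cupy and a CUDA device, not part of the patch):

import numpy as np
import ot                  # POT with the ot.gpu subpackage available
import ot.gpu.da
import ot.gpu.utils

xs = np.random.randn(100, 2)            # source samples (hypothetical)
ys = np.random.randint(0, 2, 100)       # source class labels
xt = np.random.randn(120, 2)            # target samples

# cost matrix computed and kept on the GPU to avoid a host round trip
M = ot.gpu.utils.dist(xs, xt, to_numpy=False)
G = ot.gpu.da.sinkhorn_lpl1_mm(ot.unif(100), ys, ot.unif(120), M,
                               reg=1e-1, eta=1e-1)  # numpy array by default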
diff --git a/ot/gpu/utils.py b/ot/gpu/utils.py
new file mode 100644
index 0000000..41e168a
--- /dev/null
+++ b/ot/gpu/utils.py
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+"""
+Utility functions for GPU
+"""
+
+# Author: Remi Flamary <remi.flamary@unice.fr>
+#         Nicolas Courty <ncourty@irisa.fr>
+#         Leo Gautheron <https://github.com/aje>
+#
+# License: MIT License
+
+import cupy as np  # np used for matrix computation
+import cupy as cp  # cp used for cupy specific operations
+
+
+def euclidean_distances(a, b, squared=False, to_numpy=True):
+    """
+    Compute the pairwise euclidean distance between matrices a and b.
+
+    If the input matrices are in numpy format, they will be uploaded to the
+    GPU first, which can incur a significant time overhead.
+
+    Parameters
+    ----------
+    a : np.ndarray (n, f)
+        first matrix
+    b : np.ndarray (m, f)
+        second matrix
+    to_numpy : boolean, optional (default True)
+        If True, convert back the GPU array result to numpy format.
+    squared : boolean, optional (default False)
+        if True, return squared euclidean distance matrix
+
+    Returns
+    -------
+    c : (n x m) np.ndarray or cupy.ndarray
+        pairwise euclidean distance matrix
+    """
+
+    a, b = to_gpu(a, b)
+
+    a2 = np.sum(np.square(a), 1)
+    b2 = np.sum(np.square(b), 1)
+
+    c = -2 * np.dot(a, b.T)
+    c += a2[:, None]
+    c += b2[None, :]
+
+    if not squared:
+        np.sqrt(c, out=c)
+    if to_numpy:
+        return to_np(c)
+    else:
+        return c
+
+
+def dist(x1, x2=None, metric='sqeuclidean', to_numpy=True):
+    """Compute distance between samples in x1 and x2 on gpu
+
+    Parameters
+    ----------
+
+    x1 : np.array (n1,d)
+        matrix with n1 samples of size d
+    x2 : np.array (n2,d), optional
+        matrix with n2 samples of size d (if None then x2=x1)
+    metric : str
+        Metric from 'sqeuclidean', 'euclidean'
+
+
+    Returns
+    -------
+
+    M : np.array (n1,n2)
+        distance matrix computed with given metric
+
+    """
+    if x2 is None:
+        x2 = x1
+    if metric == "sqeuclidean":
+        return euclidean_distances(x1, x2, squared=True, to_numpy=to_numpy)
+    elif metric == "euclidean":
+        return euclidean_distances(x1, x2, squared=False, to_numpy=to_numpy)
+    else:
+        raise NotImplementedError
+
+
+def to_gpu(*args):
+    """ Upload numpy arrays to GPU and return them"""
+    if len(args) > 1:
+        return (cp.asarray(x) for x in args)
+    else:
+        return cp.asarray(args[0])
+
+
+def to_np(*args):
+    """ Convert GPU arrays to numpy and return them"""
+    if len(args) > 1:
+        return (cp.asnumpy(x) for x in args)
+    else:
+        return cp.asnumpy(args[0])
diff --git a/ot/gromov.py b/ot/gromov.py
index fe4fc15..33134a2 100644
--- a/ot/gromov.py
+++ b/ot/gromov.py
@@ -40,7 +40,7 @@ def init_matrix(C1, C2, p, q, loss_fun='square_loss'):
         * h1(a)=a
* h2(b)=b
- The kl-loss function L(a,b)=(1/2)*|a-b|^2 is read as :
+ The kl-loss function L(a,b)=a*log(a/b)-a+b is read as :
L(a,b) = f1(a)+f2(b)-h1(a)*h2(b) with :
* f1(a)=a*log(a)-a
* f2(b)=b
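The corrected KL formula is easy to check against the decomposition it documents. A quick sketch, not part of the patch; the factors h1(a)=a and h2(b)=log(b) come from the surrounding POT source, since the hunk above cuts off before them:

import numpy as np

a, b = 0.7, 0.3
kl = a * np.log(a / b) - a + b       # L(a,b) as written in the + line
f1 = a * np.log(a) - a               # f1(a) = a*log(a) - a
f2 = b                               # f2(b) = b
h1, h2 = a, np.log(b)                # h1(a) = a, h2(b) = log(b)
assert np.isclose(kl, f1 + f2 - h1 * h2)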
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 4c0d170..02cbd8c 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -17,8 +17,9 @@ from .import cvx
 from .emd_wrap import emd_c, check_result
 from ..utils import parmap
 from .cvx import barycenter
+from ..utils import dist

-__all__=['emd', 'emd2', 'barycenter', 'cvx']
+__all__=['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx']


 def emd(a, b, M, numItermax=100000, log=False):
@@ -216,3 +217,95 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
         res = parmap(f, [b[:, i] for i in range(nb)], processes)
         return res
+
+
+
+def free_support_barycenter(measures_locations, measures_weights, X_init, b=None, weights=None, numItermax=100, stopThr=1e-7, verbose=False, log=None):
+    """
+    Solves the free support (locations of the barycenters are optimized, not the weights) Wasserstein barycenter problem (i.e. the weighted Frechet mean for the 2-Wasserstein distance)
+
+    The function solves the Wasserstein barycenter problem when the barycenter measure is constrained to be supported on k atoms.
+    This problem is considered in [1] (Algorithm 2). There are two differences with Algorithm 2 of [1]:
+    - we do not optimize over the weights
+    - we do not perform a line search for the location updates, i.e. we use theta = 1 in [1] (Algorithm 2). This can be seen as a discrete implementation of the fixed-point algorithm of [2] proposed in the continuous setting.
+
+    Parameters
+    ----------
+    measures_locations : list of (k_i,d) np.ndarray
+        The discrete support of a measure supported on k_i locations of a d-dimensional space (k_i can be different for each element of the list)
+    measures_weights : list of (k_i,) np.ndarray
+        Numpy arrays where each numpy array has k_i non-negative values summing to one representing the weights of each discrete input measure
+
+    X_init : (k,d) np.ndarray
+        Initialization of the support locations (on k atoms) of the barycenter
+    b : (k,) np.ndarray
+        Weights of the barycenter (non-negative, sum to 1)
+    weights : (N,) np.ndarray
+        Weights of each of the N input measures in the barycenter (non-negative, sum to 1)
+
+    numItermax : int, optional
+        Max number of iterations
+    stopThr : float, optional
+        Stop threshold on error (>0)
+    verbose : bool, optional
+        Print information along iterations
+    log : bool, optional
+        record log if True
+
+    Returns
+    -------
+    X : (k,d) np.ndarray
+        Support locations (on k atoms) of the barycenter
+
+    References
+    ----------
+
+    .. [1] Cuturi, Marco, and Arnaud Doucet. "Fast computation of Wasserstein barycenters." International Conference on Machine Learning. 2014.
+
+    .. [2] Álvarez-Esteban, Pedro C., et al. "A fixed-point approach to barycenters in Wasserstein space." Journal of Mathematical Analysis and Applications 441.2 (2016): 744-762.
+
+    """
+
+    iter_count = 0
+
+    N = len(measures_locations)
+    k = X_init.shape[0]
+    d = X_init.shape[1]
+    if b is None:
+        b = np.ones((k,)) / k
+    if weights is None:
+        weights = np.ones((N,)) / N
+
+    X = X_init
+
+    log_dict = {}
+    displacement_square_norms = []
+
+    displacement_square_norm = stopThr + 1.
+
+    while displacement_square_norm > stopThr and iter_count < numItermax:
+
+        T_sum = np.zeros((k, d))
+
+        for (measure_locations_i, measure_weights_i, weight_i) in zip(measures_locations, measures_weights, weights.tolist()):
+
+            M_i = dist(X, measure_locations_i)
+            T_i = emd(b, measure_weights_i, M_i)
+            T_sum = T_sum + weight_i * np.reshape(1. / b, (-1, 1)) * np.matmul(T_i, measure_locations_i)
+
+        displacement_square_norm = np.sum(np.square(T_sum - X))
+        if log:
+            displacement_square_norms.append(displacement_square_norm)
+
+        X = T_sum
+
+        if verbose:
+            print('iteration %d, displacement_square_norm=%f' % (iter_count, displacement_square_norm))
+
+        iter_count += 1
+
+    if log:
+        log_dict['displacement_square_norms'] = displacement_square_norms
+        return X, log_dict
+    else:
+        return X
\ No newline at end of file
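A usage sketch for the new solver (hypothetical data, not part of the patch): the barycenter of two 2-D point clouds supported on k = 10 free atoms.

import numpy as np
import ot

rng = np.random.RandomState(42)
measures_locations = [rng.randn(30, 2), rng.randn(40, 2) + 2.]
measures_weights = [ot.unif(30), ot.unif(40)]
X_init = rng.randn(10, 2)   # k = 10 atoms; only their locations are optimized

X = ot.lp.free_support_barycenter(measures_locations, measures_weights, X_init)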
diff --git a/ot/lp/cvx.py b/ot/lp/cvx.py
index c8c75bc..8e763be 100644
--- a/ot/lp/cvx.py
+++ b/ot/lp/cvx.py
@@ -11,6 +11,7 @@ import numpy as np
 import scipy as sp
 import scipy.sparse as sps

+
 try:
     import cvxopt
     from cvxopt import solvers, matrix, spmatrix
@@ -26,7 +27,7 @@ def scipy_sparse_to_spmatrix(A):

 def barycenter(A, M, weights=None, verbose=False, log=False, solver='interior-point'):
-    """Compute the entropic regularized wasserstein barycenter of distributions A
+    """Compute the Wasserstein barycenter of distributions A

     The function solves the following optimization problem [16]:
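The LP barycenter itself is unchanged; a usage sketch on two 1-D histograms (hypothetical data; depending on the solver argument, cvxopt may be required, per the try/import above):

import numpy as np
import ot

n = 50
x = np.arange(n, dtype=np.float64).reshape((n, 1))
M = ot.dist(x, metric='sqeuclidean')     # cost between histogram bins
M /= M.max()

a1 = np.exp(-(x[:, 0] - 15) ** 2 / 50.)  # two Gaussian-like histograms
a1 /= a1.sum()
a2 = np.exp(-(x[:, 0] - 35) ** 2 / 50.)
a2 /= a2.sum()
A = np.vstack((a1, a2)).T                # one distribution per column of A

bary = ot.lp.barycenter(A, M, weights=np.array([0.5, 0.5]))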
diff --git a/ot/stochastic.py b/ot/stochastic.py
new file mode 100644
index 0000000..85c4230
--- /dev/null
+++ b/ot/stochastic.py
@@ -0,0 +1,736 @@
+# Author: Kilian Fatras <kilian.fatras@gmail.com>
+#
+# License: MIT License
+
+import numpy as np
+
+
+##############################################################################
+# Optimization toolbox for SEMI - DUAL problems
+##############################################################################
+
+
+def coordinate_grad_semi_dual(b, M, reg, beta, i):
+    '''
+    Compute the coordinate gradient update for regularized discrete distributions for (i, :)
+
+    The function computes the gradient of the semi dual problem:
+
+    .. math::
+        \max_v \sum_i (\sum_j v_j * b_j - reg * log(\sum_j exp((v_j - M_{i,j})/reg) * b_j)) * a_i
+
+    Where :
+
+    - M is the (ns,nt) metric cost matrix
+    - v is a dual variable in R^J
+    - reg is the regularization term
+    - a and b are source and target weights (sum to 1)
+
+    The algorithms used for solving the problem are the ASGD and SAG
+    algorithms as proposed in [18]_ [alg.1 & alg.2]
+
+
+    Parameters
+    ----------
+
+    b : np.ndarray(nt,)
+        target measure
+    M : np.ndarray(ns, nt)
+        cost matrix
+    reg : float
+        Regularization term > 0
+    beta : np.ndarray(nt,)
+        dual variable
+    i : int
+        index of the picked row
+
+    Returns
+    -------
+
+    coordinate gradient : np.ndarray(nt,)
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "ASGD"
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+    ...                                               method, numItermax)
+    >>> print(asgd_pi)
+
+    References
+    ----------
+
+    [Genevay et al., 2016] :
+        Stochastic Optimization for Large-scale Optimal Transport,
+        Advances in Neural Information Processing Systems (2016),
+        arXiv preprint arxiv:1605.08527.
+
+    '''
+
+    r = M[i, :] - beta
+    exp_beta = np.exp(-r / reg) * b
+    khi = exp_beta / (np.sum(exp_beta))
+    return b - khi
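For very small reg the exponential above can overflow; the min-shift used by c_transform_entropic further down removes the risk without changing the result, since the shift cancels in the normalized ratio. A sketch with a hypothetical helper name, not part of the patch:

import numpy as np


def coordinate_grad_semi_dual_stable(b, M, reg, beta, i):
    # same gradient as above, with a shift before exponentiating
    r = M[i, :] - beta
    r_min = np.min(r)                          # largest exponent becomes 0
    exp_beta = np.exp(-(r - r_min) / reg) * b
    khi = exp_beta / np.sum(exp_beta)          # the shift cancels here
    return b - khi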
+def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None):
+    '''
+    Compute the SAG algorithm to solve the regularized discrete measures
+    optimal transport max problem
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+        s.t. \gamma 1 = a
+
+             \gamma^T 1 = b
+
+             \gamma \geq 0
+
+    Where :
+
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term with :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+
+    The algorithm used for solving the problem is the SAG algorithm
+    as proposed in [18]_ [alg.1]
+
+
+    Parameters
+    ----------
+
+    a : np.ndarray(ns,)
+        source measure
+    b : np.ndarray(nt,)
+        target measure
+    M : np.ndarray(ns, nt)
+        cost matrix
+    reg : float
+        Regularization term > 0
+    numItermax : int
+        number of iterations
+    lr : float
+        learning rate
+
+    Returns
+    -------
+
+    v : np.ndarray(nt,)
+        dual variable
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "SAG"
+    >>> sag_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+    ...                                              method, numItermax)
+    >>> print(sag_pi)
+
+    References
+    ----------
+
+    [Genevay et al., 2016] :
+        Stochastic Optimization for Large-scale Optimal Transport,
+        Advances in Neural Information Processing Systems (2016),
+        arXiv preprint arxiv:1605.08527.
+    '''
+
+    if lr is None:
+        lr = 1. / max(a / reg)
+    n_source = np.shape(M)[0]
+    n_target = np.shape(M)[1]
+    cur_beta = np.zeros(n_target)
+    stored_gradient = np.zeros((n_source, n_target))
+    sum_stored_gradient = np.zeros(n_target)
+    for _ in range(numItermax):
+        i = np.random.randint(n_source)
+        cur_coord_grad = a[i] * coordinate_grad_semi_dual(b, M, reg,
+                                                          cur_beta, i)
+        sum_stored_gradient += (cur_coord_grad - stored_gradient[i])
+        stored_gradient[i] = cur_coord_grad
+        cur_beta += lr * (1. / n_source) * sum_stored_gradient
+    return cur_beta
+
+
+def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None):
+    '''
+    Compute the ASGD algorithm to solve the regularized semi-continuous
+    measures optimal transport max problem
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+        s.t. \gamma 1 = a
+
+             \gamma^T 1 = b
+
+             \gamma \geq 0
+
+    Where :
+
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term with :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+
+    The algorithm used for solving the problem is the ASGD algorithm
+    as proposed in [18]_ [alg.2]
+
+
+    Parameters
+    ----------
+
+    a : np.ndarray(ns,)
+        source measure (used to set the default learning rate)
+    b : np.ndarray(nt,)
+        target measure
+    M : np.ndarray(ns, nt)
+        cost matrix
+    reg : float
+        Regularization term > 0
+    numItermax : int
+        number of iterations
+    lr : float
+        learning rate
+
+
+    Returns
+    -------
+
+    ave_v : np.ndarray(nt,)
+        dual variable
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "ASGD"
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+    ...                                               method, numItermax)
+    >>> print(asgd_pi)
+
+    References
+    ----------
+
+    [Genevay et al., 2016] :
+        Stochastic Optimization for Large-scale Optimal Transport,
+        Advances in Neural Information Processing Systems (2016),
+        arXiv preprint arxiv:1605.08527.
+    '''
+
+    if lr is None:
+        lr = 1. / max(a / reg)
+    n_source = np.shape(M)[0]
+    n_target = np.shape(M)[1]
+    cur_beta = np.zeros(n_target)
+    ave_beta = np.zeros(n_target)
+    for cur_iter in range(numItermax):
+        k = cur_iter + 1
+        i = np.random.randint(n_source)
+        cur_coord_grad = coordinate_grad_semi_dual(b, M, reg, cur_beta, i)
+        cur_beta += (lr / np.sqrt(k)) * cur_coord_grad
+        ave_beta = (1. / k) * cur_beta + (1 - 1. / k) * ave_beta
+    return ave_beta
+def c_transform_entropic(b, M, reg, beta):
+    '''
+    The goal is to recover u from the c-transform.
+
+    The function computes the c-transform of a dual variable from the other
+    dual variable:
+
+    .. math::
+        u_i = v^{c,reg}_i = -reg \log \sum_j exp((v_j - M_{i,j})/reg) b_j
+
+    Where :
+
+    - M is the (ns,nt) metric cost matrix
+    - u, v are dual variables in R^I x R^J
+    - reg is the regularization term
+
+    It is used to recover an optimal u from an optimal v solving the semi
+    dual problem, see Proposition 2.1 of [18]_
+
+
+    Parameters
+    ----------
+
+    b : np.ndarray(nt,)
+        target measure
+    M : np.ndarray(ns, nt)
+        cost matrix
+    reg : float
+        regularization term > 0
+    beta : np.ndarray(nt,)
+        dual variable
+
+    Returns
+    -------
+
+    u : np.ndarray(ns,)
+        dual variable
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "ASGD"
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+    ...                                               method, numItermax)
+    >>> print(asgd_pi)
+
+    References
+    ----------
+
+    [Genevay et al., 2016] :
+        Stochastic Optimization for Large-scale Optimal Transport,
+        Advances in Neural Information Processing Systems (2016),
+        arXiv preprint arxiv:1605.08527.
+
+    '''
+
+    n_source = np.shape(M)[0]
+    alpha = np.zeros(n_source)
+    for i in range(n_source):
+        r = M[i, :] - beta
+        min_r = np.min(r)
+        exp_beta = np.exp(-(r - min_r) / reg) * b
+        alpha[i] = min_r - reg * np.log(np.sum(exp_beta))
+    return alpha
+
+
+def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
+                             log=False):
+    '''
+    Compute the transportation matrix to solve the regularized discrete
+    measures optimal transport max problem
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+        s.t. \gamma 1 = a
+
+             \gamma^T 1 = b
+
+             \gamma \geq 0
+
+    Where :
+
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term with :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+
+    The algorithms used for solving the problem are the SAG and ASGD
+    algorithms as proposed in [18]_
+
+
+    Parameters
+    ----------
+
+    a : np.ndarray(ns,)
+        source measure
+    b : np.ndarray(nt,)
+        target measure
+    M : np.ndarray(ns, nt)
+        cost matrix
+    reg : float
+        Regularization term > 0
+    method : str
+        used method (SAG or ASGD)
+    numItermax : int
+        number of iterations
+    lr : float
+        learning rate
+    log : bool, optional
+        record log if True
+
+    Returns
+    -------
+
+    pi : np.ndarray(ns, nt)
+        transportation matrix
+    log : dict
+        log dictionary returned only if log==True in parameters
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "ASGD"
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+    ...                                               method, numItermax)
+    >>> print(asgd_pi)
+
+    References
+    ----------
+
+    [Genevay et al., 2016] :
+        Stochastic Optimization for Large-scale Optimal Transport,
+        Advances in Neural Information Processing Systems (2016),
+        arXiv preprint arxiv:1605.08527.
+    '''
+
+    if method.lower() == "sag":
+        opt_beta = sag_entropic_transport(a, b, M, reg, numItermax, lr)
+    elif method.lower() == "asgd":
+        opt_beta = averaged_sgd_entropic_transport(a, b, M, reg, numItermax, lr)
+    else:
+        print("Please, select your method between SAG and ASGD")
+        return None
+
+    opt_alpha = c_transform_entropic(b, M, reg, opt_beta)
+    pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) *
+          a[:, None] * b[None, :])
+
+    if log:
+        log = {}
+        log['alpha'] = opt_alpha
+        log['beta'] = opt_beta
+        return pi, log
+    else:
+        return pi
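A quick sanity check for the semi-dual solvers (a sketch, assuming ot and numpy are installed): the coupling returned by solve_semi_dual_entropic should have marginals close to a and b.

import numpy as np
import ot
from ot import stochastic

n_source, n_target, reg = 7, 4, 1.
a, b = ot.unif(n_source), ot.unif(n_target)
rng = np.random.RandomState(0)
M = ot.dist(rng.randn(n_source, 2), rng.randn(n_target, 2))

pi = stochastic.solve_semi_dual_entropic(a, b, M, reg, "SAG",
                                         numItermax=100000)
print(np.allclose(pi.sum(1), a, atol=1e-3))  # row marginals ~ a
print(np.allclose(pi.sum(0), b, atol=1e-3))  # column marginals ~ b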
+##############################################################################
+# Optimization toolbox for DUAL problems
+##############################################################################
+
+
+def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
+                    batch_beta):
+    '''
+    Computes the partial gradient of the dual optimal transport problem.
+
+    For each (i,j) in a batch of coordinates, the partial gradients are :
+
+    .. math::
+        \partial_{u_i} F = a_i * l_v / n_t - \sum_{j \in B_v} exp((u_i + v_j - M_{i,j})/reg) * a_i * b_j
+
+        \partial_{v_j} F = b_j * l_u / n_s - \sum_{i \in B_u} exp((u_i + v_j - M_{i,j})/reg) * a_i * b_j
+
+    Where :
+
+    - M is the (ns,nt) metric cost matrix
+    - u, v are dual variables in R^{ns} x R^{nt}
+    - reg is the regularization term
+    - :math:`B_u` and :math:`B_v` are lists of indices
+    - :math:`b_s` is the size of the batches :math:`B_u` and :math:`B_v`
+    - :math:`l_u` and :math:`l_v` are the lengths of :math:`B_u` and :math:`B_v`
+    - :math:`n_s` and :math:`n_t` are the numbers of source and target samples
+    - a and b are source and target weights (sum to 1)
+
+
+    The algorithm used for solving the dual problem is the SGD algorithm
+    as proposed in [19]_ [alg.1]
+
+
+    Parameters
+    ----------
+
+    a : np.ndarray(ns,)
+        source measure
+    b : np.ndarray(nt,)
+        target measure
+    M : np.ndarray(ns, nt)
+        cost matrix
+    reg : float
+        Regularization term > 0
+    alpha : np.ndarray(ns,)
+        dual variable
+    beta : np.ndarray(nt,)
+        dual variable
+    batch_size : int
+        size of the batch
+    batch_alpha : np.ndarray(bs,)
+        batch of indices of alpha
+    batch_beta : np.ndarray(bs,)
+        batch of indices of beta
+
+    Returns
+    -------
+
+    grad_alpha : np.ndarray(ns,)
+        partial grad F with respect to u
+    grad_beta : np.ndarray(nt,)
+        partial grad F with respect to v
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 20000
+    >>> lr = 0.1
+    >>> batch_size = 3
+    >>> log = True
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+    ...                                                   batch_size,
+    ...                                                   numItermax, lr, log)
+    >>> print(log['alpha'], log['beta'])
+    >>> print(sgd_dual_pi)
+
+    References
+    ----------
+
+    [Seguy et al., 2018] :
+        Large-Scale Optimal Transport and Mapping Estimation,
+        International Conference on Learning Representations (2018),
+        arXiv preprint arxiv:1711.02283.
+    '''
+
+    G = - (np.exp((alpha[batch_alpha, None] + beta[None, batch_beta] -
+                   M[batch_alpha, :][:, batch_beta]) / reg) *
+           a[batch_alpha, None] * b[None, batch_beta])
+    grad_beta = np.zeros(np.shape(M)[1])
+    grad_alpha = np.zeros(np.shape(M)[0])
+    grad_beta[batch_beta] = (b[batch_beta] * len(batch_alpha) / np.shape(M)[0]
+                             + G.sum(0))
+    grad_alpha[batch_alpha] = (a[batch_alpha] * len(batch_beta)
+                               / np.shape(M)[1] + G.sum(1))
+
+    return grad_alpha, grad_beta
+def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
+    '''
+    Compute the SGD algorithm to solve the regularized discrete measures
+    optimal transport dual problem
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+        s.t. \gamma 1 = a
+
+             \gamma^T 1 = b
+
+             \gamma \geq 0
+
+    Where :
+
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term with :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+
+    Parameters
+    ----------
+
+    a : np.ndarray(ns,)
+        source measure
+    b : np.ndarray(nt,)
+        target measure
+    M : np.ndarray(ns, nt)
+        cost matrix
+    reg : float
+        Regularization term > 0
+    batch_size : int
+        size of the batch
+    numItermax : int
+        number of iterations
+    lr : float
+        learning rate
+
+    Returns
+    -------
+
+    alpha : np.ndarray(ns,)
+        dual variable
+    beta : np.ndarray(nt,)
+        dual variable
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 20000
+    >>> lr = 0.1
+    >>> batch_size = 3
+    >>> log = True
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+    ...                                                   batch_size,
+    ...                                                   numItermax, lr, log)
+    >>> print(log['alpha'], log['beta'])
+    >>> print(sgd_dual_pi)
+
+    References
+    ----------
+
+    [Seguy et al., 2018] :
+        Large-Scale Optimal Transport and Mapping Estimation,
+        International Conference on Learning Representations (2018),
+        arXiv preprint arxiv:1711.02283.
+    '''
+
+    n_source = np.shape(M)[0]
+    n_target = np.shape(M)[1]
+    cur_alpha = np.zeros(n_source)
+    cur_beta = np.zeros(n_target)
+    for cur_iter in range(numItermax):
+        k = np.sqrt(cur_iter + 1)
+        batch_alpha = np.random.choice(n_source, batch_size, replace=False)
+        batch_beta = np.random.choice(n_target, batch_size, replace=False)
+        update_alpha, update_beta = batch_grad_dual(a, b, M, reg, cur_alpha,
+                                                    cur_beta, batch_size,
+                                                    batch_alpha, batch_beta)
+        cur_alpha[batch_alpha] += (lr / k) * update_alpha[batch_alpha]
+        cur_beta[batch_beta] += (lr / k) * update_beta[batch_beta]
+
+    return cur_alpha, cur_beta
+def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1,
+                        log=False):
+    '''
+    Compute the transportation matrix to solve the regularized discrete
+    measures optimal transport dual problem
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+        s.t. \gamma 1 = a
+
+             \gamma^T 1 = b
+
+             \gamma \geq 0
+
+    Where :
+
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+
+    Parameters
+    ----------
+
+    a : np.ndarray(ns,)
+        source measure
+    b : np.ndarray(nt,)
+        target measure
+    M : np.ndarray(ns, nt)
+        cost matrix
+    reg : float
+        Regularization term > 0
+    batch_size : int
+        size of the batch
+    numItermax : int
+        number of iterations
+    lr : float
+        learning rate
+    log : bool, optional
+        record log if True
+
+    Returns
+    -------
+
+    pi : np.ndarray(ns, nt)
+        transportation matrix
+    log : dict
+        log dictionary returned only if log==True in parameters
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 20000
+    >>> lr = 0.1
+    >>> batch_size = 3
+    >>> log = True
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+    ...                                                   batch_size,
+    ...                                                   numItermax, lr, log)
+    >>> print(log['alpha'], log['beta'])
+    >>> print(sgd_dual_pi)
+
+    References
+    ----------
+
+    [Seguy et al., 2018] :
+        Large-Scale Optimal Transport and Mapping Estimation,
+        International Conference on Learning Representations (2018),
+        arXiv preprint arxiv:1711.02283.
+    '''
+
+    opt_alpha, opt_beta = sgd_entropic_regularization(a, b, M, reg, batch_size,
+                                                      numItermax, lr)
+    pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) *
+          a[:, None] * b[None, :])
+    if log:
+        log = {}
+        log['alpha'] = opt_alpha
+        log['beta'] = opt_beta
+        return pi, log
+    else:
+        return pi
diff --git a/ot/utils.py b/ot/utils.py
index 7dac283..bb21b38 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -77,6 +77,34 @@ def clean_zeros(a, b, M):
     return a2, b2, M2

+def euclidean_distances(X, Y, squared=False):
+    """
+    Considering the rows of X (and Y=X) as vectors, compute the
+    distance matrix between each pair of vectors.
+
+    Parameters
+    ----------
+    X : {array-like}, shape (n_samples_1, n_features)
+    Y : {array-like}, shape (n_samples_2, n_features)
+    squared : boolean, optional
+        Return squared Euclidean distances.
+
+    Returns
+    -------
+    distances : {array}, shape (n_samples_1, n_samples_2)
+    """
+    XX = np.einsum('ij,ij->i', X, X)[:, np.newaxis]
+    YY = np.einsum('ij,ij->i', Y, Y)[np.newaxis, :]
+    distances = np.dot(X, Y.T)
+    distances *= -2
+    distances += XX
+    distances += YY
+    np.maximum(distances, 0, out=distances)
+    if X is Y:
+        # Ensure that distances between vectors and themselves are set to 0.0.
+        # This may not be the case due to floating point rounding errors.
+        distances.flat[::distances.shape[0] + 1] = 0.0
+    return distances if squared else np.sqrt(distances, out=distances)
+
+
 def dist(x1, x2=None, metric='sqeuclidean'):
     """Compute distance between samples in x1 and x2 using function scipy.spatial.distance.cdist
@@ -104,7 +132,8 @@ def dist(x1, x2=None, metric='sqeuclidean'):
     """
     if x2 is None:
         x2 = x1
-
+    if metric == "sqeuclidean":
+        return euclidean_distances(x1, x2, squared=True)
     return cdist(x1, x2, metric=metric)
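The new fast path only intercepts metric='sqeuclidean'; other metrics still go through cdist, which gives an easy consistency check. A quick sketch:

import numpy as np
import ot

x1 = np.random.randn(100, 5)
x2 = np.random.randn(80, 5)

D_fast = ot.dist(x1, x2)                           # einsum-based fast path
D_ref = ot.dist(x1, x2, metric='euclidean') ** 2   # cdist route, squared
print(np.allclose(D_fast, D_ref))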