diff options
Diffstat (limited to 'ot')
-rw-r--r-- | ot/__init__.py | 2 | ||||
-rw-r--r-- | ot/bregman.py | 279 | ||||
-rw-r--r-- | ot/da.py | 284 | ||||
-rw-r--r-- | ot/datasets.py | 4 | ||||
-rw-r--r-- | ot/gpu/__init__.py | 33 | ||||
-rw-r--r-- | ot/gpu/bregman.py | 148 | ||||
-rw-r--r-- | ot/gpu/da.py | 213 | ||||
-rw-r--r-- | ot/gpu/utils.py | 101 | ||||
-rw-r--r-- | ot/lp/__init__.py | 95 | ||||
-rw-r--r-- | ot/lp/cvx.py | 3 | ||||
-rw-r--r-- | ot/smooth.py | 600 | ||||
-rw-r--r-- | ot/utils.py | 31 |
12 files changed, 1275 insertions, 518 deletions
diff --git a/ot/__init__.py b/ot/__init__.py index 1dde390..b74b924 100644 --- a/ot/__init__.py +++ b/ot/__init__.py @@ -29,7 +29,7 @@ from .da import sinkhorn_lpl1_mm # utils functions from .utils import dist, unif, tic, toc, toq -__version__ = "0.4.0" +__version__ = "0.5.1" __all__ = ["emd", "emd2", "sinkhorn", "sinkhorn2", "utils", 'datasets', 'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov', diff --git a/ot/bregman.py b/ot/bregman.py index b017c1a..d1057ff 100644 --- a/ot/bregman.py +++ b/ot/bregman.py @@ -47,7 +47,7 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, reg : float Regularization term >0 method : str - method used for the solver either 'sinkhorn', 'sinkhorn_stabilized' or + method used for the solver either 'sinkhorn', 'greenkhorn', 'sinkhorn_stabilized' or 'sinkhorn_epsilon_scaling', see those function for specific parameters numItermax : int, optional Max number of iterations @@ -103,6 +103,10 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, def sink(): return sinkhorn_knopp(a, b, M, reg, numItermax=numItermax, stopThr=stopThr, verbose=verbose, log=log, **kwargs) + elif method.lower() == 'greenkhorn': + def sink(): + return greenkhorn(a, b, M, reg, numItermax=numItermax, + stopThr=stopThr, verbose=verbose, log=log) elif method.lower() == 'sinkhorn_stabilized': def sink(): return sinkhorn_stabilized(a, b, M, reg, numItermax=numItermax, @@ -197,6 +201,8 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000, .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816. + [21] Altschuler J., Weed J., Rigollet P. : Near-linear time approximation algorithms for optimal transport via Sinkhorn iteration, Advances in Neural Information Processing Systems (NIPS) 31, 2017 + See Also @@ -204,6 +210,7 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000, ot.lp.emd : Unregularized OT ot.optim.cg : General regularized OT ot.bregman.sinkhorn_knopp : Classic Sinkhorn [2] + ot.bregman.greenkhorn : Greenkhorn [21] ot.bregman.sinkhorn_stabilized: Stabilized sinkhorn [9][10] ot.bregman.sinkhorn_epsilon_scaling: Sinkhorn with epslilon scaling [9][10] @@ -344,8 +351,13 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, # print(reg) - K = np.exp(-M / reg) + # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute + K = np.empty(M.shape, dtype=M.dtype) + np.divide(M, -reg, out=K) + np.exp(K, out=K) + # print(np.min(K)) + tmp2 = np.empty(b.shape, dtype=M.dtype) Kp = (1 / a).reshape(-1, 1) * K cpt = 0 @@ -353,6 +365,7 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, while (err > stopThr and cpt < numItermax): uprev = u vprev = v + KtransposeU = np.dot(K.T, u) v = np.divide(b, KtransposeU) u = 1. / np.dot(Kp, v) @@ -373,8 +386,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, err = np.sum((u - uprev)**2) / np.sum((u)**2) + \ np.sum((v - vprev)**2) / np.sum((v)**2) else: - transp = u.reshape(-1, 1) * (K * v) - err = np.linalg.norm((np.sum(transp, axis=0) - b))**2 + # compute right marginal tmp2= (diag(u)Kdiag(v))^T1 + np.einsum('i,ij,j->j', u, K, v, out=tmp2) + err = np.linalg.norm(tmp2 - b)**2 # violation of marginal if log: log['err'].append(err) @@ -389,10 +403,7 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, log['v'] = v if nbb: # return only loss - res = np.zeros((nbb)) - for i in range(nbb): - res[i] = np.sum( - u[:, i].reshape((-1, 1)) * K * v[:, i].reshape((1, -1)) * M) + res = np.einsum('ik,ij,jk,ij->k', u, K, v, M) if log: return res, log else: @@ -406,6 +417,148 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, return u.reshape((-1, 1)) * K * v.reshape((1, -1)) +def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log=False): + """ + Solve the entropic regularization optimal transport problem and return the OT matrix + + The algorithm used is based on the paper + + Near-linear time approximation algorithms for optimal transport via Sinkhorn iteration + by Jason Altschuler, Jonathan Weed, Philippe Rigollet + appeared at NIPS 2017 + + which is a stochastic version of the Sinkhorn-Knopp algorithm [2]. + + The function solves the following optimization problem: + + .. math:: + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + + s.t. \gamma 1 = a + + \gamma^T 1= b + + \gamma\geq 0 + where : + + - M is the (ns,nt) metric cost matrix + - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - a and b are source and target weights (sum to 1) + + + + Parameters + ---------- + a : np.ndarray (ns,) + samples weights in the source domain + b : np.ndarray (nt,) or np.ndarray (nt,nbb) + samples in the target domain, compute sinkhorn with multiple targets + and fixed M if b is a matrix (return OT loss + dual variables in log) + M : np.ndarray (ns,nt) + loss matrix + reg : float + Regularization term >0 + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshol on error (>0) + log : bool, optional + record log if True + + + Returns + ------- + gamma : (ns x nt) ndarray + Optimal transportation matrix for the given parameters + log : dict + log dictionary return only if log==True in parameters + + Examples + -------- + + >>> import ot + >>> a=[.5,.5] + >>> b=[.5,.5] + >>> M=[[0.,1.],[1.,0.]] + >>> ot.bregman.greenkhorn(a,b,M,1) + array([[ 0.36552929, 0.13447071], + [ 0.13447071, 0.36552929]]) + + + References + ---------- + + .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013 + [22] J. Altschuler, J.Weed, P. Rigollet : Near-linear time approximation algorithms for optimal transport via Sinkhorn iteration, Advances in Neural Information Processing Systems (NIPS) 31, 2017 + + + See Also + -------- + ot.lp.emd : Unregularized OT + ot.optim.cg : General regularized OT + + """ + + n = a.shape[0] + m = b.shape[0] + + # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute + K = np.empty_like(M) + np.divide(M, -reg, out=K) + np.exp(K, out=K) + + u = np.full(n, 1. / n) + v = np.full(m, 1. / m) + G = u[:, np.newaxis] * K * v[np.newaxis, :] + + viol = G.sum(1) - a + viol_2 = G.sum(0) - b + stopThr_val = 1 + if log: + log['u'] = u + log['v'] = v + + for i in range(numItermax): + i_1 = np.argmax(np.abs(viol)) + i_2 = np.argmax(np.abs(viol_2)) + m_viol_1 = np.abs(viol[i_1]) + m_viol_2 = np.abs(viol_2[i_2]) + stopThr_val = np.maximum(m_viol_1, m_viol_2) + + if m_viol_1 > m_viol_2: + old_u = u[i_1] + u[i_1] = a[i_1] / (K[i_1, :].dot(v)) + G[i_1, :] = u[i_1] * K[i_1, :] * v + + viol[i_1] = u[i_1] * K[i_1, :].dot(v) - a[i_1] + viol_2 += (K[i_1, :].T * (u[i_1] - old_u) * v) + + else: + old_v = v[i_2] + v[i_2] = b[i_2] / (K[:, i_2].T.dot(u)) + G[:, i_2] = u * K[:, i_2] * v[i_2] + #aviol = (G@one_m - a) + #aviol_2 = (G.T@one_n - b) + viol += (-old_v + v[i_2]) * K[:, i_2] * u + viol_2[i_2] = v[i_2] * K[:, i_2].dot(u) - b[i_2] + + #print('b',np.max(abs(aviol -viol)),np.max(abs(aviol_2 - viol_2))) + + if stopThr_val <= stopThr: + break + else: + print('Warning: Algorithm did not converge') + + if log: + log['u'] = u + log['v'] = v + + if log: + return G, log + else: + return G + + def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, warmstart=None, verbose=False, print_period=20, log=False, **kwargs): """ @@ -915,6 +1068,116 @@ def barycenter(A, M, reg, weights=None, numItermax=1000, return geometricBar(weights, UKv) +def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1e-9, stabThr=1e-30, verbose=False, log=False): + """Compute the entropic regularized wasserstein barycenter of distributions A + where A is a collection of 2D images. + + The function solves the following optimization problem: + + .. math:: + \mathbf{a} = arg\min_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i) + + where : + + - :math:`W_{reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance (see ot.bregman.sinkhorn) + - :math:`\mathbf{a}_i` are training distributions (2D images) in the mast two dimensions of matrix :math:`\mathbf{A}` + - reg is the regularization strength scalar value + + The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [21]_ + + Parameters + ---------- + A : np.ndarray (n,w,h) + n distributions (2D images) of size w x h + reg : float + Regularization term >0 + weights : np.ndarray (n,) + Weights of each image on the simplex (barycentric coodinates) + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshol on error (>0) + stabThr : float, optional + Stabilization threshold to avoid numerical precision issue + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + + + Returns + ------- + a : (w,h) ndarray + 2D Wasserstein barycenter + log : dict + log dictionary return only if log==True in parameters + + + References + ---------- + + .. [21] Solomon, J., De Goes, F., Peyré, G., Cuturi, M., Butscher, A., Nguyen, A. & Guibas, L. (2015). + Convolutional wasserstein distances: Efficient optimal transportation on geometric domains + ACM Transactions on Graphics (TOG), 34(4), 66 + + + """ + + if weights is None: + weights = np.ones(A.shape[0]) / A.shape[0] + else: + assert(len(weights) == A.shape[0]) + + if log: + log = {'err': []} + + b = np.zeros_like(A[0, :, :]) + U = np.ones_like(A) + KV = np.ones_like(A) + + cpt = 0 + err = 1 + + # build the convolution operator + t = np.linspace(0, 1, A.shape[1]) + [Y, X] = np.meshgrid(t, t) + xi1 = np.exp(-(X - Y)**2 / reg) + + def K(x): + return np.dot(np.dot(xi1, x), xi1) + + while (err > stopThr and cpt < numItermax): + + bold = b + cpt = cpt + 1 + + b = np.zeros_like(A[0, :, :]) + for r in range(A.shape[0]): + KV[r, :, :] = K(A[r, :, :] / np.maximum(stabThr, K(U[r, :, :]))) + b += weights[r] * np.log(np.maximum(stabThr, U[r, :, :] * KV[r, :, :])) + b = np.exp(b) + for r in range(A.shape[0]): + U[r, :, :] = b / np.maximum(stabThr, KV[r, :, :]) + + if cpt % 10 == 1: + err = np.sum(np.abs(bold - b)) + # log and verbose print + if log: + log['err'].append(err) + + if verbose: + if cpt % 200 == 0: + print('{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19) + print('{:5d}|{:8e}|'.format(cpt, err)) + + if log: + log['niter'] = cpt + log['U'] = U + return b, log + else: + return b + + def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000, stopThr=1e-3, verbose=False, log=False): """ @@ -15,7 +15,7 @@ import scipy.linalg as linalg from .bregman import sinkhorn from .lp import emd from .utils import unif, dist, kernel, cost_normalization -from .utils import check_params, deprecated, BaseEstimator +from .utils import check_params, BaseEstimator from .optim import cg from .optim import gcg @@ -740,288 +740,6 @@ def OT_mapping_linear(xs, xt, reg=1e-6, ws=None, return A, b -@deprecated("The class OTDA is deprecated in 0.3.1 and will be " - "removed in 0.5" - "\n\tfor standard transport use class EMDTransport instead.") -class OTDA(object): - - """Class for domain adaptation with optimal transport as proposed in [5] - - - References - ---------- - - .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, - "Optimal Transport for Domain Adaptation," in IEEE Transactions on - Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 - - """ - - def __init__(self, metric='sqeuclidean', norm=None): - """ Class initialization""" - self.xs = 0 - self.xt = 0 - self.G = 0 - self.metric = metric - self.norm = norm - self.computed = False - - def fit(self, xs, xt, ws=None, wt=None, max_iter=100000): - """Fit domain adaptation between samples is xs and xt - (with optional weights)""" - self.xs = xs - self.xt = xt - - if wt is None: - wt = unif(xt.shape[0]) - if ws is None: - ws = unif(xs.shape[0]) - - self.ws = ws - self.wt = wt - - self.M = dist(xs, xt, metric=self.metric) - self.M = cost_normalization(self.M, self.norm) - self.G = emd(ws, wt, self.M, max_iter) - self.computed = True - - def interp(self, direction=1): - """Barycentric interpolation for the source (1) or target (-1) samples - - This Barycentric interpolation solves for each source (resp target) - sample xs (resp xt) the following optimization problem: - - .. math:: - arg\min_x \sum_i \gamma_{k,i} c(x,x_i^t) - - where k is the index of the sample in xs - - For the moment only squared euclidean distance is provided but more - metric could be used in the future. - - """ - if direction > 0: # >0 then source to target - G = self.G - w = self.ws.reshape((self.xs.shape[0], 1)) - x = self.xt - else: - G = self.G.T - w = self.wt.reshape((self.xt.shape[0], 1)) - x = self.xs - - if self.computed: - if self.metric == 'sqeuclidean': - return np.dot(G / w, x) # weighted mean - else: - print( - "Warning, metric not handled yet, using weighted average") - return np.dot(G / w, x) # weighted mean - return None - else: - print("Warning, model not fitted yet, returning None") - return None - - def predict(self, x, direction=1): - """ Out of sample mapping using the formulation from [6] - - For each sample x to map, it finds the nearest source sample xs and - map the samle x to the position xst+(x-xs) wher xst is the barycentric - interpolation of source sample xs. - - References - ---------- - - .. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014). - Regularized discrete optimal transport. SIAM Journal on Imaging - Sciences, 7(3), 1853-1882. - - """ - if direction > 0: # >0 then source to target - xf = self.xt - x0 = self.xs - else: - xf = self.xs - x0 = self.xt - - D0 = dist(x, x0) # dist netween new samples an source - idx = np.argmin(D0, 1) # closest one - xf = self.interp(direction) # interp the source samples - # aply the delta to the interpolation - return xf[idx, :] + x - x0[idx, :] - - -@deprecated("The class OTDA_sinkhorn is deprecated in 0.3.1 and will be" - " removed in 0.5 \nUse class SinkhornTransport instead.") -class OTDA_sinkhorn(OTDA): - - """Class for domain adaptation with optimal transport with entropic - regularization - - - """ - - def fit(self, xs, xt, reg=1, ws=None, wt=None, **kwargs): - """Fit regularized domain adaptation between samples is xs and xt - (with optional weights)""" - self.xs = xs - self.xt = xt - - if wt is None: - wt = unif(xt.shape[0]) - if ws is None: - ws = unif(xs.shape[0]) - - self.ws = ws - self.wt = wt - - self.M = dist(xs, xt, metric=self.metric) - self.M = cost_normalization(self.M, self.norm) - self.G = sinkhorn(ws, wt, self.M, reg, **kwargs) - self.computed = True - - -@deprecated("The class OTDA_lpl1 is deprecated in 0.3.1 and will be" - " removed in 0.5 \nUse class SinkhornLpl1Transport instead.") -class OTDA_lpl1(OTDA): - - """Class for domain adaptation with optimal transport with entropic and - group regularization""" - - def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, **kwargs): - """Fit regularized domain adaptation between samples is xs and xt - (with optional weights), See ot.da.sinkhorn_lpl1_mm for fit - parameters""" - self.xs = xs - self.xt = xt - - if wt is None: - wt = unif(xt.shape[0]) - if ws is None: - ws = unif(xs.shape[0]) - - self.ws = ws - self.wt = wt - - self.M = dist(xs, xt, metric=self.metric) - self.M = cost_normalization(self.M, self.norm) - self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M, reg, eta, **kwargs) - self.computed = True - - -@deprecated("The class OTDA_l1L2 is deprecated in 0.3.1 and will be" - " removed in 0.5 \nUse class SinkhornL1l2Transport instead.") -class OTDA_l1l2(OTDA): - - """Class for domain adaptation with optimal transport with entropic - and group lasso regularization""" - - def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, **kwargs): - """Fit regularized domain adaptation between samples is xs and xt - (with optional weights), See ot.da.sinkhorn_lpl1_gl for fit - parameters""" - self.xs = xs - self.xt = xt - - if wt is None: - wt = unif(xt.shape[0]) - if ws is None: - ws = unif(xs.shape[0]) - - self.ws = ws - self.wt = wt - - self.M = dist(xs, xt, metric=self.metric) - self.M = cost_normalization(self.M, self.norm) - self.G = sinkhorn_l1l2_gl(ws, ys, wt, self.M, reg, eta, **kwargs) - self.computed = True - - -@deprecated("The class OTDA_mapping_linear is deprecated in 0.3.1 and will be" - " removed in 0.5 \nUse class MappingTransport instead.") -class OTDA_mapping_linear(OTDA): - - """Class for optimal transport with joint linear mapping estimation as in - [8] - """ - - def __init__(self): - """ Class initialization""" - - self.xs = 0 - self.xt = 0 - self.G = 0 - self.L = 0 - self.bias = False - self.computed = False - self.metric = 'sqeuclidean' - - def fit(self, xs, xt, mu=1, eta=1, bias=False, **kwargs): - """ Fit domain adaptation between samples is xs and xt (with optional - weights)""" - self.xs = xs - self.xt = xt - self.bias = bias - - self.ws = unif(xs.shape[0]) - self.wt = unif(xt.shape[0]) - - self.G, self.L = joint_OT_mapping_linear( - xs, xt, mu=mu, eta=eta, bias=bias, **kwargs) - self.computed = True - - def mapping(self): - return lambda x: self.predict(x) - - def predict(self, x): - """ Out of sample mapping estimated during the call to fit""" - if self.computed: - if self.bias: - x = np.hstack((x, np.ones((x.shape[0], 1)))) - return x.dot(self.L) # aply the delta to the interpolation - else: - print("Warning, model not fitted yet, returning None") - return None - - -@deprecated("The class OTDA_mapping_kernel is deprecated in 0.3.1 and will be" - " removed in 0.5 \nUse class MappingTransport instead.") -class OTDA_mapping_kernel(OTDA_mapping_linear): - - """Class for optimal transport with joint nonlinear mapping - estimation as in [8]""" - - def fit(self, xs, xt, mu=1, eta=1, bias=False, kerneltype='gaussian', - sigma=1, **kwargs): - """ Fit domain adaptation between samples is xs and xt """ - self.xs = xs - self.xt = xt - self.bias = bias - - self.ws = unif(xs.shape[0]) - self.wt = unif(xt.shape[0]) - self.kernel = kerneltype - self.sigma = sigma - self.kwargs = kwargs - - self.G, self.L = joint_OT_mapping_kernel( - xs, xt, mu=mu, eta=eta, bias=bias, **kwargs) - self.computed = True - - def predict(self, x): - """ Out of sample mapping estimated during the call to fit""" - - if self.computed: - K = kernel( - x, self.xs, method=self.kernel, sigma=self.sigma, - **self.kwargs) - if self.bias: - K = np.hstack((K, np.ones((x.shape[0], 1)))) - return K.dot(self.L) - else: - print("Warning, model not fitted yet, returning None") - return None - - def distribution_estimation_uniform(X): """estimates a uniform distribution from an array of samples X diff --git a/ot/datasets.py b/ot/datasets.py index 362a89b..e76e75d 100644 --- a/ot/datasets.py +++ b/ot/datasets.py @@ -38,9 +38,9 @@ def make_1D_gauss(n, m, s): @deprecated() -def get_1D_gauss(n, m, sigma, random_state=None): +def get_1D_gauss(n, m, sigma): """ Deprecated see make_1D_gauss """ - return make_1D_gauss(n, m, sigma, random_state=None) + return make_1D_gauss(n, m, sigma) def make_2D_samples_gauss(n, m, sigma, random_state=None): diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py index a2fdd3d..deda6b1 100644 --- a/ot/gpu/__init__.py +++ b/ot/gpu/__init__.py @@ -1,12 +1,37 @@ # -*- coding: utf-8 -*- +""" -from . import bregman -from . import da -from .bregman import sinkhorn +This module provides GPU implementation for several OT solvers and utility +functions. The GPU backend in handled by `cupy +<https://cupy.chainer.org/>`_. + +By default, the functions in this module accept and return numpy arrays +in order to proide drop-in replacement for the other POT function but +the transfer between CPU en GPU comes with a significant overhead. + +In order to get the best erformances, we recommend to give only cupy +arrays to the functions and desactivate the conversion to numpy of the +result of the function with parameter ``to_numpy=False``. + +""" # Author: Remi Flamary <remi.flamary@unice.fr> # Leo Gautheron <https://github.com/aje> # # License: MIT License -__all__ = ["bregman", "da", "sinkhorn"] +from . import bregman +from . import da +from .bregman import sinkhorn +from .da import sinkhorn_lpl1_mm + +from . import utils +from .utils import dist, to_gpu, to_np + + + + + +__all__ = ["utils", "dist", "sinkhorn", + "sinkhorn_lpl1_mm", 'bregman', 'da', 'to_gpu', 'to_np'] + diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py index 47939c4..978b307 100644 --- a/ot/gpu/bregman.py +++ b/ot/gpu/bregman.py @@ -8,14 +8,18 @@ Bregman projections for regularized OT with GPU # # License: MIT License -import numpy as np -import cudamat +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations +from . import utils -def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, - log=False, returnAsGPU=False): - r""" - Solve the entropic regularization optimal transport problem on GPU +def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, + verbose=False, log=False, to_numpy=True, **kwargs): + """ + Solve the entropic regularization optimal transport on GPU + + If the input matrix are in numpy format, they will be uploaded to the + GPU first which can incur significant time overhead. The function solves the following optimization problem: @@ -40,9 +44,10 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, ---------- a : np.ndarray (ns,) samples weights in the source domain - b : np.ndarray (nt,) - samples in the target domain - M_GPU : cudamat.CUDAMatrix (ns,nt) + b : np.ndarray (nt,) or np.ndarray (nt,nbb) + samples in the target domain, compute sinkhorn with multiple targets + and fixed M if b is a matrix (return OT loss + dual variables in log) + M : np.ndarray (ns,nt) loss matrix reg : float Regularization term >0 @@ -54,8 +59,9 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, Print information along iterations log : bool, optional record log if True - returnAsGPU : bool, optional - return the OT matrix as a cudamat.CUDAMatrix + to_numpy : boolean, optional (default True) + If true convert back the GPU array result to numpy format. + Returns ------- @@ -88,60 +94,78 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, ot.optim.cg : General regularized OT """ + + a = cp.asarray(a) + b = cp.asarray(b) + M = cp.asarray(M) + + if len(a) == 0: + a = np.ones((M.shape[0],)) / M.shape[0] + if len(b) == 0: + b = np.ones((M.shape[1],)) / M.shape[1] + # init data Nini = len(a) Nfin = len(b) + if len(b.shape) > 1: + nbb = b.shape[1] + else: + nbb = 0 + if log: log = {'err': []} # we assume that no distances are null except those of the diagonal of # distances - u = (np.ones(Nini) / Nini).reshape((Nini, 1)) - u_GPU = cudamat.CUDAMatrix(u) - a_GPU = cudamat.CUDAMatrix(a.reshape((Nini, 1))) - ones_GPU = cudamat.empty(u_GPU.shape).assign(1) - v = (np.ones(Nfin) / Nfin).reshape((Nfin, 1)) - v_GPU = cudamat.CUDAMatrix(v) - b_GPU = cudamat.CUDAMatrix(b.reshape((Nfin, 1))) - - M_GPU.divide(-reg) + if nbb: + u = np.ones((Nini, nbb)) / Nini + v = np.ones((Nfin, nbb)) / Nfin + else: + u = np.ones(Nini) / Nini + v = np.ones(Nfin) / Nfin - K_GPU = cudamat.exp(M_GPU) + # print(reg) - ones_GPU.divide(a_GPU, target=a_GPU) - Kp_GPU = cudamat.empty(K_GPU.shape) - K_GPU.mult_by_col(a_GPU, target=Kp_GPU) + # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute + K = np.empty(M.shape, dtype=M.dtype) + np.divide(M, -reg, out=K) + np.exp(K, out=K) - tmp_GPU = cudamat.empty(K_GPU.shape) + # print(np.min(K)) + tmp2 = np.empty(b.shape, dtype=M.dtype) + Kp = (1 / a).reshape(-1, 1) * K cpt = 0 err = 1 while (err > stopThr and cpt < numItermax): - uprev_GPU = u_GPU.copy() - vprev_GPU = v_GPU.copy() + uprev = u + vprev = v - KtransposeU_GPU = K_GPU.transpose().dot(u_GPU) - b_GPU.divide(KtransposeU_GPU, target=v_GPU) - ones_GPU.divide(Kp_GPU.dot(v_GPU), target=u_GPU) + KtransposeU = np.dot(K.T, u) + v = np.divide(b, KtransposeU) + u = 1. / np.dot(Kp, v) - if (np.any(KtransposeU_GPU.asarray() == 0) or - not u_GPU.allfinite() or not v_GPU.allfinite()): + if (np.any(KtransposeU == 0) or + np.any(np.isnan(u)) or np.any(np.isnan(v)) or + np.any(np.isinf(u)) or np.any(np.isinf(v))): # we have reached the machine precision # come back to previous solution and quit loop print('Warning: numerical errors at iteration', cpt) - u_GPU = uprev_GPU.copy() - v_GPU = vprev_GPU.copy() + u = uprev + v = vprev break if cpt % 10 == 0: # we can speed up the process by checking for the error only all # the 10th iterations - K_GPU.mult_by_col(u_GPU, target=tmp_GPU) - tmp_GPU.mult_by_row(v_GPU.transpose(), target=tmp_GPU) - - bcopy_GPU = b_GPU.copy().transpose() - bcopy_GPU.add_sums(tmp_GPU, axis=0, beta=-1) - err = bcopy_GPU.euclid_norm()**2 + if nbb: + err = np.sum((u - uprev)**2) / np.sum((u)**2) + \ + np.sum((v - vprev)**2) / np.sum((v)**2) + else: + # compute right marginal tmp2= (diag(u)Kdiag(v))^T1 + tmp2 = np.sum(u[:, None] * K * v[None, :], 0) + #tmp2=np.einsum('i,ij,j->j', u, K, v) + err = np.linalg.norm(tmp2 - b)**2 # violation of marginal if log: log['err'].append(err) @@ -150,20 +174,32 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, print( '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19) print('{:5d}|{:8e}|'.format(cpt, err)) - cpt += 1 - if log: - log['u'] = u_GPU.asarray() - log['v'] = v_GPU.asarray() - - K_GPU.mult_by_col(u_GPU, target=K_GPU) - K_GPU.mult_by_row(v_GPU.transpose(), target=K_GPU) - - if returnAsGPU: - res = K_GPU - else: - res = K_GPU.asarray() - + cpt = cpt + 1 if log: - return res, log - else: - return res + log['u'] = u + log['v'] = v + + if nbb: # return only loss + #res = np.einsum('ik,ij,jk,ij->k', u, K, v, M) (explodes cupy memory) + res = np.empty(nbb) + for i in range(nbb): + res[i] = np.sum(u[:, None, i] * (K * M) * v[None, :, i]) + if to_numpy: + res = utils.to_np(res) + if log: + return res, log + else: + return res + + else: # return OT matrix + res = u.reshape((-1, 1)) * K * v.reshape((1, -1)) + if to_numpy: + res = utils.to_np(res) + if log: + return res, log + else: + return res + + +# define sinkhorn as sinkhorn_knopp +sinkhorn = sinkhorn_knopp diff --git a/ot/gpu/da.py b/ot/gpu/da.py index 71a485a..4a98038 100644 --- a/ot/gpu/da.py +++ b/ot/gpu/da.py @@ -11,80 +11,30 @@ Domain adaptation with optimal transport with GPU implementation # License: MIT License -import numpy as np -from ..utils import unif -from ..da import OTDA +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations +import numpy as npp +from . import utils + from .bregman import sinkhorn -import cudamat -def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False): +def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, + numInnerItermax=200, stopInnerThr=1e-9, verbose=False, + log=False, to_numpy=True): """ - Compute the pairwise euclidean distance between matrices a and b. - - - Parameters - ---------- - a : np.ndarray (n, f) - first matrice - b : np.ndarray (m, f) - second matrice - returnAsGPU : boolean, optional (default False) - if True, returns cudamat matrix still on GPU, else return np.ndarray - squared : boolean, optional (default False) - if True, return squared euclidean distance matrice + Solve the entropic regularization optimal transport problem with nonconvex + group lasso regularization on GPU + If the input matrix are in numpy format, they will be uploaded to the + GPU first which can incur significant time overhead. - Returns - ------- - c : (n x m) np.ndarray or cudamat.CUDAMatrix - pairwise euclidean distance distance matrix - """ - # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m). - # First compute in c_GPU the squared euclidean distance. And return its - # square root. At each cell [i,j] of c, we want to have - # sum{k in range(f)} ( (a[i,k] - b[j,k])^2 ). We know that - # (a-b)^2 = a^2 -2ab +b^2. Thus we want to have in each cell of c: - # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2). - - a_GPU = cudamat.CUDAMatrix(a) - b_GPU = cudamat.CUDAMatrix(b) - - # Multiply a by b transpose to obtain in each cell [i,j] of c the - # value sum{k in range(f)} ( a[i,k]b[j,k] ) - c_GPU = cudamat.dot(a_GPU, b_GPU.transpose()) - # multiply by -2 to have sum{k in range(f)} ( -2a[i,k]b[j,k] ) - c_GPU.mult(-2) - - # Compute the vectors of the sum of squared elements. - a_GPU = cudamat.pow(a_GPU, 2).sum(axis=1) - b_GPU = cudamat.pow(b_GPU, 2).sum(axis=1) - - # Add the vectors in each columns (respectivly rows) of c. - # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] ) - c_GPU.add_col_vec(a_GPU) - # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2) - c_GPU.add_row_vec(b_GPU.transpose()) - - if not squared: - c_GPU = cudamat.sqrt(c_GPU) - - if returnAsGPU: - return c_GPU - else: - return c_GPU.asarray() - - -def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, - numInnerItermax=200, stopInnerThr=1e-9, - verbose=False, log=False): - """ - Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization The function solves the following optimization problem: .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)+ \eta \Omega_g(\gamma) + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma) + + \eta \Omega_g(\gamma) s.t. \gamma 1 = a @@ -94,11 +44,16 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, where : - M is the (ns,nt) metric cost matrix - - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - :math:`\Omega_g` is the group lasso regulaization term :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` where :math:`\mathcal{I}_c` are the index of samples from class c in the source domain. + - :math:`\Omega_e` is the entropic regularization term + :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`\Omega_g` is the group lasso regulaization term + :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` + where :math:`\mathcal{I}_c` are the index of samples from class c + in the source domain. - a and b are source and target weights (sum to 1) - The algorithm used for solving the problem is the generalised conditional gradient as proposed in [5]_ [7]_ + The algorithm used for solving the problem is the generalised conditional + gradient as proposed in [5]_ [7]_ Parameters @@ -109,7 +64,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, labels of samples in the source domain b : np.ndarray (nt,) samples weights in the target domain - M_GPU : cudamat.CUDAMatrix (ns,nt) + M : np.ndarray (ns,nt) loss matrix reg : float Regularization term for entropic regularization >0 @@ -125,6 +80,8 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, Print information along iterations log : bool, optional record log if True + to_numpy : boolean, optional (default True) + If true convert back the GPU array result to numpy format. Returns @@ -138,8 +95,13 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, References ---------- - .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 - .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized conditional gradient: analysis of convergence and applications. arXiv preprint arXiv:1510.06567. + .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE + Transactions on Pattern Analysis and Machine Intelligence , + vol.PP, no.99, pp.1-1 + .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). + Generalized conditional gradient: analysis of convergence + and applications. arXiv preprint arXiv:1510.06567. See Also -------- @@ -148,108 +110,35 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, ot.optim.cg : General regularized OT """ + + a, labels_a, b, M = utils.to_gpu(a, labels_a, b, M) + p = 0.5 epsilon = 1e-3 - Nfin = len(b) indices_labels = [] - classes = np.unique(labels_a) + labels_a2 = cp.asnumpy(labels_a) + classes = npp.unique(labels_a2) for c in classes: - idxc, = np.where(labels_a == c) - indices_labels.append(cudamat.CUDAMatrix(idxc.reshape(1, -1))) + idxc, = utils.to_gpu(npp.where(labels_a2 == c)) + indices_labels.append(idxc) - Mreg_GPU = cudamat.empty(M_GPU.shape) - W_GPU = cudamat.empty(M_GPU.shape).assign(0) + W = np.zeros(M.shape) for cpt in range(numItermax): - Mreg_GPU.assign(M_GPU) - Mreg_GPU.add_mult(W_GPU, eta) - transp_GPU = sinkhorn(a, b, Mreg_GPU, reg, numItermax=numInnerItermax, - stopThr=stopInnerThr, returnAsGPU=True) + Mreg = M + eta * W + transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax, + stopThr=stopInnerThr, to_numpy=False) # the transport has been computed. Check if classes are really # separated - W_GPU.assign(1) - W_GPU = W_GPU.transpose() + W = np.ones(M.shape) for (i, c) in enumerate(classes): - (_, nbRow) = indices_labels[i].shape - tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0) - transp_GPU.transpose().select_columns(indices_labels[i], tmpC_GPU) - majs_GPU = tmpC_GPU.sum(axis=1).add(epsilon) - cudamat.pow(majs_GPU, (p - 1)) - majs_GPU.mult(p) - - tmpC_GPU.assign(0) - tmpC_GPU.add_col_vec(majs_GPU) - W_GPU.set_selected_columns(indices_labels[i], tmpC_GPU) - - W_GPU = W_GPU.transpose() - - return transp_GPU.asarray() + majs = np.sum(transp[indices_labels[i]], axis=0) + majs = p * ((majs + epsilon)**(p - 1)) + W[indices_labels[i]] = majs -class OTDA_GPU(OTDA): - - def normalizeM(self, norm): - if norm == "median": - self.M_GPU.divide(float(np.median(self.M_GPU.asarray()))) - elif norm == "max": - self.M_GPU.divide(float(np.max(self.M_GPU.asarray()))) - elif norm == "log": - self.M_GPU.add(1) - cudamat.log(self.M_GPU) - elif norm == "loglog": - self.M_GPU.add(1) - cudamat.log(self.M_GPU) - self.M_GPU.add(1) - cudamat.log(self.M_GPU) - - -class OTDA_sinkhorn(OTDA_GPU): - - def fit(self, xs, xt, reg=1, ws=None, wt=None, norm=None, **kwargs): - cudamat.init() - xs = np.asarray(xs, dtype=np.float64) - xt = np.asarray(xt, dtype=np.float64) - - self.xs = xs - self.xt = xt - - if wt is None: - wt = unif(xt.shape[0]) - if ws is None: - ws = unif(xs.shape[0]) - - self.ws = ws - self.wt = wt - - self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True, - squared=True) - self.normalizeM(norm) - self.G = sinkhorn(ws, wt, self.M_GPU, reg, **kwargs) - self.computed = True - - -class OTDA_lpl1(OTDA_GPU): - - def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None, - **kwargs): - cudamat.init() - xs = np.asarray(xs, dtype=np.float64) - xt = np.asarray(xt, dtype=np.float64) - - self.xs = xs - self.xt = xt - - if wt is None: - wt = unif(xt.shape[0]) - if ws is None: - ws = unif(xs.shape[0]) - - self.ws = ws - self.wt = wt - - self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True, - squared=True) - self.normalizeM(norm) - self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M_GPU, reg, eta, **kwargs) - self.computed = True + if to_numpy: + return utils.to_np(transp) + else: + return transp diff --git a/ot/gpu/utils.py b/ot/gpu/utils.py new file mode 100644 index 0000000..41e168a --- /dev/null +++ b/ot/gpu/utils.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +""" +Utility functions for GPU +""" + +# Author: Remi Flamary <remi.flamary@unice.fr> +# Nicolas Courty <ncourty@irisa.fr> +# Leo Gautheron <https://github.com/aje> +# +# License: MIT License + +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations + + +def euclidean_distances(a, b, squared=False, to_numpy=True): + """ + Compute the pairwise euclidean distance between matrices a and b. + + If the input matrix are in numpy format, they will be uploaded to the + GPU first which can incur significant time overhead. + + Parameters + ---------- + a : np.ndarray (n, f) + first matrix + b : np.ndarray (m, f) + second matrix + to_numpy : boolean, optional (default True) + If true convert back the GPU array result to numpy format. + squared : boolean, optional (default False) + if True, return squared euclidean distance matrix + + Returns + ------- + c : (n x m) np.ndarray or cupy.ndarray + pairwise euclidean distance distance matrix + """ + + a, b = to_gpu(a, b) + + a2 = np.sum(np.square(a), 1) + b2 = np.sum(np.square(b), 1) + + c = -2 * np.dot(a, b.T) + c += a2[:, None] + c += b2[None, :] + + if not squared: + np.sqrt(c, out=c) + if to_numpy: + return to_np(c) + else: + return c + + +def dist(x1, x2=None, metric='sqeuclidean', to_numpy=True): + """Compute distance between samples in x1 and x2 on gpu + + Parameters + ---------- + + x1 : np.array (n1,d) + matrix with n1 samples of size d + x2 : np.array (n2,d), optional + matrix with n2 samples of size d (if None then x2=x1) + metric : str + Metric from 'sqeuclidean', 'euclidean', + + + Returns + ------- + + M : np.array (n1,n2) + distance matrix computed with given metric + + """ + if x2 is None: + x2 = x1 + if metric == "sqeuclidean": + return euclidean_distances(x1, x2, squared=True, to_numpy=to_numpy) + elif metric == "euclidean": + return euclidean_distances(x1, x2, squared=False, to_numpy=to_numpy) + else: + raise NotImplementedError + + +def to_gpu(*args): + """ Upload numpy arrays to GPU and return them""" + if len(args) > 1: + return (cp.asarray(x) for x in args) + else: + return cp.asarray(args[0]) + + +def to_np(*args): + """ convert GPU arras to numpy and return them""" + if len(args) > 1: + return (cp.asnumpy(x) for x in args) + else: + return cp.asnumpy(args[0]) diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 5dda82a..02cbd8c 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -17,6 +17,9 @@ from .import cvx from .emd_wrap import emd_c, check_result from ..utils import parmap from .cvx import barycenter +from ..utils import dist + +__all__=['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx'] def emd(a, b, M, numItermax=100000, log=False): @@ -214,3 +217,95 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), res = parmap(f, [b[:, i] for i in range(nb)], processes) return res + + + +def free_support_barycenter(measures_locations, measures_weights, X_init, b=None, weights=None, numItermax=100, stopThr=1e-7, verbose=False, log=None): + """ + Solves the free support (locations of the barycenters are optimized, not the weights) Wasserstein barycenter problem (i.e. the weighted Frechet mean for the 2-Wasserstein distance) + + The function solves the Wasserstein barycenter problem when the barycenter measure is constrained to be supported on k atoms. + This problem is considered in [1] (Algorithm 2). There are two differences with the following codes: + - we do not optimize over the weights + - we do not do line search for the locations updates, we use i.e. theta = 1 in [1] (Algorithm 2). This can be seen as a discrete implementation of the fixed-point algorithm of [2] proposed in the continuous setting. + + Parameters + ---------- + measures_locations : list of (k_i,d) np.ndarray + The discrete support of a measure supported on k_i locations of a d-dimensional space (k_i can be different for each element of the list) + measures_weights : list of (k_i,) np.ndarray + Numpy arrays where each numpy array has k_i non-negatives values summing to one representing the weights of each discrete input measure + + X_init : (k,d) np.ndarray + Initialization of the support locations (on k atoms) of the barycenter + b : (k,) np.ndarray + Initialization of the weights of the barycenter (non-negatives, sum to 1) + weights : (k,) np.ndarray + Initialization of the coefficients of the barycenter (non-negatives, sum to 1) + + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshol on error (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + + Returns + ------- + X : (k,d) np.ndarray + Support locations (on k atoms) of the barycenter + + References + ---------- + + .. [1] Cuturi, Marco, and Arnaud Doucet. "Fast computation of Wasserstein barycenters." International Conference on Machine Learning. 2014. + + .. [2] Álvarez-Esteban, Pedro C., et al. "A fixed-point approach to barycenters in Wasserstein space." Journal of Mathematical Analysis and Applications 441.2 (2016): 744-762. + + """ + + iter_count = 0 + + N = len(measures_locations) + k = X_init.shape[0] + d = X_init.shape[1] + if b is None: + b = np.ones((k,))/k + if weights is None: + weights = np.ones((N,)) / N + + X = X_init + + log_dict = {} + displacement_square_norms = [] + + displacement_square_norm = stopThr + 1. + + while ( displacement_square_norm > stopThr and iter_count < numItermax ): + + T_sum = np.zeros((k, d)) + + for (measure_locations_i, measure_weights_i, weight_i) in zip(measures_locations, measures_weights, weights.tolist()): + + M_i = dist(X, measure_locations_i) + T_i = emd(b, measure_weights_i, M_i) + T_sum = T_sum + weight_i * np.reshape(1. / b, (-1, 1)) * np.matmul(T_i, measure_locations_i) + + displacement_square_norm = np.sum(np.square(T_sum-X)) + if log: + displacement_square_norms.append(displacement_square_norm) + + X = T_sum + + if verbose: + print('iteration %d, displacement_square_norm=%f\n', iter_count, displacement_square_norm) + + iter_count += 1 + + if log: + log_dict['displacement_square_norms'] = displacement_square_norms + return X, log_dict + else: + return X
\ No newline at end of file diff --git a/ot/lp/cvx.py b/ot/lp/cvx.py index c8c75bc..8e763be 100644 --- a/ot/lp/cvx.py +++ b/ot/lp/cvx.py @@ -11,6 +11,7 @@ import numpy as np import scipy as sp import scipy.sparse as sps + try: import cvxopt from cvxopt import solvers, matrix, spmatrix @@ -26,7 +27,7 @@ def scipy_sparse_to_spmatrix(A): def barycenter(A, M, weights=None, verbose=False, log=False, solver='interior-point'): - """Compute the entropic regularized wasserstein barycenter of distributions A + """Compute the Wasserstein barycenter of distributions A The function solves the following optimization problem [16]: diff --git a/ot/smooth.py b/ot/smooth.py new file mode 100644 index 0000000..5a8e4b5 --- /dev/null +++ b/ot/smooth.py @@ -0,0 +1,600 @@ +#Copyright (c) 2018, Mathieu Blondel +#All rights reserved. +# +#Redistribution and use in source and binary forms, with or without +#modification, are permitted provided that the following conditions are met: +# +#1. Redistributions of source code must retain the above copyright notice, this +#list of conditions and the following disclaimer. +# +#2. Redistributions in binary form must reproduce the above copyright notice, +#this list of conditions and the following disclaimer in the documentation and/or +#other materials provided with the distribution. +# +#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND +#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. +#IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, +#INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT +#NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, +#OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +#LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +#OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +#THE POSSIBILITY OF SUCH DAMAGE. + +# Author: Mathieu Blondel +# Remi Flamary <remi.flamary@unice.fr> + +""" +Implementation of +Smooth and Sparse Optimal Transport. +Mathieu Blondel, Vivien Seguy, Antoine Rolet. +In Proc. of AISTATS 2018. +https://arxiv.org/abs/1710.06276 + +[17] Blondel, M., Seguy, V., & Rolet, A. (2018). Smooth and Sparse Optimal +Transport. Proceedings of the Twenty-First International Conference on +Artificial Intelligence and Statistics (AISTATS). + +Original code from https://github.com/mblondel/smooth-ot/ + +""" + +import numpy as np +from scipy.optimize import minimize + + +def projection_simplex(V, z=1, axis=None): + """ Projection of x onto the simplex, scaled by z + + P(x; z) = argmin_{y >= 0, sum(y) = z} ||y - x||^2 + z: float or array + If array, len(z) must be compatible with V + axis: None or int + - axis=None: project V by P(V.ravel(); z) + - axis=1: project each V[i] by P(V[i]; z[i]) + - axis=0: project each V[:, j] by P(V[:, j]; z[j]) + """ + if axis == 1: + n_features = V.shape[1] + U = np.sort(V, axis=1)[:, ::-1] + z = np.ones(len(V)) * z + cssv = np.cumsum(U, axis=1) - z[:, np.newaxis] + ind = np.arange(n_features) + 1 + cond = U - cssv / ind > 0 + rho = np.count_nonzero(cond, axis=1) + theta = cssv[np.arange(len(V)), rho - 1] / rho + return np.maximum(V - theta[:, np.newaxis], 0) + + elif axis == 0: + return projection_simplex(V.T, z, axis=1).T + + else: + V = V.ravel().reshape(1, -1) + return projection_simplex(V, z, axis=1).ravel() + + +class Regularization(object): + """Base class for Regularization objects + + Notes + ----- + This class is not intended for direct use but as aparent for true + regularizatiojn implementation. + """ + + def __init__(self, gamma=1.0): + """ + + Parameters + ---------- + gamma: float + Regularization parameter. + We recover unregularized OT when gamma -> 0. + + """ + self.gamma = gamma + + def delta_Omega(X): + """ + Compute delta_Omega(X[:, j]) for each X[:, j]. + delta_Omega(x) = sup_{y >= 0} y^T x - Omega(y). + + Parameters + ---------- + X: array, shape = len(a) x len(b) + Input array. + + Returns + ------- + v: array, len(b) + Values: v[j] = delta_Omega(X[:, j]) + G: array, len(a) x len(b) + Gradients: G[:, j] = nabla delta_Omega(X[:, j]) + """ + raise NotImplementedError + + def max_Omega(X, b): + """ + Compute max_Omega_j(X[:, j]) for each X[:, j]. + max_Omega_j(x) = sup_{y >= 0, sum(y) = 1} y^T x - Omega(b[j] y) / b[j]. + + Parameters + ---------- + X: array, shape = len(a) x len(b) + Input array. + + Returns + ------- + v: array, len(b) + Values: v[j] = max_Omega_j(X[:, j]) + G: array, len(a) x len(b) + Gradients: G[:, j] = nabla max_Omega_j(X[:, j]) + """ + raise NotImplementedError + + def Omega(T): + """ + Compute regularization term. + + Parameters + ---------- + T: array, shape = len(a) x len(b) + Input array. + + Returns + ------- + value: float + Regularization term. + """ + raise NotImplementedError + + +class NegEntropy(Regularization): + """ NegEntropy regularization """ + + def delta_Omega(self, X): + G = np.exp(X / self.gamma - 1) + val = self.gamma * np.sum(G, axis=0) + return val, G + + def max_Omega(self, X, b): + max_X = np.max(X, axis=0) / self.gamma + exp_X = np.exp(X / self.gamma - max_X) + val = self.gamma * (np.log(np.sum(exp_X, axis=0)) + max_X) + val -= self.gamma * np.log(b) + G = exp_X / np.sum(exp_X, axis=0) + return val, G + + def Omega(self, T): + return self.gamma * np.sum(T * np.log(T)) + + +class SquaredL2(Regularization): + """ Squared L2 regularization """ + + def delta_Omega(self, X): + max_X = np.maximum(X, 0) + val = np.sum(max_X ** 2, axis=0) / (2 * self.gamma) + G = max_X / self.gamma + return val, G + + def max_Omega(self, X, b): + G = projection_simplex(X / (b * self.gamma), axis=0) + val = np.sum(X * G, axis=0) + val -= 0.5 * self.gamma * b * np.sum(G * G, axis=0) + return val, G + + def Omega(self, T): + return 0.5 * self.gamma * np.sum(T ** 2) + + +def dual_obj_grad(alpha, beta, a, b, C, regul): + """ + Compute objective value and gradients of dual objective. + + Parameters + ---------- + alpha: array, shape = len(a) + beta: array, shape = len(b) + Current iterate of dual potentials. + a: array, shape = len(a) + b: array, shape = len(b) + Input histograms (should be non-negative and sum to 1). + C: array, shape = len(a) x len(b) + Ground cost matrix. + regul: Regularization object + Should implement a delta_Omega(X) method. + + Returns + ------- + obj: float + Objective value (higher is better). + grad_alpha: array, shape = len(a) + Gradient w.r.t. alpha. + grad_beta: array, shape = len(b) + Gradient w.r.t. beta. + """ + obj = np.dot(alpha, a) + np.dot(beta, b) + grad_alpha = a.copy() + grad_beta = b.copy() + + # X[:, j] = alpha + beta[j] - C[:, j] + X = alpha[:, np.newaxis] + beta - C + + # val.shape = len(b) + # G.shape = len(a) x len(b) + val, G = regul.delta_Omega(X) + + obj -= np.sum(val) + grad_alpha -= G.sum(axis=1) + grad_beta -= G.sum(axis=0) + + return obj, grad_alpha, grad_beta + + +def solve_dual(a, b, C, regul, method="L-BFGS-B", tol=1e-3, max_iter=500, + verbose=False): + """ + Solve the "smoothed" dual objective. + + Parameters + ---------- + a: array, shape = len(a) + b: array, shape = len(b) + Input histograms (should be non-negative and sum to 1). + C: array, shape = len(a) x len(b) + Ground cost matrix. + regul: Regularization object + Should implement a delta_Omega(X) method. + method: str + Solver to be used (passed to `scipy.optimize.minimize`). + tol: float + Tolerance parameter. + max_iter: int + Maximum number of iterations. + + Returns + ------- + alpha: array, shape = len(a) + beta: array, shape = len(b) + Dual potentials. + """ + + def _func(params): + # Unpack alpha and beta. + alpha = params[:len(a)] + beta = params[len(a):] + + obj, grad_alpha, grad_beta = dual_obj_grad(alpha, beta, a, b, C, regul) + + # Pack grad_alpha and grad_beta. + grad = np.concatenate((grad_alpha, grad_beta)) + + # We need to maximize the dual. + return -obj, -grad + + # Unfortunately, `minimize` only supports functions whose argument is a + # vector. So, we need to concatenate alpha and beta. + alpha_init = np.zeros(len(a)) + beta_init = np.zeros(len(b)) + params_init = np.concatenate((alpha_init, beta_init)) + + res = minimize(_func, params_init, method=method, jac=True, + tol=tol, options=dict(maxiter=max_iter, disp=verbose)) + + alpha = res.x[:len(a)] + beta = res.x[len(a):] + + return alpha, beta, res + + +def semi_dual_obj_grad(alpha, a, b, C, regul): + """ + Compute objective value and gradient of semi-dual objective. + + Parameters + ---------- + alpha: array, shape = len(a) + Current iterate of semi-dual potentials. + a: array, shape = len(a) + b: array, shape = len(b) + Input histograms (should be non-negative and sum to 1). + C: array, shape = len(a) x len(b) + Ground cost matrix. + regul: Regularization object + Should implement a max_Omega(X) method. + + Returns + ------- + obj: float + Objective value (higher is better). + grad: array, shape = len(a) + Gradient w.r.t. alpha. + """ + obj = np.dot(alpha, a) + grad = a.copy() + + # X[:, j] = alpha - C[:, j] + X = alpha[:, np.newaxis] - C + + # val.shape = len(b) + # G.shape = len(a) x len(b) + val, G = regul.max_Omega(X, b) + + obj -= np.dot(b, val) + grad -= np.dot(G, b) + + return obj, grad + + +def solve_semi_dual(a, b, C, regul, method="L-BFGS-B", tol=1e-3, max_iter=500, + verbose=False): + """ + Solve the "smoothed" semi-dual objective. + + Parameters + ---------- + a: array, shape = len(a) + b: array, shape = len(b) + Input histograms (should be non-negative and sum to 1). + C: array, shape = len(a) x len(b) + Ground cost matrix. + regul: Regularization object + Should implement a max_Omega(X) method. + method: str + Solver to be used (passed to `scipy.optimize.minimize`). + tol: float + Tolerance parameter. + max_iter: int + Maximum number of iterations. + + Returns + ------- + alpha: array, shape = len(a) + Semi-dual potentials. + """ + + def _func(alpha): + obj, grad = semi_dual_obj_grad(alpha, a, b, C, regul) + # We need to maximize the semi-dual. + return -obj, -grad + + alpha_init = np.zeros(len(a)) + + res = minimize(_func, alpha_init, method=method, jac=True, + tol=tol, options=dict(maxiter=max_iter, disp=verbose)) + + return res.x, res + + +def get_plan_from_dual(alpha, beta, C, regul): + """ + Retrieve optimal transportation plan from optimal dual potentials. + + Parameters + ---------- + alpha: array, shape = len(a) + beta: array, shape = len(b) + Optimal dual potentials. + C: array, shape = len(a) x len(b) + Ground cost matrix. + regul: Regularization object + Should implement a delta_Omega(X) method. + + Returns + ------- + T: array, shape = len(a) x len(b) + Optimal transportation plan. + """ + X = alpha[:, np.newaxis] + beta - C + return regul.delta_Omega(X)[1] + + +def get_plan_from_semi_dual(alpha, b, C, regul): + """ + Retrieve optimal transportation plan from optimal semi-dual potentials. + + Parameters + ---------- + alpha: array, shape = len(a) + Optimal semi-dual potentials. + b: array, shape = len(b) + Second input histogram (should be non-negative and sum to 1). + C: array, shape = len(a) x len(b) + Ground cost matrix. + regul: Regularization object + Should implement a delta_Omega(X) method. + + Returns + ------- + T: array, shape = len(a) x len(b) + Optimal transportation plan. + """ + X = alpha[:, np.newaxis] - C + return regul.max_Omega(X, b)[1] * b + + +def smooth_ot_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=1e-9, + numItermax=500, verbose=False, log=False): + r""" + Solve the regularized OT problem in the dual and return the OT matrix + + The function solves the smooth relaxed dual formulation (7) in [17]_ : + + .. math:: + \max_{\alpha,\beta}\quad a^T\alpha+b^T\beta-\sum_j\delta_\Omega(\alpha+\beta_j-\mathbf{m}_j) + + where : + + - :math:`\mathbf{m}_j` is the jth column of the cost matrix + - :math:`\delta_\Omega` is the convex conjugate of the regularization term :math:`\Omega` + - a and b are source and target weights (sum to 1) + + The OT matrix can is reconstructed from the gradient of :math:`\delta_\Omega` + (See [17]_ Proposition 1). + The optimization algorithm is using gradient decent (L-BFGS by default). + + + Parameters + ---------- + a : np.ndarray (ns,) + samples weights in the source domain + b : np.ndarray (nt,) or np.ndarray (nt,nbb) + samples in the target domain, compute sinkhorn with multiple targets + and fixed M if b is a matrix (return OT loss + dual variables in log) + M : np.ndarray (ns,nt) + loss matrix + reg : float + Regularization term >0 + reg_type : str + Regularization type, can be the following (default ='l2'): + - 'kl' : Kullback Leibler (~ Neg-entropy used in sinkhorn [2]_) + - 'l2' : Squared Euclidean regularization + method : str + Solver to use for scipy.optimize.minimize + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshol on error (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + + + Returns + ------- + gamma : (ns x nt) ndarray + Optimal transportation matrix for the given parameters + log : dict + log dictionary return only if log==True in parameters + + + References + ---------- + + .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013 + + .. [17] Blondel, M., Seguy, V., & Rolet, A. (2018). Smooth and Sparse Optimal Transport. Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics (AISTATS). + + See Also + -------- + ot.lp.emd : Unregularized OT + ot.sinhorn : Entropic regularized OT + ot.optim.cg : General regularized OT + + """ + + if reg_type.lower() in ['l2', 'squaredl2']: + regul = SquaredL2(gamma=reg) + elif reg_type.lower() in ['entropic', 'negentropy', 'kl']: + regul = NegEntropy(gamma=reg) + else: + raise NotImplementedError('Unknown regularization') + + # solve dual + alpha, beta, res = solve_dual(a, b, M, regul, max_iter=numItermax, + tol=stopThr, verbose=verbose) + + # reconstruct transport matrix + G = get_plan_from_dual(alpha, beta, M, regul) + + if log: + log = {'alpha': alpha, 'beta': beta, 'res': res} + return G, log + else: + return G + + +def smooth_ot_semi_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=1e-9, + numItermax=500, verbose=False, log=False): + r""" + Solve the regularized OT problem in the semi-dual and return the OT matrix + + The function solves the smooth relaxed dual formulation (10) in [17]_ : + + .. math:: + \max_{\alpha}\quad a^T\alpha-OT_\Omega^*(\alpha,b) + + where : + + .. math:: + OT_\Omega^*(\alpha,b)=\sum_j b_j + + - :math:`\mathbf{m}_j` is the jth column of the cost matrix + - :math:`OT_\Omega^*(\alpha,b)` is defined in Eq. (9) in [17] + - a and b are source and target weights (sum to 1) + + The OT matrix can is reconstructed using [17]_ Proposition 2. + The optimization algorithm is using gradient decent (L-BFGS by default). + + + Parameters + ---------- + a : np.ndarray (ns,) + samples weights in the source domain + b : np.ndarray (nt,) or np.ndarray (nt,nbb) + samples in the target domain, compute sinkhorn with multiple targets + and fixed M if b is a matrix (return OT loss + dual variables in log) + M : np.ndarray (ns,nt) + loss matrix + reg : float + Regularization term >0 + reg_type : str + Regularization type, can be the following (default ='l2'): + - 'kl' : Kullback Leibler (~ Neg-entropy used in sinkhorn [2]_) + - 'l2' : Squared Euclidean regularization + method : str + Solver to use for scipy.optimize.minimize + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshol on error (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + + + Returns + ------- + gamma : (ns x nt) ndarray + Optimal transportation matrix for the given parameters + log : dict + log dictionary return only if log==True in parameters + + + References + ---------- + + .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013 + + .. [17] Blondel, M., Seguy, V., & Rolet, A. (2018). Smooth and Sparse Optimal Transport. Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics (AISTATS). + + See Also + -------- + ot.lp.emd : Unregularized OT + ot.sinhorn : Entropic regularized OT + ot.optim.cg : General regularized OT + + """ + if reg_type.lower() in ['l2', 'squaredl2']: + regul = SquaredL2(gamma=reg) + elif reg_type.lower() in ['entropic', 'negentropy', 'kl']: + regul = NegEntropy(gamma=reg) + else: + raise NotImplementedError('Unknown regularization') + + # solve dual + alpha, res = solve_semi_dual(a, b, M, regul, max_iter=numItermax, + tol=stopThr, verbose=verbose) + + # reconstruct transport matrix + G = get_plan_from_semi_dual(alpha, b, M, regul) + + if log: + log = {'alpha': alpha, 'res': res} + return G, log + else: + return G diff --git a/ot/utils.py b/ot/utils.py index 7dac283..bb21b38 100644 --- a/ot/utils.py +++ b/ot/utils.py @@ -77,6 +77,34 @@ def clean_zeros(a, b, M): return a2, b2, M2 +def euclidean_distances(X, Y, squared=False): + """ + Considering the rows of X (and Y=X) as vectors, compute the + distance matrix between each pair of vectors. + Parameters + ---------- + X : {array-like}, shape (n_samples_1, n_features) + Y : {array-like}, shape (n_samples_2, n_features) + squared : boolean, optional + Return squared Euclidean distances. + Returns + ------- + distances : {array}, shape (n_samples_1, n_samples_2) + """ + XX = np.einsum('ij,ij->i', X, X)[:, np.newaxis] + YY = np.einsum('ij,ij->i', Y, Y)[np.newaxis, :] + distances = np.dot(X, Y.T) + distances *= -2 + distances += XX + distances += YY + np.maximum(distances, 0, out=distances) + if X is Y: + # Ensure that distances between vectors and themselves are set to 0.0. + # This may not be the case due to floating point rounding errors. + distances.flat[::distances.shape[0] + 1] = 0.0 + return distances if squared else np.sqrt(distances, out=distances) + + def dist(x1, x2=None, metric='sqeuclidean'): """Compute distance between samples in x1 and x2 using function scipy.spatial.distance.cdist @@ -104,7 +132,8 @@ def dist(x1, x2=None, metric='sqeuclidean'): """ if x2 is None: x2 = x1 - + if metric == "sqeuclidean": + return euclidean_distances(x1, x2, squared=True) return cdist(x1, x2, metric=metric) |