diff options
Diffstat (limited to 'ot/gpu')
-rw-r--r-- | ot/gpu/__init__.py | 41 | ||||
-rw-r--r-- | ot/gpu/bregman.py | 194 | ||||
-rw-r--r-- | ot/gpu/da.py | 144 | ||||
-rw-r--r-- | ot/gpu/utils.py | 101 |
4 files changed, 480 insertions, 0 deletions
diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py new file mode 100644 index 0000000..1ab95bb --- /dev/null +++ b/ot/gpu/__init__.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- +""" + +This module provides GPU implementation for several OT solvers and utility +functions. The GPU backend in handled by `cupy +<https://cupy.chainer.org/>`_. + +.. warning:: + Note that by default the module is not import in :mod:`ot`. In order to + use it you need to explicitely import :mod:`ot.gpu` . + +By default, the functions in this module accept and return numpy arrays +in order to proide drop-in replacement for the other POT function but +the transfer between CPU en GPU comes with a significant overhead. + +In order to get the best performances, we recommend to give only cupy +arrays to the functions and desactivate the conversion to numpy of the +result of the function with parameter ``to_numpy=False``. + +""" + +# Author: Remi Flamary <remi.flamary@unice.fr> +# Leo Gautheron <https://github.com/aje> +# +# License: MIT License + +from . import bregman +from . import da +from .bregman import sinkhorn +from .da import sinkhorn_lpl1_mm + +from . import utils +from .utils import dist, to_gpu, to_np + + + + + +__all__ = ["utils", "dist", "sinkhorn", + "sinkhorn_lpl1_mm", 'bregman', 'da', 'to_gpu', 'to_np'] + diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py new file mode 100644 index 0000000..2e2df83 --- /dev/null +++ b/ot/gpu/bregman.py @@ -0,0 +1,194 @@ +# -*- coding: utf-8 -*- +""" +Bregman projections for regularized OT with GPU +""" + +# Author: Remi Flamary <remi.flamary@unice.fr> +# Leo Gautheron <https://github.com/aje> +# +# License: MIT License + +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations +from . import utils + + +def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, + verbose=False, log=False, to_numpy=True, **kwargs): + """ + Solve the entropic regularization optimal transport on GPU + + If the input matrix are in numpy format, they will be uploaded to the + GPU first which can incur significant time overhead. + + The function solves the following optimization problem: + + .. math:: + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + + s.t. \gamma 1 = a + + \gamma^T 1= b + + \gamma\geq 0 + where : + + - M is the (ns,nt) metric cost matrix + - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - a and b are source and target weights (sum to 1) + + The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [2]_ + + + Parameters + ---------- + a : np.ndarray (ns,) + samples weights in the source domain + b : np.ndarray (nt,) or np.ndarray (nt,nbb) + samples in the target domain, compute sinkhorn with multiple targets + and fixed M if b is a matrix (return OT loss + dual variables in log) + M : np.ndarray (ns,nt) + loss matrix + reg : float + Regularization term >0 + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshol on error (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + to_numpy : boolean, optional (default True) + If true convert back the GPU array result to numpy format. + + + Returns + ------- + gamma : (ns x nt) ndarray + Optimal transportation matrix for the given parameters + log : dict + log dictionary return only if log==True in parameters + + + References + ---------- + + .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013 + + + See Also + -------- + ot.lp.emd : Unregularized OT + ot.optim.cg : General regularized OT + + """ + + a = cp.asarray(a) + b = cp.asarray(b) + M = cp.asarray(M) + + if len(a) == 0: + a = np.ones((M.shape[0],)) / M.shape[0] + if len(b) == 0: + b = np.ones((M.shape[1],)) / M.shape[1] + + # init data + Nini = len(a) + Nfin = len(b) + + if len(b.shape) > 1: + nbb = b.shape[1] + else: + nbb = 0 + + if log: + log = {'err': []} + + # we assume that no distances are null except those of the diagonal of + # distances + if nbb: + u = np.ones((Nini, nbb)) / Nini + v = np.ones((Nfin, nbb)) / Nfin + else: + u = np.ones(Nini) / Nini + v = np.ones(Nfin) / Nfin + + # print(reg) + + # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute + K = np.empty(M.shape, dtype=M.dtype) + np.divide(M, -reg, out=K) + np.exp(K, out=K) + + # print(np.min(K)) + tmp2 = np.empty(b.shape, dtype=M.dtype) + + Kp = (1 / a).reshape(-1, 1) * K + cpt = 0 + err = 1 + while (err > stopThr and cpt < numItermax): + uprev = u + vprev = v + + KtransposeU = np.dot(K.T, u) + v = np.divide(b, KtransposeU) + u = 1. / np.dot(Kp, v) + + if (np.any(KtransposeU == 0) or + np.any(np.isnan(u)) or np.any(np.isnan(v)) or + np.any(np.isinf(u)) or np.any(np.isinf(v))): + # we have reached the machine precision + # come back to previous solution and quit loop + print('Warning: numerical errors at iteration', cpt) + u = uprev + v = vprev + break + if cpt % 10 == 0: + # we can speed up the process by checking for the error only all + # the 10th iterations + if nbb: + err = np.sum((u - uprev)**2) / np.sum((u)**2) + \ + np.sum((v - vprev)**2) / np.sum((v)**2) + else: + # compute right marginal tmp2= (diag(u)Kdiag(v))^T1 + tmp2 = np.sum(u[:, None] * K * v[None, :], 0) + #tmp2=np.einsum('i,ij,j->j', u, K, v) + err = np.linalg.norm(tmp2 - b)**2 # violation of marginal + if log: + log['err'].append(err) + + if verbose: + if cpt % 200 == 0: + print( + '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19) + print('{:5d}|{:8e}|'.format(cpt, err)) + cpt = cpt + 1 + if log: + log['u'] = u + log['v'] = v + + if nbb: # return only loss + #res = np.einsum('ik,ij,jk,ij->k', u, K, v, M) (explodes cupy memory) + res = np.empty(nbb) + for i in range(nbb): + res[i] = np.sum(u[:, None, i] * (K * M) * v[None, :, i]) + if to_numpy: + res = utils.to_np(res) + if log: + return res, log + else: + return res + + else: # return OT matrix + res = u.reshape((-1, 1)) * K * v.reshape((1, -1)) + if to_numpy: + res = utils.to_np(res) + if log: + return res, log + else: + return res + + +# define sinkhorn as sinkhorn_knopp +sinkhorn = sinkhorn_knopp diff --git a/ot/gpu/da.py b/ot/gpu/da.py new file mode 100644 index 0000000..4a98038 --- /dev/null +++ b/ot/gpu/da.py @@ -0,0 +1,144 @@ +# -*- coding: utf-8 -*- +""" +Domain adaptation with optimal transport with GPU implementation +""" + +# Author: Remi Flamary <remi.flamary@unice.fr> +# Nicolas Courty <ncourty@irisa.fr> +# Michael Perrot <michael.perrot@univ-st-etienne.fr> +# Leo Gautheron <https://github.com/aje> +# +# License: MIT License + + +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations +import numpy as npp +from . import utils + +from .bregman import sinkhorn + + +def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, + numInnerItermax=200, stopInnerThr=1e-9, verbose=False, + log=False, to_numpy=True): + """ + Solve the entropic regularization optimal transport problem with nonconvex + group lasso regularization on GPU + + If the input matrix are in numpy format, they will be uploaded to the + GPU first which can incur significant time overhead. + + + The function solves the following optimization problem: + + .. math:: + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma) + + \eta \Omega_g(\gamma) + + s.t. \gamma 1 = a + + \gamma^T 1= b + + \gamma\geq 0 + where : + + - M is the (ns,nt) metric cost matrix + - :math:`\Omega_e` is the entropic regularization term + :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`\Omega_g` is the group lasso regulaization term + :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` + where :math:`\mathcal{I}_c` are the index of samples from class c + in the source domain. + - a and b are source and target weights (sum to 1) + + The algorithm used for solving the problem is the generalised conditional + gradient as proposed in [5]_ [7]_ + + + Parameters + ---------- + a : np.ndarray (ns,) + samples weights in the source domain + labels_a : np.ndarray (ns,) + labels of samples in the source domain + b : np.ndarray (nt,) + samples weights in the target domain + M : np.ndarray (ns,nt) + loss matrix + reg : float + Regularization term for entropic regularization >0 + eta : float, optional + Regularization term for group lasso regularization >0 + numItermax : int, optional + Max number of iterations + numInnerItermax : int, optional + Max number of iterations (inner sinkhorn solver) + stopInnerThr : float, optional + Stop threshold on error (inner sinkhorn solver) (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + to_numpy : boolean, optional (default True) + If true convert back the GPU array result to numpy format. + + + Returns + ------- + gamma : (ns x nt) ndarray + Optimal transportation matrix for the given parameters + log : dict + log dictionary return only if log==True in parameters + + + References + ---------- + + .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE + Transactions on Pattern Analysis and Machine Intelligence , + vol.PP, no.99, pp.1-1 + .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). + Generalized conditional gradient: analysis of convergence + and applications. arXiv preprint arXiv:1510.06567. + + See Also + -------- + ot.lp.emd : Unregularized OT + ot.bregman.sinkhorn : Entropic regularized OT + ot.optim.cg : General regularized OT + + """ + + a, labels_a, b, M = utils.to_gpu(a, labels_a, b, M) + + p = 0.5 + epsilon = 1e-3 + + indices_labels = [] + labels_a2 = cp.asnumpy(labels_a) + classes = npp.unique(labels_a2) + for c in classes: + idxc, = utils.to_gpu(npp.where(labels_a2 == c)) + indices_labels.append(idxc) + + W = np.zeros(M.shape) + + for cpt in range(numItermax): + Mreg = M + eta * W + transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax, + stopThr=stopInnerThr, to_numpy=False) + # the transport has been computed. Check if classes are really + # separated + W = np.ones(M.shape) + for (i, c) in enumerate(classes): + + majs = np.sum(transp[indices_labels[i]], axis=0) + majs = p * ((majs + epsilon)**(p - 1)) + W[indices_labels[i]] = majs + + if to_numpy: + return utils.to_np(transp) + else: + return transp diff --git a/ot/gpu/utils.py b/ot/gpu/utils.py new file mode 100644 index 0000000..41e168a --- /dev/null +++ b/ot/gpu/utils.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +""" +Utility functions for GPU +""" + +# Author: Remi Flamary <remi.flamary@unice.fr> +# Nicolas Courty <ncourty@irisa.fr> +# Leo Gautheron <https://github.com/aje> +# +# License: MIT License + +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations + + +def euclidean_distances(a, b, squared=False, to_numpy=True): + """ + Compute the pairwise euclidean distance between matrices a and b. + + If the input matrix are in numpy format, they will be uploaded to the + GPU first which can incur significant time overhead. + + Parameters + ---------- + a : np.ndarray (n, f) + first matrix + b : np.ndarray (m, f) + second matrix + to_numpy : boolean, optional (default True) + If true convert back the GPU array result to numpy format. + squared : boolean, optional (default False) + if True, return squared euclidean distance matrix + + Returns + ------- + c : (n x m) np.ndarray or cupy.ndarray + pairwise euclidean distance distance matrix + """ + + a, b = to_gpu(a, b) + + a2 = np.sum(np.square(a), 1) + b2 = np.sum(np.square(b), 1) + + c = -2 * np.dot(a, b.T) + c += a2[:, None] + c += b2[None, :] + + if not squared: + np.sqrt(c, out=c) + if to_numpy: + return to_np(c) + else: + return c + + +def dist(x1, x2=None, metric='sqeuclidean', to_numpy=True): + """Compute distance between samples in x1 and x2 on gpu + + Parameters + ---------- + + x1 : np.array (n1,d) + matrix with n1 samples of size d + x2 : np.array (n2,d), optional + matrix with n2 samples of size d (if None then x2=x1) + metric : str + Metric from 'sqeuclidean', 'euclidean', + + + Returns + ------- + + M : np.array (n1,n2) + distance matrix computed with given metric + + """ + if x2 is None: + x2 = x1 + if metric == "sqeuclidean": + return euclidean_distances(x1, x2, squared=True, to_numpy=to_numpy) + elif metric == "euclidean": + return euclidean_distances(x1, x2, squared=False, to_numpy=to_numpy) + else: + raise NotImplementedError + + +def to_gpu(*args): + """ Upload numpy arrays to GPU and return them""" + if len(args) > 1: + return (cp.asarray(x) for x in args) + else: + return cp.asarray(args[0]) + + +def to_np(*args): + """ convert GPU arras to numpy and return them""" + if len(args) > 1: + return (cp.asnumpy(x) for x in args) + else: + return cp.asnumpy(args[0]) |