From d258c7d6936410cd78189445a0260d983f7684d6 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Mon, 24 Sep 2018 13:57:42 +0200 Subject: convert ot.gpu to cupy --- ot/gpu/__init__.py | 6 +- ot/gpu/bregman.py | 143 +++++++++++++++++++++-------------- ot/gpu/da.py | 214 ++++++++++++----------------------------------------- ot/gpu/utils.py | 100 +++++++++++++++++++++++++ 4 files changed, 239 insertions(+), 224 deletions(-) create mode 100644 ot/gpu/utils.py (limited to 'ot') diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py index a2fdd3d..de4825d 100644 --- a/ot/gpu/__init__.py +++ b/ot/gpu/__init__.py @@ -4,9 +4,13 @@ from . import bregman from . import da from .bregman import sinkhorn +from . import utils +from .utils import dist, to_gpu, to_np + + # Author: Remi Flamary # Leo Gautheron # # License: MIT License -__all__ = ["bregman", "da", "sinkhorn"] +__all__ = ["utils", "dist", "sinkhorn"] diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py index 47939c4..912104c 100644 --- a/ot/gpu/bregman.py +++ b/ot/gpu/bregman.py @@ -8,14 +8,16 @@ Bregman projections for regularized OT with GPU # # License: MIT License -import numpy as np -import cudamat +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations +from . import utils -def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, - log=False, returnAsGPU=False): - r""" - Solve the entropic regularization optimal transport problem on GPU + +def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, + verbose=False, log=False, to_numpy=True, **kwargs): + """ + Solve the entropic regularization optimal transport problem and return the OT matrix The function solves the following optimization problem: @@ -40,9 +42,10 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, ---------- a : np.ndarray (ns,) samples weights in the source domain - b : np.ndarray (nt,) - samples in the target domain - M_GPU : cudamat.CUDAMatrix (ns,nt) + b : np.ndarray (nt,) or np.ndarray (nt,nbb) + samples in the target domain, compute sinkhorn with multiple targets + and fixed M if b is a matrix (return OT loss + dual variables in log) + M : np.ndarray (ns,nt) loss matrix reg : float Regularization term >0 @@ -54,8 +57,7 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, Print information along iterations log : bool, optional record log if True - returnAsGPU : bool, optional - return the OT matrix as a cudamat.CUDAMatrix + Returns ------- @@ -88,60 +90,78 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, ot.optim.cg : General regularized OT """ + + a = cp.asarray(a, dtype=np.float64) + b = cp.asarray(b, dtype=np.float64) + M = cp.asarray(M, dtype=np.float64) + + if len(a) == 0: + a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0] + if len(b) == 0: + b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] + # init data Nini = len(a) Nfin = len(b) + if len(b.shape) > 1: + nbb = b.shape[1] + else: + nbb = 0 + if log: log = {'err': []} # we assume that no distances are null except those of the diagonal of # distances - u = (np.ones(Nini) / Nini).reshape((Nini, 1)) - u_GPU = cudamat.CUDAMatrix(u) - a_GPU = cudamat.CUDAMatrix(a.reshape((Nini, 1))) - ones_GPU = cudamat.empty(u_GPU.shape).assign(1) - v = (np.ones(Nfin) / Nfin).reshape((Nfin, 1)) - v_GPU = cudamat.CUDAMatrix(v) - b_GPU = cudamat.CUDAMatrix(b.reshape((Nfin, 1))) - - M_GPU.divide(-reg) + if nbb: + u = np.ones((Nini, nbb)) / Nini + v = np.ones((Nfin, nbb)) / Nfin + else: + u = np.ones(Nini) / Nini + v = np.ones(Nfin) / Nfin - K_GPU = cudamat.exp(M_GPU) + # print(reg) - ones_GPU.divide(a_GPU, target=a_GPU) - Kp_GPU = cudamat.empty(K_GPU.shape) - K_GPU.mult_by_col(a_GPU, target=Kp_GPU) + # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute + K = np.empty(M.shape, dtype=M.dtype) + np.divide(M, -reg, out=K) + np.exp(K, out=K) - tmp_GPU = cudamat.empty(K_GPU.shape) + # print(np.min(K)) + tmp2 = np.empty(b.shape, dtype=M.dtype) + Kp = (1 / a).reshape(-1, 1) * K cpt = 0 err = 1 while (err > stopThr and cpt < numItermax): - uprev_GPU = u_GPU.copy() - vprev_GPU = v_GPU.copy() + uprev = u + vprev = v - KtransposeU_GPU = K_GPU.transpose().dot(u_GPU) - b_GPU.divide(KtransposeU_GPU, target=v_GPU) - ones_GPU.divide(Kp_GPU.dot(v_GPU), target=u_GPU) + KtransposeU = np.dot(K.T, u) + v = np.divide(b, KtransposeU) + u = 1. / np.dot(Kp, v) - if (np.any(KtransposeU_GPU.asarray() == 0) or - not u_GPU.allfinite() or not v_GPU.allfinite()): + if (np.any(KtransposeU == 0) or + np.any(np.isnan(u)) or np.any(np.isnan(v)) or + np.any(np.isinf(u)) or np.any(np.isinf(v))): # we have reached the machine precision # come back to previous solution and quit loop print('Warning: numerical errors at iteration', cpt) - u_GPU = uprev_GPU.copy() - v_GPU = vprev_GPU.copy() + u = uprev + v = vprev break if cpt % 10 == 0: # we can speed up the process by checking for the error only all # the 10th iterations - K_GPU.mult_by_col(u_GPU, target=tmp_GPU) - tmp_GPU.mult_by_row(v_GPU.transpose(), target=tmp_GPU) - - bcopy_GPU = b_GPU.copy().transpose() - bcopy_GPU.add_sums(tmp_GPU, axis=0, beta=-1) - err = bcopy_GPU.euclid_norm()**2 + if nbb: + err = np.sum((u - uprev)**2) / np.sum((u)**2) + \ + np.sum((v - vprev)**2) / np.sum((v)**2) + else: + # compute right marginal tmp2= (diag(u)Kdiag(v))^T1 + tmp2=np.sum(u[:,None]*K*v[None,:],0) + #tmp2=np.einsum('i,ij,j->j', u, K, v) + err = np.linalg.norm(tmp2 - b)**2 # violation of marginal if log: log['err'].append(err) @@ -150,20 +170,31 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, print( '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19) print('{:5d}|{:8e}|'.format(cpt, err)) - cpt += 1 - if log: - log['u'] = u_GPU.asarray() - log['v'] = v_GPU.asarray() - - K_GPU.mult_by_col(u_GPU, target=K_GPU) - K_GPU.mult_by_row(v_GPU.transpose(), target=K_GPU) - - if returnAsGPU: - res = K_GPU - else: - res = K_GPU.asarray() - + cpt = cpt + 1 if log: - return res, log - else: - return res + log['u'] = u + log['v'] = v + + if nbb: # return only loss + #res = np.einsum('ik,ij,jk,ij->k', u, K, v, M) (explodes cupy memory) + res=np.empty(nbb) + for i in range(nbb): + res[i]=np.sum(u[:,None,i]*(K*M)*v[None,:,i]) + if to_numpy: + res=utils.to_np(res) + if log: + return res, log + else: + return res + + else: # return OT matrix + res=u.reshape((-1, 1)) * K * v.reshape((1, -1)) + if to_numpy: + res=utils.to_np(res) + if log: + return res, log + else: + return res + +# define sinkhorn as sinkhorn_knopp +sinkhorn=sinkhorn_knopp \ No newline at end of file diff --git a/ot/gpu/da.py b/ot/gpu/da.py index 71a485a..8bcc2aa 100644 --- a/ot/gpu/da.py +++ b/ot/gpu/da.py @@ -10,81 +10,24 @@ Domain adaptation with optimal transport with GPU implementation # # License: MIT License - -import numpy as np -from ..utils import unif -from ..da import OTDA +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations +import numpy as npp +from . import utils from .bregman import sinkhorn -import cudamat - - -def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False): - """ - Compute the pairwise euclidean distance between matrices a and b. - - - Parameters - ---------- - a : np.ndarray (n, f) - first matrice - b : np.ndarray (m, f) - second matrice - returnAsGPU : boolean, optional (default False) - if True, returns cudamat matrix still on GPU, else return np.ndarray - squared : boolean, optional (default False) - if True, return squared euclidean distance matrice - - Returns - ------- - c : (n x m) np.ndarray or cudamat.CUDAMatrix - pairwise euclidean distance distance matrix +def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, + numInnerItermax=200, stopInnerThr=1e-9, verbose=False, + log=False,to_numpy=True): """ - # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m). - # First compute in c_GPU the squared euclidean distance. And return its - # square root. At each cell [i,j] of c, we want to have - # sum{k in range(f)} ( (a[i,k] - b[j,k])^2 ). We know that - # (a-b)^2 = a^2 -2ab +b^2. Thus we want to have in each cell of c: - # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2). - - a_GPU = cudamat.CUDAMatrix(a) - b_GPU = cudamat.CUDAMatrix(b) - - # Multiply a by b transpose to obtain in each cell [i,j] of c the - # value sum{k in range(f)} ( a[i,k]b[j,k] ) - c_GPU = cudamat.dot(a_GPU, b_GPU.transpose()) - # multiply by -2 to have sum{k in range(f)} ( -2a[i,k]b[j,k] ) - c_GPU.mult(-2) - - # Compute the vectors of the sum of squared elements. - a_GPU = cudamat.pow(a_GPU, 2).sum(axis=1) - b_GPU = cudamat.pow(b_GPU, 2).sum(axis=1) - - # Add the vectors in each columns (respectivly rows) of c. - # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] ) - c_GPU.add_col_vec(a_GPU) - # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2) - c_GPU.add_row_vec(b_GPU.transpose()) - - if not squared: - c_GPU = cudamat.sqrt(c_GPU) - - if returnAsGPU: - return c_GPU - else: - return c_GPU.asarray() - - -def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, - numInnerItermax=200, stopInnerThr=1e-9, - verbose=False, log=False): - """ - Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization + Solve the entropic regularization optimal transport problem with nonconvex + group lasso regularization The function solves the following optimization problem: .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)+ \eta \Omega_g(\gamma) + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma) + + \eta \Omega_g(\gamma) s.t. \gamma 1 = a @@ -94,11 +37,16 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, where : - M is the (ns,nt) metric cost matrix - - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - :math:`\Omega_g` is the group lasso regulaization term :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` where :math:`\mathcal{I}_c` are the index of samples from class c in the source domain. + - :math:`\Omega_e` is the entropic regularization term + :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`\Omega_g` is the group lasso regulaization term + :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` + where :math:`\mathcal{I}_c` are the index of samples from class c + in the source domain. - a and b are source and target weights (sum to 1) - The algorithm used for solving the problem is the generalised conditional gradient as proposed in [5]_ [7]_ + The algorithm used for solving the problem is the generalised conditional + gradient as proposed in [5]_ [7]_ Parameters @@ -109,7 +57,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, labels of samples in the source domain b : np.ndarray (nt,) samples weights in the target domain - M_GPU : cudamat.CUDAMatrix (ns,nt) + M : np.ndarray (ns,nt) loss matrix reg : float Regularization term for entropic regularization >0 @@ -138,8 +86,13 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, References ---------- - .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 - .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized conditional gradient: analysis of convergence and applications. arXiv preprint arXiv:1510.06567. + .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE + Transactions on Pattern Analysis and Machine Intelligence , + vol.PP, no.99, pp.1-1 + .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). + Generalized conditional gradient: analysis of convergence + and applications. arXiv preprint arXiv:1510.06567. See Also -------- @@ -148,108 +101,35 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, ot.optim.cg : General regularized OT """ + + a, labels_a, b, M = utils.to_gpu(a, labels_a, b, M) + + p = 0.5 epsilon = 1e-3 - Nfin = len(b) indices_labels = [] - classes = np.unique(labels_a) + labels_a2=cp.asnumpy(labels_a) + classes = npp.unique(labels_a2) for c in classes: - idxc, = np.where(labels_a == c) - indices_labels.append(cudamat.CUDAMatrix(idxc.reshape(1, -1))) + idxc, = utils.to_gpu(npp.where(labels_a2 == c)) + indices_labels.append(idxc) - Mreg_GPU = cudamat.empty(M_GPU.shape) - W_GPU = cudamat.empty(M_GPU.shape).assign(0) + W = np.zeros(M.shape) for cpt in range(numItermax): - Mreg_GPU.assign(M_GPU) - Mreg_GPU.add_mult(W_GPU, eta) - transp_GPU = sinkhorn(a, b, Mreg_GPU, reg, numItermax=numInnerItermax, - stopThr=stopInnerThr, returnAsGPU=True) + Mreg = M + eta * W + transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax, + stopThr=stopInnerThr,to_numpy=False) # the transport has been computed. Check if classes are really # separated - W_GPU.assign(1) - W_GPU = W_GPU.transpose() + W = np.ones(M.shape) for (i, c) in enumerate(classes): - (_, nbRow) = indices_labels[i].shape - tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0) - transp_GPU.transpose().select_columns(indices_labels[i], tmpC_GPU) - majs_GPU = tmpC_GPU.sum(axis=1).add(epsilon) - cudamat.pow(majs_GPU, (p - 1)) - majs_GPU.mult(p) - - tmpC_GPU.assign(0) - tmpC_GPU.add_col_vec(majs_GPU) - W_GPU.set_selected_columns(indices_labels[i], tmpC_GPU) - - W_GPU = W_GPU.transpose() - - return transp_GPU.asarray() + majs = np.sum(transp[indices_labels[i]], axis=0) + majs = p * ((majs + epsilon)**(p - 1)) + W[indices_labels[i]] = majs - -class OTDA_GPU(OTDA): - - def normalizeM(self, norm): - if norm == "median": - self.M_GPU.divide(float(np.median(self.M_GPU.asarray()))) - elif norm == "max": - self.M_GPU.divide(float(np.max(self.M_GPU.asarray()))) - elif norm == "log": - self.M_GPU.add(1) - cudamat.log(self.M_GPU) - elif norm == "loglog": - self.M_GPU.add(1) - cudamat.log(self.M_GPU) - self.M_GPU.add(1) - cudamat.log(self.M_GPU) - - -class OTDA_sinkhorn(OTDA_GPU): - - def fit(self, xs, xt, reg=1, ws=None, wt=None, norm=None, **kwargs): - cudamat.init() - xs = np.asarray(xs, dtype=np.float64) - xt = np.asarray(xt, dtype=np.float64) - - self.xs = xs - self.xt = xt - - if wt is None: - wt = unif(xt.shape[0]) - if ws is None: - ws = unif(xs.shape[0]) - - self.ws = ws - self.wt = wt - - self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True, - squared=True) - self.normalizeM(norm) - self.G = sinkhorn(ws, wt, self.M_GPU, reg, **kwargs) - self.computed = True - - -class OTDA_lpl1(OTDA_GPU): - - def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None, - **kwargs): - cudamat.init() - xs = np.asarray(xs, dtype=np.float64) - xt = np.asarray(xt, dtype=np.float64) - - self.xs = xs - self.xt = xt - - if wt is None: - wt = unif(xt.shape[0]) - if ws is None: - ws = unif(xs.shape[0]) - - self.ws = ws - self.wt = wt - - self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True, - squared=True) - self.normalizeM(norm) - self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M_GPU, reg, eta, **kwargs) - self.computed = True + if to_numpy: + return utils.to_np(transp) + else: + return transp diff --git a/ot/gpu/utils.py b/ot/gpu/utils.py new file mode 100644 index 0000000..6d0c853 --- /dev/null +++ b/ot/gpu/utils.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- +""" +Utility functions for GPU +""" + +# Author: Remi Flamary +# Nicolas Courty +# Leo Gautheron +# +# License: MIT License + +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations + + + +def euclidean_distances(a, b, squared=False, to_numpy=True): + """ + Compute the pairwise euclidean distance between matrices a and b. + Parameters + ---------- + a : np.ndarray (n, f) + first matrix + b : np.ndarray (m, f) + second matrix + gpu : boolean, optional (default False) + if True and the module cupy is available, the computation is done + on the GPU and the type of the matrix returned is cupy.ndarray. + Otherwise, compute on the CPU and returns np.ndarray. + squared : boolean, optional (default False) + if True, return squared euclidean distance matrix + Returns + ------- + c : (n x m) np.ndarray or cupy.ndarray + pairwise euclidean distance distance matrix + """ + + a, b = to_gpu(a, b) + + a2=np.sum(np.square(a),1) + b2=np.sum(np.square(b),1) + + c=-2*np.dot(a,b.T) + c+=a2[:,None] + c+=b2[None,:] + + if not squared: + np.sqrt(c, out=c) + if to_numpy: + return to_np(c) + else: + return c + +def dist(x1, x2=None, metric='sqeuclidean', to_numpy=True): + """Compute distance between samples in x1 and x2 on gpu + + Parameters + ---------- + + x1 : np.array (n1,d) + matrix with n1 samples of size d + x2 : np.array (n2,d), optional + matrix with n2 samples of size d (if None then x2=x1) + metric : str + Metric from 'sqeuclidean', 'euclidean', + + + Returns + ------- + + M : np.array (n1,n2) + distance matrix computed with given metric + + """ + if x2 is None: + x2 = x1 + if metric == "sqeuclidean": + return euclidean_distances(x1, x2, squared=True, to_numpy=to_numpy) + elif metric == "euclidean": + return euclidean_distances(x1, x2, squared=False, to_numpy=to_numpy) + else: + raise NotImplementedError + + + +def to_gpu(*args): + """ Upload numpy arrays to GPU and return them""" + if len(args) > 1: + return (cp.asarray(x) for x in args) + else: + return cp.asarray(args[0]) + + + +def to_np(*args): + """ convert GPU arras to numpy and return them""" + if len(args) > 1: + return (cp.asnumpy(x) for x in args) + else: + return cp.asnumpy(args[0]) \ No newline at end of file -- cgit v1.2.3 From f45f7a68b221ec5b619b8fd8de797815a1eecf43 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Mon, 24 Sep 2018 14:30:44 +0200 Subject: pep8 --- ot/gpu/bregman.py | 20 ++++++++++---------- ot/gpu/da.py | 16 ++++++++-------- ot/gpu/utils.py | 34 ++++++++++++++++------------------ 3 files changed, 34 insertions(+), 36 deletions(-) (limited to 'ot') diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py index 912104c..6714098 100644 --- a/ot/gpu/bregman.py +++ b/ot/gpu/bregman.py @@ -8,12 +8,11 @@ Bregman projections for regularized OT with GPU # # License: MIT License -import cupy as np # np used for matrix computation -import cupy as cp # cp used for cupy specific operations +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations from . import utils - def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, log=False, to_numpy=True, **kwargs): """ @@ -159,7 +158,7 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, np.sum((v - vprev)**2) / np.sum((v)**2) else: # compute right marginal tmp2= (diag(u)Kdiag(v))^T1 - tmp2=np.sum(u[:,None]*K*v[None,:],0) + tmp2 = np.sum(u[:, None] * K * v[None, :], 0) #tmp2=np.einsum('i,ij,j->j', u, K, v) err = np.linalg.norm(tmp2 - b)**2 # violation of marginal if log: @@ -177,24 +176,25 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, if nbb: # return only loss #res = np.einsum('ik,ij,jk,ij->k', u, K, v, M) (explodes cupy memory) - res=np.empty(nbb) + res = np.empty(nbb) for i in range(nbb): - res[i]=np.sum(u[:,None,i]*(K*M)*v[None,:,i]) + res[i] = np.sum(u[:, None, i] * (K * M) * v[None, :, i]) if to_numpy: - res=utils.to_np(res) + res = utils.to_np(res) if log: return res, log else: return res else: # return OT matrix - res=u.reshape((-1, 1)) * K * v.reshape((1, -1)) + res = u.reshape((-1, 1)) * K * v.reshape((1, -1)) if to_numpy: - res=utils.to_np(res) + res = utils.to_np(res) if log: return res, log else: return res + # define sinkhorn as sinkhorn_knopp -sinkhorn=sinkhorn_knopp \ No newline at end of file +sinkhorn = sinkhorn_knopp diff --git a/ot/gpu/da.py b/ot/gpu/da.py index 8bcc2aa..8c63870 100644 --- a/ot/gpu/da.py +++ b/ot/gpu/da.py @@ -10,15 +10,16 @@ Domain adaptation with optimal transport with GPU implementation # # License: MIT License -import cupy as np # np used for matrix computation -import cupy as cp # cp used for cupy specific operations +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations import numpy as npp from . import utils from .bregman import sinkhorn + def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, numInnerItermax=200, stopInnerThr=1e-9, verbose=False, - log=False,to_numpy=True): + log=False, to_numpy=True): """ Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization @@ -101,15 +102,14 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, ot.optim.cg : General regularized OT """ - + a, labels_a, b, M = utils.to_gpu(a, labels_a, b, M) - - + p = 0.5 epsilon = 1e-3 indices_labels = [] - labels_a2=cp.asnumpy(labels_a) + labels_a2 = cp.asnumpy(labels_a) classes = npp.unique(labels_a2) for c in classes: idxc, = utils.to_gpu(npp.where(labels_a2 == c)) @@ -120,7 +120,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, for cpt in range(numItermax): Mreg = M + eta * W transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax, - stopThr=stopInnerThr,to_numpy=False) + stopThr=stopInnerThr, to_numpy=False) # the transport has been computed. Check if classes are really # separated W = np.ones(M.shape) diff --git a/ot/gpu/utils.py b/ot/gpu/utils.py index 6d0c853..d349a6d 100644 --- a/ot/gpu/utils.py +++ b/ot/gpu/utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- """ -Utility functions for GPU +Utility functions for GPU """ # Author: Remi Flamary @@ -9,9 +9,8 @@ Utility functions for GPU # # License: MIT License -import cupy as np # np used for matrix computation -import cupy as cp # cp used for cupy specific operations - +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations def euclidean_distances(a, b, squared=False, to_numpy=True): @@ -34,16 +33,16 @@ def euclidean_distances(a, b, squared=False, to_numpy=True): c : (n x m) np.ndarray or cupy.ndarray pairwise euclidean distance distance matrix """ - + a, b = to_gpu(a, b) - - a2=np.sum(np.square(a),1) - b2=np.sum(np.square(b),1) - - c=-2*np.dot(a,b.T) - c+=a2[:,None] - c+=b2[None,:] - + + a2 = np.sum(np.square(a), 1) + b2 = np.sum(np.square(b), 1) + + c = -2 * np.dot(a, b.T) + c += a2[:, None] + c += b2[None, :] + if not squared: np.sqrt(c, out=c) if to_numpy: @@ -51,6 +50,7 @@ def euclidean_distances(a, b, squared=False, to_numpy=True): else: return c + def dist(x1, x2=None, metric='sqeuclidean', to_numpy=True): """Compute distance between samples in x1 and x2 on gpu @@ -61,8 +61,8 @@ def dist(x1, x2=None, metric='sqeuclidean', to_numpy=True): matrix with n1 samples of size d x2 : np.array (n2,d), optional matrix with n2 samples of size d (if None then x2=x1) - metric : str - Metric from 'sqeuclidean', 'euclidean', + metric : str + Metric from 'sqeuclidean', 'euclidean', Returns @@ -80,7 +80,6 @@ def dist(x1, x2=None, metric='sqeuclidean', to_numpy=True): return euclidean_distances(x1, x2, squared=False, to_numpy=to_numpy) else: raise NotImplementedError - def to_gpu(*args): @@ -91,10 +90,9 @@ def to_gpu(*args): return cp.asarray(args[0]) - def to_np(*args): """ convert GPU arras to numpy and return them""" if len(args) > 1: return (cp.asnumpy(x) for x in args) else: - return cp.asnumpy(args[0]) \ No newline at end of file + return cp.asnumpy(args[0]) -- cgit v1.2.3 From 75e78022d2df350ea220cee1b5e759ef9fc35a5b Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Mon, 24 Sep 2018 15:05:09 +0200 Subject: update tests --- ot/gpu/bregman.py | 10 +++---- test/test_gpu.py | 89 ++++++++++++++++++++++++++++++------------------------- 2 files changed, 53 insertions(+), 46 deletions(-) (limited to 'ot') diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py index 6714098..600ead4 100644 --- a/ot/gpu/bregman.py +++ b/ot/gpu/bregman.py @@ -90,14 +90,14 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, """ - a = cp.asarray(a, dtype=np.float64) - b = cp.asarray(b, dtype=np.float64) - M = cp.asarray(M, dtype=np.float64) + a = cp.asarray(a) + b = cp.asarray(b) + M = cp.asarray(M) if len(a) == 0: - a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0] + a = np.ones((M.shape[0],)) / M.shape[0] if len(b) == 0: - b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] + b = np.ones((M.shape[1],)) / M.shape[1] # init data Nini = len(a) diff --git a/test/test_gpu.py b/test/test_gpu.py index 1e97c45..51a0cff 100644 --- a/test/test_gpu.py +++ b/test/test_gpu.py @@ -17,63 +17,70 @@ except ImportError: @pytest.mark.skipif(nogpu, reason="No GPU available") -def test_gpu_sinkhorn(): +def test_gpu_dist(): rng = np.random.RandomState(0) - def describe_res(r): - print("min:{:.3E}, max::{:.3E}, mean::{:.3E}, std::{:.3E}".format( - np.min(r), np.max(r), np.mean(r), np.std(r))) - for n_samples in [50, 100, 500, 1000]: print(n_samples) a = rng.rand(n_samples // 4, 100) b = rng.rand(n_samples, 100) - time1 = time.time() - transport = ot.da.OTDA_sinkhorn() - transport.fit(a, b) - G1 = transport.G - time2 = time.time() - transport = ot.gpu.da.OTDA_sinkhorn() - transport.fit(a, b) - G2 = transport.G - time3 = time.time() - print("Normal sinkhorn, time: {:6.2f} sec ".format(time2 - time1)) - describe_res(G1) - print(" GPU sinkhorn, time: {:6.2f} sec ".format(time3 - time2)) - describe_res(G2) - - np.testing.assert_allclose(G1, G2, rtol=1e-5, atol=1e-5) + + M = ot.dist(a.copy(), b.copy()) + M2 = ot.gpu.dist(a.copy(), b.copy()) + + np.testing.assert_allclose(M, M2, rtol=1e-10) + + M2 = ot.gpu.dist(a.copy(), b.copy(), to_numpy=False) @pytest.mark.skipif(nogpu, reason="No GPU available") -def test_gpu_sinkhorn_lpl1(): +def test_gpu_sinkhorn(): rng = np.random.RandomState(0) - def describe_res(r): - print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}" - .format(np.min(r), np.max(r), np.mean(r), np.std(r))) + for n_samples in [50, 100, 500, 1000]: + a = rng.rand(n_samples // 4, 100) + b = rng.rand(n_samples, 100) + + wa = ot.unif(n_samples // 4) + wb = ot.unif(n_samples) + + M = ot.dist(a.copy(), b.copy()) + M2 = ot.gpu.dist(a.copy(), b.copy(), to_numpy=False) + + reg = 1 + + G = ot.sinkhorn(wa, wb, M, reg) + G1 = ot.gpu.sinkhorn(wa, wb, M, reg) + + np.testing.assert_allclose(G1, G, rtol=1e-10) + + G2 = ot.gpu.sinkhorn(wa, wb, M2, reg, to_numpy=False) + + +@pytest.mark.skipif(nogpu, reason="No GPU available") +def test_gpu_sinkhorn_lpl1(): + + rng = np.random.RandomState(0) for n_samples in [50, 100, 500]: print(n_samples) a = rng.rand(n_samples // 4, 100) labels_a = np.random.randint(10, size=(n_samples // 4)) b = rng.rand(n_samples, 100) - time1 = time.time() - transport = ot.da.OTDA_lpl1() - transport.fit(a, labels_a, b) - G1 = transport.G - time2 = time.time() - transport = ot.gpu.da.OTDA_lpl1() - transport.fit(a, labels_a, b) - G2 = transport.G - time3 = time.time() - print("Normal sinkhorn lpl1, time: {:6.2f} sec ".format( - time2 - time1)) - describe_res(G1) - print(" GPU sinkhorn lpl1, time: {:6.2f} sec ".format( - time3 - time2)) - describe_res(G2) - - np.testing.assert_allclose(G1, G2, rtol=1e-3, atol=1e-3) + + wa = ot.unif(n_samples // 4) + wb = ot.unif(n_samples) + + M = ot.dist(a.copy(), b.copy()) + M2 = ot.gpu.dist(a.copy(), b.copy(), to_numpy=False) + + reg = 1 + + G = ot.da.sinkhorn_lpl1_mm(wa, labels_a, wb, M, reg) + G1 = ot.gpu.da.sinkhorn_lpl1_mm(wa, labels_a, wb, M, reg) + + np.testing.assert_allclose(G1, G, rtol=1e-10) + + G2 = ot.gpu.da.sinkhorn_lpl1_mm(wa, labels_a, wb, M2, reg, to_numpy=False) -- cgit v1.2.3 From ee8ed4fa101861eec9e578f09aee4367af593af1 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Fri, 28 Sep 2018 09:41:22 +0200 Subject: update documentation --- README.md | 10 +++------- docs/source/all.rst | 6 ++++++ docs/source/conf.py | 2 +- ot/gpu/__init__.py | 22 +++++++++++++++++++++- ot/gpu/bregman.py | 7 ++++++- ot/gpu/da.py | 8 +++++++- ot/gpu/utils.py | 15 +++++++++------ 7 files changed, 53 insertions(+), 17 deletions(-) (limited to 'ot') diff --git a/README.md b/README.md index 5f37ad6..e49c8da 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ This open source Python library provide several solvers for optimization problem It provides the following solvers: * OT Network Flow solver for the linear program/ Earth Movers Distance [1]. -* Entropic regularization OT solver with Sinkhorn Knopp Algorithm [2] and stabilized version [9][10] with optional GPU implementation (requires cudamat). +* Entropic regularization OT solver with Sinkhorn Knopp Algorithm [2] and stabilized version [9][10] with optional GPU implementation (requires cupy). * Smooth optimal transport solvers (dual and semi-dual) for KL and squared L2 regularizations [17]. * Non regularized Wasserstein barycenters [16] with LP solver (only small scale). * Bregman projections for Wasserstein barycenter [3] and unmixing [4]. @@ -83,12 +83,8 @@ Some sub-modules require additional dependences which are discussed below ``` pip install pymanopt autograd ``` -* **ot.gpu** (GPU accelerated OT) depends on cudamat that have to be installed with: -``` -git clone https://github.com/cudamat/cudamat.git -cd cudamat -python setup.py install --user # for user install (no root) -``` +* **ot.gpu** (GPU accelerated OT) depends on cupy that have to be installed following instructions on [this page](https://docs-cupy.chainer.org/en/stable/install.html). + obviously you need CUDA installed and a compatible GPU. diff --git a/docs/source/all.rst b/docs/source/all.rst index 9459023..1eaf3b1 100644 --- a/docs/source/all.rst +++ b/docs/source/all.rst @@ -48,6 +48,12 @@ ot.da .. automodule:: ot.da :members: + +ot.gpu +-------- + +.. automodule:: ot.gpu + :members: ot.dr -------- diff --git a/docs/source/conf.py b/docs/source/conf.py index 114245d..433eca6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -31,7 +31,7 @@ class Mock(MagicMock): @classmethod def __getattr__(cls, name): return MagicMock() -MOCK_MODULES = ['ot.lp.emd_wrap','autograd','pymanopt','cudamat','autograd.numpy','pymanopt.manifolds','pymanopt.solvers'] +MOCK_MODULES = ['ot.lp.emd_wrap','autograd','pymanopt','cupy','autograd.numpy','pymanopt.manifolds','pymanopt.solvers'] # 'autograd.numpy','pymanopt.manifolds','pymanopt.solvers', sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) # !!!! diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py index de4825d..9de2c40 100644 --- a/ot/gpu/__init__.py +++ b/ot/gpu/__init__.py @@ -1,8 +1,28 @@ # -*- coding: utf-8 -*- +""" + + +This module implement GPU ilmplementation for several OT solvers and utility +functions. The GPU backend in handled by `cupy +`_. + +By default, the functions in this module accept and return numpy arrays +in order to proide drop-in replacement for the other POT function but +the transfer between CPU en GPU comes with a significant overhead. + +In order to get the best erformances, we recommend to given only cupy +arrays to the functions and desactivate the conversion to numpy of the +result of the function with parameter ``to_numpy=False``. + + + + +""" from . import bregman from . import da from .bregman import sinkhorn +from .da from . import utils from .utils import dist, to_gpu, to_np @@ -13,4 +33,4 @@ from .utils import dist, to_gpu, to_np # # License: MIT License -__all__ = ["utils", "dist", "sinkhorn"] +__all__ = ["utils", "dist", "sinkhorn", 'bregman', 'da', 'to_gpu', 'to_np'] diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py index 600ead4..978b307 100644 --- a/ot/gpu/bregman.py +++ b/ot/gpu/bregman.py @@ -16,7 +16,10 @@ from . import utils def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, log=False, to_numpy=True, **kwargs): """ - Solve the entropic regularization optimal transport problem and return the OT matrix + Solve the entropic regularization optimal transport on GPU + + If the input matrix are in numpy format, they will be uploaded to the + GPU first which can incur significant time overhead. The function solves the following optimization problem: @@ -56,6 +59,8 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, Print information along iterations log : bool, optional record log if True + to_numpy : boolean, optional (default True) + If true convert back the GPU array result to numpy format. Returns diff --git a/ot/gpu/da.py b/ot/gpu/da.py index 8c63870..6aba29c 100644 --- a/ot/gpu/da.py +++ b/ot/gpu/da.py @@ -22,7 +22,11 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, log=False, to_numpy=True): """ Solve the entropic regularization optimal transport problem with nonconvex - group lasso regularization + group lasso regularization on GPU + + If the input matrix are in numpy format, they will be uploaded to the + GPU first which can incur significant time overhead. + The function solves the following optimization problem: @@ -74,6 +78,8 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, Print information along iterations log : bool, optional record log if True + to_numpy : boolean, optional (default True) + If true convert back the GPU array result to numpy format. Returns diff --git a/ot/gpu/utils.py b/ot/gpu/utils.py index d349a6d..41e168a 100644 --- a/ot/gpu/utils.py +++ b/ot/gpu/utils.py @@ -16,19 +16,22 @@ import cupy as cp # cp used for cupy specific operations def euclidean_distances(a, b, squared=False, to_numpy=True): """ Compute the pairwise euclidean distance between matrices a and b. - Parameters + + If the input matrix are in numpy format, they will be uploaded to the + GPU first which can incur significant time overhead. + + Parameters ---------- a : np.ndarray (n, f) first matrix b : np.ndarray (m, f) second matrix - gpu : boolean, optional (default False) - if True and the module cupy is available, the computation is done - on the GPU and the type of the matrix returned is cupy.ndarray. - Otherwise, compute on the CPU and returns np.ndarray. + to_numpy : boolean, optional (default True) + If true convert back the GPU array result to numpy format. squared : boolean, optional (default False) if True, return squared euclidean distance matrix - Returns + + Returns ------- c : (n x m) np.ndarray or cupy.ndarray pairwise euclidean distance distance matrix -- cgit v1.2.3 From fa7f3ddbed0267edf634e359ce5b3a335807af3c Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Fri, 28 Sep 2018 09:45:21 +0200 Subject: correction import in ot.gpu --- ot/gpu/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'ot') diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py index 9de2c40..0187a4f 100644 --- a/ot/gpu/__init__.py +++ b/ot/gpu/__init__.py @@ -1,8 +1,7 @@ # -*- coding: utf-8 -*- """ - -This module implement GPU ilmplementation for several OT solvers and utility +This module provides GPU implementation for several OT solvers and utility functions. The GPU backend in handled by `cupy `_. @@ -22,7 +21,7 @@ result of the function with parameter ``to_numpy=False``. from . import bregman from . import da from .bregman import sinkhorn -from .da +from .da import sinkhorn_lpl1_mm from . import utils from .utils import dist, to_gpu, to_np @@ -33,4 +32,5 @@ from .utils import dist, to_gpu, to_np # # License: MIT License -__all__ = ["utils", "dist", "sinkhorn", 'bregman', 'da', 'to_gpu', 'to_np'] +__all__ = ["utils", "dist", "sinkhorn", + "sinkhorn_lpl1_mm", 'bregman', 'da', 'to_gpu', 'to_np'] -- cgit v1.2.3