diff options
-rw-r--r-- | .github/ISSUE_TEMPLATE/bug_report.md | 36 | ||||
-rw-r--r-- | README.md | 10 | ||||
-rw-r--r-- | docs/source/all.rst | 6 | ||||
-rw-r--r-- | docs/source/conf.py | 2 | ||||
-rw-r--r-- | ot/gpu/__init__.py | 34 | ||||
-rw-r--r-- | ot/gpu/bregman.py | 148 | ||||
-rw-r--r-- | ot/gpu/da.py | 136 | ||||
-rw-r--r-- | ot/gpu/utils.py | 101 | ||||
-rw-r--r-- | test/test_bregman.py | 2 | ||||
-rw-r--r-- | test/test_gpu.py | 101 |
10 files changed, 381 insertions, 195 deletions
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md new file mode 100644 index 0000000..7f8acda --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.md @@ -0,0 +1,36 @@ +--- +name: Bug report +about: Create a report to help us improve POT + +--- + +**Describe the bug** +A clear and concise description of what the bug is. + +**To Reproduce** +Steps to reproduce the behavior: +1 ... +2. + +**Expected behavior** +A clear and concise description of what you expected to happen. + +**Screenshots** +If applicable, add screenshots to help explain your problem. + +**Desktop (please complete the following information):** + - OS: [e.g. MacOSX, Windows, Ubuntu] + - Python version [2.7,3.6] +- How was POT installed [source, pip, conda] + +Output of the following code snippet: +```python +import platform; print(platform.platform()) +import sys; print("Python", sys.version) +import numpy; print("NumPy", numpy.__version__) +import scipy; print("SciPy", scipy.__version__) +import ot; print("POT", ot.__version__) +``` + +**Additional context** +Add any other context about the problem here. @@ -15,7 +15,7 @@ This open source Python library provide several solvers for optimization problem It provides the following solvers: * OT Network Flow solver for the linear program/ Earth Movers Distance [1]. -* Entropic regularization OT solver with Sinkhorn Knopp Algorithm [2] and stabilized version [9][10] and greedy SInkhorn [22] with optional GPU implementation (requires cudamat). +* Entropic regularization OT solver with Sinkhorn Knopp Algorithm [2] and stabilized version [9][10] and greedy SInkhorn [22] with optional GPU implementation (requires cupy). * Smooth optimal transport solvers (dual and semi-dual) for KL and squared L2 regularizations [17]. * Non regularized Wasserstein barycenters [16] with LP solver (only small scale). * Bregman projections for Wasserstein barycenter [3], convolutional barycenter [21] and unmixing [4]. @@ -84,12 +84,8 @@ Some sub-modules require additional dependences which are discussed below ``` pip install pymanopt autograd ``` -* **ot.gpu** (GPU accelerated OT) depends on cudamat that have to be installed with: -``` -git clone https://github.com/cudamat/cudamat.git -cd cudamat -python setup.py install --user # for user install (no root) -``` +* **ot.gpu** (GPU accelerated OT) depends on cupy that have to be installed following instructions on [this page](https://docs-cupy.chainer.org/en/stable/install.html). + obviously you need CUDA installed and a compatible GPU. diff --git a/docs/source/all.rst b/docs/source/all.rst index 94da2ed..32930fd 100644 --- a/docs/source/all.rst +++ b/docs/source/all.rst @@ -43,6 +43,12 @@ ot.da .. automodule:: ot.da :members: + +ot.gpu +-------- + +.. automodule:: ot.gpu + :members: ot.dr -------- diff --git a/docs/source/conf.py b/docs/source/conf.py index 114245d..433eca6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -31,7 +31,7 @@ class Mock(MagicMock): @classmethod def __getattr__(cls, name): return MagicMock() -MOCK_MODULES = ['ot.lp.emd_wrap','autograd','pymanopt','cudamat','autograd.numpy','pymanopt.manifolds','pymanopt.solvers'] +MOCK_MODULES = ['ot.lp.emd_wrap','autograd','pymanopt','cupy','autograd.numpy','pymanopt.manifolds','pymanopt.solvers'] # 'autograd.numpy','pymanopt.manifolds','pymanopt.solvers', sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) # !!!! diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py index ed6dcc4..213578c 100644 --- a/ot/gpu/__init__.py +++ b/ot/gpu/__init__.py @@ -1,17 +1,37 @@ # -*- coding: utf-8 -*- +""" -from . import bregman -from . import da -from .bregman import sinkhorn +This module provides GPU implementation for several OT solvers and utility +functions. The GPU backend in handled by `cupy +<https://cupy.chainer.org/>`_. + +By default, the functions in this module accept and return numpy arrays +in order to proide drop-in replacement for the other POT function but +the transfer between CPU en GPU comes with a significant overhead. + +In order to get the best erformances, we recommend to given only cupy +arrays to the functions and desactivate the conversion to numpy of the +result of the function with parameter ``to_numpy=False``. + +""" # Author: Remi Flamary <remi.flamary@unice.fr> # Leo Gautheron <https://github.com/aje> # # License: MIT License -import warnings +from . import bregman +from . import da +from .bregman import sinkhorn +from .da import sinkhorn_lpl1_mm + +from . import utils +from .utils import dist, to_gpu, to_np + + + + -warnings.warn("the ot.gpu module is deprecated because cudamat in no longer maintained", DeprecationWarning, - stacklevel=2) +__all__ = ["utils", "dist", "sinkhorn", + "sinkhorn_lpl1_mm", 'bregman', 'da', 'to_gpu', 'to_np'] -__all__ = ["bregman", "da", "sinkhorn"] diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py index 47939c4..978b307 100644 --- a/ot/gpu/bregman.py +++ b/ot/gpu/bregman.py @@ -8,14 +8,18 @@ Bregman projections for regularized OT with GPU # # License: MIT License -import numpy as np -import cudamat +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations +from . import utils -def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, - log=False, returnAsGPU=False): - r""" - Solve the entropic regularization optimal transport problem on GPU +def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, + verbose=False, log=False, to_numpy=True, **kwargs): + """ + Solve the entropic regularization optimal transport on GPU + + If the input matrix are in numpy format, they will be uploaded to the + GPU first which can incur significant time overhead. The function solves the following optimization problem: @@ -40,9 +44,10 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, ---------- a : np.ndarray (ns,) samples weights in the source domain - b : np.ndarray (nt,) - samples in the target domain - M_GPU : cudamat.CUDAMatrix (ns,nt) + b : np.ndarray (nt,) or np.ndarray (nt,nbb) + samples in the target domain, compute sinkhorn with multiple targets + and fixed M if b is a matrix (return OT loss + dual variables in log) + M : np.ndarray (ns,nt) loss matrix reg : float Regularization term >0 @@ -54,8 +59,9 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, Print information along iterations log : bool, optional record log if True - returnAsGPU : bool, optional - return the OT matrix as a cudamat.CUDAMatrix + to_numpy : boolean, optional (default True) + If true convert back the GPU array result to numpy format. + Returns ------- @@ -88,60 +94,78 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, ot.optim.cg : General regularized OT """ + + a = cp.asarray(a) + b = cp.asarray(b) + M = cp.asarray(M) + + if len(a) == 0: + a = np.ones((M.shape[0],)) / M.shape[0] + if len(b) == 0: + b = np.ones((M.shape[1],)) / M.shape[1] + # init data Nini = len(a) Nfin = len(b) + if len(b.shape) > 1: + nbb = b.shape[1] + else: + nbb = 0 + if log: log = {'err': []} # we assume that no distances are null except those of the diagonal of # distances - u = (np.ones(Nini) / Nini).reshape((Nini, 1)) - u_GPU = cudamat.CUDAMatrix(u) - a_GPU = cudamat.CUDAMatrix(a.reshape((Nini, 1))) - ones_GPU = cudamat.empty(u_GPU.shape).assign(1) - v = (np.ones(Nfin) / Nfin).reshape((Nfin, 1)) - v_GPU = cudamat.CUDAMatrix(v) - b_GPU = cudamat.CUDAMatrix(b.reshape((Nfin, 1))) - - M_GPU.divide(-reg) + if nbb: + u = np.ones((Nini, nbb)) / Nini + v = np.ones((Nfin, nbb)) / Nfin + else: + u = np.ones(Nini) / Nini + v = np.ones(Nfin) / Nfin - K_GPU = cudamat.exp(M_GPU) + # print(reg) - ones_GPU.divide(a_GPU, target=a_GPU) - Kp_GPU = cudamat.empty(K_GPU.shape) - K_GPU.mult_by_col(a_GPU, target=Kp_GPU) + # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute + K = np.empty(M.shape, dtype=M.dtype) + np.divide(M, -reg, out=K) + np.exp(K, out=K) - tmp_GPU = cudamat.empty(K_GPU.shape) + # print(np.min(K)) + tmp2 = np.empty(b.shape, dtype=M.dtype) + Kp = (1 / a).reshape(-1, 1) * K cpt = 0 err = 1 while (err > stopThr and cpt < numItermax): - uprev_GPU = u_GPU.copy() - vprev_GPU = v_GPU.copy() + uprev = u + vprev = v - KtransposeU_GPU = K_GPU.transpose().dot(u_GPU) - b_GPU.divide(KtransposeU_GPU, target=v_GPU) - ones_GPU.divide(Kp_GPU.dot(v_GPU), target=u_GPU) + KtransposeU = np.dot(K.T, u) + v = np.divide(b, KtransposeU) + u = 1. / np.dot(Kp, v) - if (np.any(KtransposeU_GPU.asarray() == 0) or - not u_GPU.allfinite() or not v_GPU.allfinite()): + if (np.any(KtransposeU == 0) or + np.any(np.isnan(u)) or np.any(np.isnan(v)) or + np.any(np.isinf(u)) or np.any(np.isinf(v))): # we have reached the machine precision # come back to previous solution and quit loop print('Warning: numerical errors at iteration', cpt) - u_GPU = uprev_GPU.copy() - v_GPU = vprev_GPU.copy() + u = uprev + v = vprev break if cpt % 10 == 0: # we can speed up the process by checking for the error only all # the 10th iterations - K_GPU.mult_by_col(u_GPU, target=tmp_GPU) - tmp_GPU.mult_by_row(v_GPU.transpose(), target=tmp_GPU) - - bcopy_GPU = b_GPU.copy().transpose() - bcopy_GPU.add_sums(tmp_GPU, axis=0, beta=-1) - err = bcopy_GPU.euclid_norm()**2 + if nbb: + err = np.sum((u - uprev)**2) / np.sum((u)**2) + \ + np.sum((v - vprev)**2) / np.sum((v)**2) + else: + # compute right marginal tmp2= (diag(u)Kdiag(v))^T1 + tmp2 = np.sum(u[:, None] * K * v[None, :], 0) + #tmp2=np.einsum('i,ij,j->j', u, K, v) + err = np.linalg.norm(tmp2 - b)**2 # violation of marginal if log: log['err'].append(err) @@ -150,20 +174,32 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, print( '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19) print('{:5d}|{:8e}|'.format(cpt, err)) - cpt += 1 - if log: - log['u'] = u_GPU.asarray() - log['v'] = v_GPU.asarray() - - K_GPU.mult_by_col(u_GPU, target=K_GPU) - K_GPU.mult_by_row(v_GPU.transpose(), target=K_GPU) - - if returnAsGPU: - res = K_GPU - else: - res = K_GPU.asarray() - + cpt = cpt + 1 if log: - return res, log - else: - return res + log['u'] = u + log['v'] = v + + if nbb: # return only loss + #res = np.einsum('ik,ij,jk,ij->k', u, K, v, M) (explodes cupy memory) + res = np.empty(nbb) + for i in range(nbb): + res[i] = np.sum(u[:, None, i] * (K * M) * v[None, :, i]) + if to_numpy: + res = utils.to_np(res) + if log: + return res, log + else: + return res + + else: # return OT matrix + res = u.reshape((-1, 1)) * K * v.reshape((1, -1)) + if to_numpy: + res = utils.to_np(res) + if log: + return res, log + else: + return res + + +# define sinkhorn as sinkhorn_knopp +sinkhorn = sinkhorn_knopp diff --git a/ot/gpu/da.py b/ot/gpu/da.py index ac3f8d7..f5e7daa 100644 --- a/ot/gpu/da.py +++ b/ot/gpu/da.py @@ -11,79 +11,30 @@ Domain adaptation with optimal transport with GPU implementation # License: MIT License -import numpy as np -#from ..utils import unif +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations +import numpy as npp +from . import utils + from .bregman import sinkhorn -import cudamat -def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False): +def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, + numInnerItermax=200, stopInnerThr=1e-9, verbose=False, + log=False, to_numpy=True): """ - Compute the pairwise euclidean distance between matrices a and b. - - - Parameters - ---------- - a : np.ndarray (n, f) - first matrice - b : np.ndarray (m, f) - second matrice - returnAsGPU : boolean, optional (default False) - if True, returns cudamat matrix still on GPU, else return np.ndarray - squared : boolean, optional (default False) - if True, return squared euclidean distance matrice + Solve the entropic regularization optimal transport problem with nonconvex + group lasso regularization on GPU + If the input matrix are in numpy format, they will be uploaded to the + GPU first which can incur significant time overhead. - Returns - ------- - c : (n x m) np.ndarray or cudamat.CUDAMatrix - pairwise euclidean distance distance matrix - """ - # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m). - # First compute in c_GPU the squared euclidean distance. And return its - # square root. At each cell [i,j] of c, we want to have - # sum{k in range(f)} ( (a[i,k] - b[j,k])^2 ). We know that - # (a-b)^2 = a^2 -2ab +b^2. Thus we want to have in each cell of c: - # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2). - - a_GPU = cudamat.CUDAMatrix(a) - b_GPU = cudamat.CUDAMatrix(b) - - # Multiply a by b transpose to obtain in each cell [i,j] of c the - # value sum{k in range(f)} ( a[i,k]b[j,k] ) - c_GPU = cudamat.dot(a_GPU, b_GPU.transpose()) - # multiply by -2 to have sum{k in range(f)} ( -2a[i,k]b[j,k] ) - c_GPU.mult(-2) - - # Compute the vectors of the sum of squared elements. - a_GPU = cudamat.pow(a_GPU, 2).sum(axis=1) - b_GPU = cudamat.pow(b_GPU, 2).sum(axis=1) - - # Add the vectors in each columns (respectivly rows) of c. - # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] ) - c_GPU.add_col_vec(a_GPU) - # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2) - c_GPU.add_row_vec(b_GPU.transpose()) - - if not squared: - c_GPU = cudamat.sqrt(c_GPU) - - if returnAsGPU: - return c_GPU - else: - return c_GPU.asarray() - - -def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, - numInnerItermax=200, stopInnerThr=1e-9, - verbose=False, log=False): - """ - Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization The function solves the following optimization problem: .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)+ \eta \Omega_g(\gamma) + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma) + + \eta \Omega_g(\gamma) s.t. \gamma 1 = a @@ -93,11 +44,16 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, where : - M is the (ns,nt) metric cost matrix - - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - :math:`\Omega_g` is the group lasso regulaization term :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` where :math:`\mathcal{I}_c` are the index of samples from class c in the source domain. + - :math:`\Omega_e` is the entropic regularization term + :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`\Omega_g` is the group lasso regulaization term + :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` + where :math:`\mathcal{I}_c` are the index of samples from class c + in the source domain. - a and b are source and target weights (sum to 1) - The algorithm used for solving the problem is the generalised conditional gradient as proposed in [5]_ [7]_ + The algorithm used for solving the problem is the generalised conditional + gradient as proposed in [5]_ [7]_ Parameters @@ -108,7 +64,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, labels of samples in the source domain b : np.ndarray (nt,) samples weights in the target domain - M_GPU : cudamat.CUDAMatrix (ns,nt) + M : np.ndarray (ns,nt) loss matrix reg : float Regularization term for entropic regularization >0 @@ -124,6 +80,8 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, Print information along iterations log : bool, optional record log if True + to_numpy : boolean, optional (default True) + If true convert back the GPU array result to numpy format. Returns @@ -137,8 +95,13 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, References ---------- - .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 - .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized conditional gradient: analysis of convergence and applications. arXiv preprint arXiv:1510.06567. + .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE + Transactions on Pattern Analysis and Machine Intelligence , + vol.PP, no.99, pp.1-1 + .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). + Generalized conditional gradient: analysis of convergence + and applications. arXiv preprint arXiv:1510.06567. See Also -------- @@ -147,29 +110,30 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, ot.optim.cg : General regularized OT """ + + a, labels_a, b, M = utils.to_gpu(a, labels_a, b, M) + p = 0.5 epsilon = 1e-3 - Nfin = len(b) indices_labels = [] - classes = np.unique(labels_a) + labels_a2 = cp.asnumpy(labels_a) + classes = npp.unique(labels_a2) for c in classes: - idxc, = np.where(labels_a == c) - indices_labels.append(cudamat.CUDAMatrix(idxc.reshape(1, -1))) + idxc, = utils.to_gpu(npp.where(labels_a2 == c)) + indices_labels.append(idxc) - Mreg_GPU = cudamat.empty(M_GPU.shape) - W_GPU = cudamat.empty(M_GPU.shape).assign(0) + W = np.zeros(M.shape) for cpt in range(numItermax): - Mreg_GPU.assign(M_GPU) - Mreg_GPU.add_mult(W_GPU, eta) - transp_GPU = sinkhorn(a, b, Mreg_GPU, reg, numItermax=numInnerItermax, - stopThr=stopInnerThr, returnAsGPU=True) + Mreg = M + eta * W + transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax, + stopThr=stopInnerThr, to_numpy=False) # the transport has been computed. Check if classes are really # separated - W_GPU.assign(1) - W_GPU = W_GPU.transpose() + W = np.ones(M.shape) for (i, c) in enumerate(classes): +<<<<<<< HEAD (_, nbRow) = indices_labels[i].shape tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0) transp_GPU.transpose().select_columns(indices_labels[i], tmpC_GPU) @@ -184,3 +148,13 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, W_GPU = W_GPU.transpose() return transp_GPU.asarray() +======= + majs = np.sum(transp[indices_labels[i]], axis=0) + majs = p * ((majs + epsilon)**(p - 1)) + W[indices_labels[i]] = majs + + if to_numpy: + return utils.to_np(transp) + else: + return transp +>>>>>>> master diff --git a/ot/gpu/utils.py b/ot/gpu/utils.py new file mode 100644 index 0000000..41e168a --- /dev/null +++ b/ot/gpu/utils.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- +""" +Utility functions for GPU +""" + +# Author: Remi Flamary <remi.flamary@unice.fr> +# Nicolas Courty <ncourty@irisa.fr> +# Leo Gautheron <https://github.com/aje> +# +# License: MIT License + +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations + + +def euclidean_distances(a, b, squared=False, to_numpy=True): + """ + Compute the pairwise euclidean distance between matrices a and b. + + If the input matrix are in numpy format, they will be uploaded to the + GPU first which can incur significant time overhead. + + Parameters + ---------- + a : np.ndarray (n, f) + first matrix + b : np.ndarray (m, f) + second matrix + to_numpy : boolean, optional (default True) + If true convert back the GPU array result to numpy format. + squared : boolean, optional (default False) + if True, return squared euclidean distance matrix + + Returns + ------- + c : (n x m) np.ndarray or cupy.ndarray + pairwise euclidean distance distance matrix + """ + + a, b = to_gpu(a, b) + + a2 = np.sum(np.square(a), 1) + b2 = np.sum(np.square(b), 1) + + c = -2 * np.dot(a, b.T) + c += a2[:, None] + c += b2[None, :] + + if not squared: + np.sqrt(c, out=c) + if to_numpy: + return to_np(c) + else: + return c + + +def dist(x1, x2=None, metric='sqeuclidean', to_numpy=True): + """Compute distance between samples in x1 and x2 on gpu + + Parameters + ---------- + + x1 : np.array (n1,d) + matrix with n1 samples of size d + x2 : np.array (n2,d), optional + matrix with n2 samples of size d (if None then x2=x1) + metric : str + Metric from 'sqeuclidean', 'euclidean', + + + Returns + ------- + + M : np.array (n1,n2) + distance matrix computed with given metric + + """ + if x2 is None: + x2 = x1 + if metric == "sqeuclidean": + return euclidean_distances(x1, x2, squared=True, to_numpy=to_numpy) + elif metric == "euclidean": + return euclidean_distances(x1, x2, squared=False, to_numpy=to_numpy) + else: + raise NotImplementedError + + +def to_gpu(*args): + """ Upload numpy arrays to GPU and return them""" + if len(args) > 1: + return (cp.asarray(x) for x in args) + else: + return cp.asarray(args[0]) + + +def to_np(*args): + """ convert GPU arras to numpy and return them""" + if len(args) > 1: + return (cp.asnumpy(x) for x in args) + else: + return cp.asnumpy(args[0]) diff --git a/test/test_bregman.py b/test/test_bregman.py index 58afd7a..b0c3358 100644 --- a/test/test_bregman.py +++ b/test/test_bregman.py @@ -108,7 +108,7 @@ def test_bary(): ot.bregman.barycenter(A, M, reg, log=True, verbose=True) -def test_wassersteinbary(): +def test_wasserstein_bary_2d(): size = 100 # size of a square image a1 = np.random.randn(size, size) diff --git a/test/test_gpu.py b/test/test_gpu.py index 1e97c45..6b7fdd4 100644 --- a/test/test_gpu.py +++ b/test/test_gpu.py @@ -6,7 +6,6 @@ import numpy as np import ot -import time import pytest try: # test if cudamat installed @@ -17,63 +16,81 @@ except ImportError: @pytest.mark.skipif(nogpu, reason="No GPU available") -def test_gpu_sinkhorn(): +def test_gpu_dist(): rng = np.random.RandomState(0) - def describe_res(r): - print("min:{:.3E}, max::{:.3E}, mean::{:.3E}, std::{:.3E}".format( - np.min(r), np.max(r), np.mean(r), np.std(r))) - for n_samples in [50, 100, 500, 1000]: print(n_samples) a = rng.rand(n_samples // 4, 100) b = rng.rand(n_samples, 100) - time1 = time.time() - transport = ot.da.OTDA_sinkhorn() - transport.fit(a, b) - G1 = transport.G - time2 = time.time() - transport = ot.gpu.da.OTDA_sinkhorn() - transport.fit(a, b) - G2 = transport.G - time3 = time.time() - print("Normal sinkhorn, time: {:6.2f} sec ".format(time2 - time1)) - describe_res(G1) - print(" GPU sinkhorn, time: {:6.2f} sec ".format(time3 - time2)) - describe_res(G2) - - np.testing.assert_allclose(G1, G2, rtol=1e-5, atol=1e-5) + + M = ot.dist(a.copy(), b.copy()) + M2 = ot.gpu.dist(a.copy(), b.copy()) + + np.testing.assert_allclose(M, M2, rtol=1e-10) + + M2 = ot.gpu.dist(a.copy(), b.copy(), metric='euclidean', to_numpy=False) + + # check raise not implemented wrong metric + with pytest.raises(NotImplementedError): + M2 = ot.gpu.dist(a.copy(), b.copy(), metric='cityblock', to_numpy=False) @pytest.mark.skipif(nogpu, reason="No GPU available") -def test_gpu_sinkhorn_lpl1(): +def test_gpu_sinkhorn(): rng = np.random.RandomState(0) - def describe_res(r): - print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}" - .format(np.min(r), np.max(r), np.mean(r), np.std(r))) + for n_samples in [50, 100, 500, 1000]: + a = rng.rand(n_samples // 4, 100) + b = rng.rand(n_samples, 100) + + wa = ot.unif(n_samples // 4) + wb = ot.unif(n_samples) + + wb2 = np.random.rand(n_samples, 20) + wb2 /= wb2.sum(0, keepdims=True) + + M = ot.dist(a.copy(), b.copy()) + M2 = ot.gpu.dist(a.copy(), b.copy(), to_numpy=False) + + reg = 1 + + G = ot.sinkhorn(wa, wb, M, reg) + G1 = ot.gpu.sinkhorn(wa, wb, M, reg) + + np.testing.assert_allclose(G1, G, rtol=1e-10) + + # run all on gpu + ot.gpu.sinkhorn(wa, wb, M2, reg, to_numpy=False, log=True) + + # run sinkhorn for multiple targets + ot.gpu.sinkhorn(wa, wb2, M2, reg, to_numpy=False, log=True) + + +@pytest.mark.skipif(nogpu, reason="No GPU available") +def test_gpu_sinkhorn_lpl1(): + + rng = np.random.RandomState(0) for n_samples in [50, 100, 500]: print(n_samples) a = rng.rand(n_samples // 4, 100) labels_a = np.random.randint(10, size=(n_samples // 4)) b = rng.rand(n_samples, 100) - time1 = time.time() - transport = ot.da.OTDA_lpl1() - transport.fit(a, labels_a, b) - G1 = transport.G - time2 = time.time() - transport = ot.gpu.da.OTDA_lpl1() - transport.fit(a, labels_a, b) - G2 = transport.G - time3 = time.time() - print("Normal sinkhorn lpl1, time: {:6.2f} sec ".format( - time2 - time1)) - describe_res(G1) - print(" GPU sinkhorn lpl1, time: {:6.2f} sec ".format( - time3 - time2)) - describe_res(G2) - - np.testing.assert_allclose(G1, G2, rtol=1e-3, atol=1e-3) + + wa = ot.unif(n_samples // 4) + wb = ot.unif(n_samples) + + M = ot.dist(a.copy(), b.copy()) + M2 = ot.gpu.dist(a.copy(), b.copy(), to_numpy=False) + + reg = 1 + + G = ot.da.sinkhorn_lpl1_mm(wa, labels_a, wb, M, reg) + G1 = ot.gpu.da.sinkhorn_lpl1_mm(wa, labels_a, wb, M, reg) + + np.testing.assert_allclose(G1, G, rtol=1e-10) + + ot.gpu.da.sinkhorn_lpl1_mm(wa, labels_a, wb, M2, reg, to_numpy=False, log=True) |