From 0b223ff883fd73601984a92c31cb70d4aded16e8 Mon Sep 17 00:00:00 2001
From: Rémi Flamary
Date: Thu, 7 Apr 2022 14:18:54 +0200
Subject: [MRG] Remove deprecated ot.gpu submodule (#361)

* remove all gpu submodule and tests

* speedup tests gromov
---
 ot/gpu/__init__.py |  50 --------------
 ot/gpu/bregman.py  | 196 -----------------------------------------------------
 ot/gpu/da.py       | 144 ---------------------------------------
 ot/gpu/utils.py    | 101 ---------------------------
 4 files changed, 491 deletions(-)
 delete mode 100644 ot/gpu/__init__.py
 delete mode 100644 ot/gpu/bregman.py
 delete mode 100644 ot/gpu/da.py
 delete mode 100644 ot/gpu/utils.py

diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py
deleted file mode 100644
index 12db605..0000000
--- a/ot/gpu/__init__.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-GPU implementation for several OT solvers and utility
-functions.
-
-The GPU backend is handled by `cupy`_.
-
-.. warning::
-    This module is now deprecated and will be removed in future releases. POT
-    now provides a backend mechanism that allows for solving problems on GPU
-    with the pytorch backend.
-
-
-.. warning::
-    Note that by default the module is not imported in :mod:`ot`. In order to
-    use it you need to explicitly import :mod:`ot.gpu`.
-
-By default, the functions in this module accept and return numpy arrays
-in order to provide a drop-in replacement for the other POT functions, but
-the transfer between CPU and GPU comes with a significant overhead.
-
-In order to get the best performance, we recommend giving only cupy
-arrays to the functions and deactivating the conversion of the result
-back to numpy with the parameter ``to_numpy=False``.
-
-"""
-
-# Author: Remi Flamary
-#         Leo Gautheron
-#
-# License: MIT License
-
-import warnings
-
-from . import bregman
-from . import da
-from .bregman import sinkhorn
-from .da import sinkhorn_lpl1_mm
-
-from . import utils
-from .utils import dist, to_gpu, to_np
-
-
-warnings.warn('This module is deprecated and will be removed in the next minor release of POT', category=DeprecationWarning)
-
-
-__all__ = ["utils", "dist", "sinkhorn",
-           "sinkhorn_lpl1_mm", 'bregman', 'da', 'to_gpu', 'to_np']
-
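The deprecation notice above points to POT's backend mechanism as the replacement for ot.gpu. A minimal sketch of that workflow, assuming POT >= 0.8 with PyTorch installed and a CUDA device available (array sizes and the regularization value are illustrative):

    import torch
    import ot

    # Put the data directly on the GPU; the backend is inferred from the
    # input arrays, so no ot.gpu import is needed anymore.
    xs = torch.randn(500, 2, device="cuda")
    xt = torch.randn(500, 2, device="cuda")
    a = torch.full((500,), 1.0 / 500, device="cuda")
    b = torch.full((500,), 1.0 / 500, device="cuda")

    M = ot.dist(xs, xt)                 # cost matrix, stays on the GPU
    G = ot.sinkhorn(a, b, M, reg=1e-1)  # transport plan, also a CUDA tensor

Because the computation follows the type of the inputs, the same call runs on CPU with numpy arrays and on GPU with CUDA tensors, which is what made the dedicated ot.gpu module redundant.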
diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py
deleted file mode 100644
index 76af00e..0000000
--- a/ot/gpu/bregman.py
+++ /dev/null
@@ -1,196 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Bregman projections for regularized OT with GPU
-"""
-
-# Author: Remi Flamary
-#         Leo Gautheron
-#
-# License: MIT License
-
-import cupy as np  # np used for matrix computation
-import cupy as cp  # cp used for cupy specific operations
-from . import utils
-
-
-def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9,
-                   verbose=False, log=False, to_numpy=True, **kwargs):
-    r"""
-    Solve the entropic regularization optimal transport problem on GPU
-
-    If the input matrices are in numpy format, they will be uploaded to the
-    GPU first, which can incur a significant time overhead.
-
-    The function solves the following optimization problem:
-
-    .. math::
-        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
-
-        s.t. \gamma 1 = a
-
-             \gamma^T 1 = b
-
-             \gamma \geq 0
-    where :
-
-    - M is the (ns,nt) metric cost matrix
-    - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
-    - a and b are source and target weights (sum to 1)
-
-    The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [2]_
-
-
-    Parameters
-    ----------
-    a : np.ndarray (ns,)
-        samples weights in the source domain
-    b : np.ndarray (nt,) or np.ndarray (nt,nbb)
-        samples in the target domain, compute sinkhorn with multiple targets
-        and fixed M if b is a matrix (return OT loss + dual variables in log)
-    M : np.ndarray (ns,nt)
-        loss matrix
-    reg : float
-        Regularization term >0
-    numItermax : int, optional
-        Max number of iterations
-    stopThr : float, optional
-        Stop threshold on error (>0)
-    verbose : bool, optional
-        Print information along iterations
-    log : bool, optional
-        record log if True
-    to_numpy : boolean, optional (default True)
-        If True, convert the GPU array result back to numpy format.
-
-
-    Returns
-    -------
-    gamma : (ns x nt) ndarray
-        Optimal transportation matrix for the given parameters
-    log : dict
-        log dictionary returned only if log==True in parameters
-
-
-    References
-    ----------
-
-    .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
-
-
-    See Also
-    --------
-    ot.lp.emd : Unregularized OT
-    ot.optim.cg : General regularized OT
-
-    """
-
-    a = cp.asarray(a)
-    b = cp.asarray(b)
-    M = cp.asarray(M)
-
-    if len(a) == 0:
-        a = np.ones((M.shape[0],)) / M.shape[0]
-    if len(b) == 0:
-        b = np.ones((M.shape[1],)) / M.shape[1]
-
-    # init data
-    Nini = len(a)
-    Nfin = len(b)
-
-    if len(b.shape) > 1:
-        nbb = b.shape[1]
-    else:
-        nbb = 0
-
-    if log:
-        log = {'err': []}
-
-    # we assume that no distances are null except those of the diagonal of
-    # distances
-    if nbb:
-        u = np.ones((Nini, nbb)) / Nini
-        v = np.ones((Nfin, nbb)) / Nfin
-    else:
-        u = np.ones(Nini) / Nini
-        v = np.ones(Nfin) / Nfin
-
-    # print(reg)
-
-    # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute
-    K = np.empty(M.shape, dtype=M.dtype)
-    np.divide(M, -reg, out=K)
-    np.exp(K, out=K)
-
-    # print(np.min(K))
-    tmp2 = np.empty(b.shape, dtype=M.dtype)
-
-    Kp = (1 / a).reshape(-1, 1) * K
-    cpt = 0
-    err = 1
-    while (err > stopThr and cpt < numItermax):
-        uprev = u
-        vprev = v
-
-        KtransposeU = np.dot(K.T, u)
-        v = np.divide(b, KtransposeU)
-        u = 1. / np.dot(Kp, v)
-
-        if (np.any(KtransposeU == 0) or
-                np.any(np.isnan(u)) or np.any(np.isnan(v)) or
-                np.any(np.isinf(u)) or np.any(np.isinf(v))):
-            # we have reached the machine precision
-            # come back to previous solution and quit loop
-            print('Warning: numerical errors at iteration', cpt)
-            u = uprev
-            v = vprev
-            break
-        if cpt % 10 == 0:
-            # we can speed up the process by checking for the error only all
-            # the 10th iterations
-            if nbb:
-                err = np.sqrt(
-                    np.sum((u - uprev)**2) / np.sum((u)**2)
-                    + np.sum((v - vprev)**2) / np.sum((v)**2)
-                )
-            else:
-                # compute right marginal tmp2 = (diag(u)Kdiag(v))^T1
-                tmp2 = np.sum(u[:, None] * K * v[None, :], 0)
-                # tmp2 = np.einsum('i,ij,j->j', u, K, v)
-                err = np.linalg.norm(tmp2 - b)  # violation of marginal
-            if log:
-                log['err'].append(err)
-
-            if verbose:
-                if cpt % 200 == 0:
-                    print(
-                        '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
-                print('{:5d}|{:8e}|'.format(cpt, err))
-        cpt = cpt + 1
-    if log:
-        log['u'] = u
-        log['v'] = v
-
-    if nbb:  # return only loss
-        # res = np.einsum('ik,ij,jk,ij->k', u, K, v, M)  (explodes cupy memory)
-        res = np.empty(nbb)
-        for i in range(nbb):
-            res[i] = np.sum(u[:, None, i] * (K * M) * v[None, :, i])
-        if to_numpy:
-            res = utils.to_np(res)
-        if log:
-            return res, log
-        else:
-            return res
-
-    else:  # return OT matrix
-        res = u.reshape((-1, 1)) * K * v.reshape((1, -1))
-        if to_numpy:
-            res = utils.to_np(res)
-        if log:
-            return res, log
-        else:
-            return res
-
-
-# define sinkhorn as sinkhorn_knopp
-sinkhorn = sinkhorn_knopp
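For reference, the scaling loop removed above reduces to the classic Sinkhorn-Knopp iteration. A condensed NumPy sketch (single target, no logging or overflow safeguards; the helper name and defaults are illustrative, and the u update is the a / (K v) form that the Kp factorization above is equivalent to):

    import numpy as np

    def sinkhorn_knopp_np(a, b, M, reg, num_iter=1000, tol=1e-9):
        """Plain NumPy version of the scaling iterations in the file above."""
        K = np.exp(-M / reg)                # Gibbs kernel
        u = np.ones_like(a) / a.size
        v = np.ones_like(b) / b.size
        for _ in range(num_iter):
            v = b / (K.T @ u)               # enforce the target marginal
            u = a / (K @ v)                 # enforce the source marginal
            # column marginal of diag(u) K diag(v) compared to b
            if np.linalg.norm(v * (K.T @ u) - b) < tol:
                break
        return u[:, None] * K * v[None, :]  # transport plan diag(u) K diag(v)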
diff --git a/ot/gpu/da.py b/ot/gpu/da.py
deleted file mode 100644
index 7adb830..0000000
--- a/ot/gpu/da.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Domain adaptation with optimal transport with GPU implementation
-"""
-
-# Author: Remi Flamary
-#         Nicolas Courty
-#         Michael Perrot
-#         Leo Gautheron
-#
-# License: MIT License
-
-
-import cupy as np  # np used for matrix computation
-import cupy as cp  # cp used for cupy specific operations
-import numpy as npp
-from . import utils
-
-from .bregman import sinkhorn
-
-
-def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
-                     numInnerItermax=200, stopInnerThr=1e-9, verbose=False,
-                     log=False, to_numpy=True):
-    r"""
-    Solve the entropic regularization optimal transport problem with nonconvex
-    group lasso regularization on GPU
-
-    If the input matrices are in numpy format, they will be uploaded to the
-    GPU first, which can incur a significant time overhead.
-
-
-    The function solves the following optimization problem:
-
-    .. math::
-        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)
-        + \eta \Omega_g(\gamma)
-
-        s.t. \gamma 1 = a
-
-             \gamma^T 1 = b
-
-             \gamma \geq 0
-    where :
-
-    - M is the (ns,nt) metric cost matrix
-    - :math:`\Omega_e` is the entropic regularization term
-      :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
-    - :math:`\Omega_g` is the group lasso regularization term
-      :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1`
-      where :math:`\mathcal{I}_c` are the indices of samples from class c
-      in the source domain.
-    - a and b are source and target weights (sum to 1)
-
-    The algorithm used for solving the problem is the generalised conditional
-    gradient as proposed in [5]_ [7]_
-
-
-    Parameters
-    ----------
-    a : np.ndarray (ns,)
-        samples weights in the source domain
-    labels_a : np.ndarray (ns,)
-        labels of samples in the source domain
-    b : np.ndarray (nt,)
-        samples weights in the target domain
-    M : np.ndarray (ns,nt)
-        loss matrix
-    reg : float
-        Regularization term for entropic regularization >0
-    eta : float, optional
-        Regularization term for group lasso regularization >0
-    numItermax : int, optional
-        Max number of iterations
-    numInnerItermax : int, optional
-        Max number of iterations (inner sinkhorn solver)
-    stopInnerThr : float, optional
-        Stop threshold on error (inner sinkhorn solver) (>0)
-    verbose : bool, optional
-        Print information along iterations
-    log : bool, optional
-        record log if True
-    to_numpy : boolean, optional (default True)
-        If True, convert the GPU array result back to numpy format.
-
-
-    Returns
-    -------
-    gamma : (ns x nt) ndarray
-        Optimal transportation matrix for the given parameters
-    log : dict
-        log dictionary returned only if log==True in parameters
-
-
-    References
-    ----------
-
-    .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
-        "Optimal Transport for Domain Adaptation," in IEEE
-        Transactions on Pattern Analysis and Machine Intelligence,
-        vol.PP, no.99, pp.1-1
-    .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
-        Generalized conditional gradient: analysis of convergence
-        and applications. arXiv preprint arXiv:1510.06567.
-
-    See Also
-    --------
-    ot.lp.emd : Unregularized OT
-    ot.bregman.sinkhorn : Entropic regularized OT
-    ot.optim.cg : General regularized OT
-
-    """
-
-    a, labels_a, b, M = utils.to_gpu(a, labels_a, b, M)
-
-    p = 0.5
-    epsilon = 1e-3
-
-    indices_labels = []
-    labels_a2 = cp.asnumpy(labels_a)
-    classes = npp.unique(labels_a2)
-    for c in classes:
-        idxc = utils.to_gpu(*npp.where(labels_a2 == c))
-        indices_labels.append(idxc)
-
-    W = np.zeros(M.shape)
-
-    for cpt in range(numItermax):
-        Mreg = M + eta * W
-        transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax,
-                          stopThr=stopInnerThr, to_numpy=False)
-        # the transport has been computed. Check if classes are really
-        # separated
-        W = np.ones(M.shape)
-        for (i, c) in enumerate(classes):
-            majs = np.sum(transp[indices_labels[i]], axis=0)
-            majs = p * ((majs + epsilon)**(p - 1))
-            W[indices_labels[i]] = majs
-
-    if to_numpy:
-        return utils.to_np(transp)
-    else:
-        return transp
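The CPU implementation of this solver remains available as ot.da.sinkhorn_lpl1_mm. A minimal usage sketch, assuming it keeps the signature documented above (data, labels and hyperparameter values are illustrative placeholders):

    import numpy as np
    import ot

    ns, nt = 100, 120
    xs = np.random.randn(ns, 2)                  # source samples
    labels_s = np.random.randint(0, 3, ns)       # class labels of source samples
    xt = np.random.randn(nt, 2)                  # target samples

    a = np.full(ns, 1.0 / ns)                    # uniform source weights
    b = np.full(nt, 1.0 / nt)                    # uniform target weights
    M = ot.dist(xs, xt)                          # squared euclidean cost matrix

    # entropic OT with the non-convex group-lasso term on source classes
    G = ot.da.sinkhorn_lpl1_mm(a, labels_s, b, M, reg=1e-1, eta=0.1)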
diff --git a/ot/gpu/utils.py b/ot/gpu/utils.py
deleted file mode 100644
index 41e168a..0000000
--- a/ot/gpu/utils.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Utility functions for GPU
-"""
-
-# Author: Remi Flamary
-#         Nicolas Courty
-#         Leo Gautheron
-#
-# License: MIT License
-
-import cupy as np  # np used for matrix computation
-import cupy as cp  # cp used for cupy specific operations
-
-
-def euclidean_distances(a, b, squared=False, to_numpy=True):
-    """
-    Compute the pairwise euclidean distance between matrices a and b.
-
-    If the input matrices are in numpy format, they will be uploaded to the
-    GPU first, which can incur a significant time overhead.
-
-    Parameters
-    ----------
-    a : np.ndarray (n, f)
-        first matrix
-    b : np.ndarray (m, f)
-        second matrix
-    to_numpy : boolean, optional (default True)
-        If True, convert the GPU array result back to numpy format.
-    squared : boolean, optional (default False)
-        if True, return the squared euclidean distance matrix
-
-    Returns
-    -------
-    c : (n x m) np.ndarray or cupy.ndarray
-        pairwise euclidean distance matrix
-    """
-
-    a, b = to_gpu(a, b)
-
-    a2 = np.sum(np.square(a), 1)
-    b2 = np.sum(np.square(b), 1)
-
-    c = -2 * np.dot(a, b.T)
-    c += a2[:, None]
-    c += b2[None, :]
-
-    if not squared:
-        np.sqrt(c, out=c)
-    if to_numpy:
-        return to_np(c)
-    else:
-        return c
-
-
-def dist(x1, x2=None, metric='sqeuclidean', to_numpy=True):
-    """Compute distance between samples in x1 and x2 on gpu
-
-    Parameters
-    ----------
-    x1 : np.array (n1,d)
-        matrix with n1 samples of size d
-    x2 : np.array (n2,d), optional
-        matrix with n2 samples of size d (if None then x2=x1)
-    metric : str
-        Metric to use, either 'sqeuclidean' or 'euclidean'
-
-    Returns
-    -------
-    M : np.array (n1,n2)
-        distance matrix computed with the given metric
-
-    """
-    if x2 is None:
-        x2 = x1
-    if metric == "sqeuclidean":
-        return euclidean_distances(x1, x2, squared=True, to_numpy=to_numpy)
-    elif metric == "euclidean":
-        return euclidean_distances(x1, x2, squared=False, to_numpy=to_numpy)
-    else:
-        raise NotImplementedError
-
-
-def to_gpu(*args):
-    """ Upload numpy arrays to the GPU and return them """
-    if len(args) > 1:
-        return (cp.asarray(x) for x in args)
-    else:
-        return cp.asarray(args[0])
-
-
-def to_np(*args):
-    """ Convert GPU arrays to numpy and return them """
-    if len(args) > 1:
-        return (cp.asnumpy(x) for x in args)
-    else:
-        return cp.asnumpy(args[0])
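The helpers removed here are thin wrappers around CuPy's NumPy-compatible API. The following sketch reproduces their behaviour directly with cupy, assuming cupy is installed and a GPU is visible, which is essentially all the module did under the hood:

    import numpy as np
    import cupy as cp

    x1 = np.random.randn(50, 3)
    x2 = np.random.randn(60, 3)

    # to_gpu equivalent: upload numpy arrays to the current CUDA device
    a, b = cp.asarray(x1), cp.asarray(x2)

    # squared euclidean distances via ||a||^2 + ||b||^2 - 2 a b^T,
    # the same broadcasting trick used in euclidean_distances above
    M = cp.sum(a**2, axis=1)[:, None] + cp.sum(b**2, axis=1)[None, :] - 2 * a @ b.T

    # to_np equivalent: bring the result back to host memory
    M_np = cp.asnumpy(M)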