summaryrefslogtreecommitdiff
path: root/ot
diff options
context:
space:
mode:
authorRémi Flamary <remi.flamary@gmail.com>2018-09-28 15:32:53 +0200
committerRémi Flamary <remi.flamary@gmail.com>2018-09-28 15:32:53 +0200
commitc18f73739fdd8e8ca0ef88dddcf2ba039a85dacf (patch)
tree9f00d6baa9d68457917190cdad807c027fc85b56 /ot
parentc531fba46d9ea703a8a5ae270efe44466d54a593 (diff)
parent8f6c45534dcdd6e8f80a31205be28481fc79c533 (diff)
merge with new gpu stuff
Diffstat (limited to 'ot')
-rw-r--r--ot/gpu/__init__.py34
-rw-r--r--ot/gpu/bregman.py148
-rw-r--r--ot/gpu/da.py136
-rw-r--r--ot/gpu/utils.py101
4 files changed, 275 insertions, 144 deletions
diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py
index ed6dcc4..213578c 100644
--- a/ot/gpu/__init__.py
+++ b/ot/gpu/__init__.py
@@ -1,17 +1,37 @@
# -*- coding: utf-8 -*-
+"""
-from . import bregman
-from . import da
-from .bregman import sinkhorn
+This module provides GPU implementation for several OT solvers and utility
+functions. The GPU backend in handled by `cupy
+<https://cupy.chainer.org/>`_.
+
+By default, the functions in this module accept and return numpy arrays
+in order to proide drop-in replacement for the other POT function but
+the transfer between CPU en GPU comes with a significant overhead.
+
+In order to get the best erformances, we recommend to given only cupy
+arrays to the functions and desactivate the conversion to numpy of the
+result of the function with parameter ``to_numpy=False``.
+
+"""
# Author: Remi Flamary <remi.flamary@unice.fr>
# Leo Gautheron <https://github.com/aje>
#
# License: MIT License
-import warnings
+from . import bregman
+from . import da
+from .bregman import sinkhorn
+from .da import sinkhorn_lpl1_mm
+
+from . import utils
+from .utils import dist, to_gpu, to_np
+
+
+
+
-warnings.warn("the ot.gpu module is deprecated because cudamat in no longer maintained", DeprecationWarning,
- stacklevel=2)
+__all__ = ["utils", "dist", "sinkhorn",
+ "sinkhorn_lpl1_mm", 'bregman', 'da', 'to_gpu', 'to_np']
-__all__ = ["bregman", "da", "sinkhorn"]
diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py
index 47939c4..978b307 100644
--- a/ot/gpu/bregman.py
+++ b/ot/gpu/bregman.py
@@ -8,14 +8,18 @@ Bregman projections for regularized OT with GPU
#
# License: MIT License
-import numpy as np
-import cudamat
+import cupy as np # np used for matrix computation
+import cupy as cp # cp used for cupy specific operations
+from . import utils
-def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
- log=False, returnAsGPU=False):
- r"""
- Solve the entropic regularization optimal transport problem on GPU
+def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9,
+ verbose=False, log=False, to_numpy=True, **kwargs):
+ """
+ Solve the entropic regularization optimal transport on GPU
+
+ If the input matrix are in numpy format, they will be uploaded to the
+ GPU first which can incur significant time overhead.
The function solves the following optimization problem:
@@ -40,9 +44,10 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
----------
a : np.ndarray (ns,)
samples weights in the source domain
- b : np.ndarray (nt,)
- samples in the target domain
- M_GPU : cudamat.CUDAMatrix (ns,nt)
+ b : np.ndarray (nt,) or np.ndarray (nt,nbb)
+ samples in the target domain, compute sinkhorn with multiple targets
+ and fixed M if b is a matrix (return OT loss + dual variables in log)
+ M : np.ndarray (ns,nt)
loss matrix
reg : float
Regularization term >0
@@ -54,8 +59,9 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
Print information along iterations
log : bool, optional
record log if True
- returnAsGPU : bool, optional
- return the OT matrix as a cudamat.CUDAMatrix
+ to_numpy : boolean, optional (default True)
+ If true convert back the GPU array result to numpy format.
+
Returns
-------
@@ -88,60 +94,78 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
ot.optim.cg : General regularized OT
"""
+
+ a = cp.asarray(a)
+ b = cp.asarray(b)
+ M = cp.asarray(M)
+
+ if len(a) == 0:
+ a = np.ones((M.shape[0],)) / M.shape[0]
+ if len(b) == 0:
+ b = np.ones((M.shape[1],)) / M.shape[1]
+
# init data
Nini = len(a)
Nfin = len(b)
+ if len(b.shape) > 1:
+ nbb = b.shape[1]
+ else:
+ nbb = 0
+
if log:
log = {'err': []}
# we assume that no distances are null except those of the diagonal of
# distances
- u = (np.ones(Nini) / Nini).reshape((Nini, 1))
- u_GPU = cudamat.CUDAMatrix(u)
- a_GPU = cudamat.CUDAMatrix(a.reshape((Nini, 1)))
- ones_GPU = cudamat.empty(u_GPU.shape).assign(1)
- v = (np.ones(Nfin) / Nfin).reshape((Nfin, 1))
- v_GPU = cudamat.CUDAMatrix(v)
- b_GPU = cudamat.CUDAMatrix(b.reshape((Nfin, 1)))
-
- M_GPU.divide(-reg)
+ if nbb:
+ u = np.ones((Nini, nbb)) / Nini
+ v = np.ones((Nfin, nbb)) / Nfin
+ else:
+ u = np.ones(Nini) / Nini
+ v = np.ones(Nfin) / Nfin
- K_GPU = cudamat.exp(M_GPU)
+ # print(reg)
- ones_GPU.divide(a_GPU, target=a_GPU)
- Kp_GPU = cudamat.empty(K_GPU.shape)
- K_GPU.mult_by_col(a_GPU, target=Kp_GPU)
+ # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute
+ K = np.empty(M.shape, dtype=M.dtype)
+ np.divide(M, -reg, out=K)
+ np.exp(K, out=K)
- tmp_GPU = cudamat.empty(K_GPU.shape)
+ # print(np.min(K))
+ tmp2 = np.empty(b.shape, dtype=M.dtype)
+ Kp = (1 / a).reshape(-1, 1) * K
cpt = 0
err = 1
while (err > stopThr and cpt < numItermax):
- uprev_GPU = u_GPU.copy()
- vprev_GPU = v_GPU.copy()
+ uprev = u
+ vprev = v
- KtransposeU_GPU = K_GPU.transpose().dot(u_GPU)
- b_GPU.divide(KtransposeU_GPU, target=v_GPU)
- ones_GPU.divide(Kp_GPU.dot(v_GPU), target=u_GPU)
+ KtransposeU = np.dot(K.T, u)
+ v = np.divide(b, KtransposeU)
+ u = 1. / np.dot(Kp, v)
- if (np.any(KtransposeU_GPU.asarray() == 0) or
- not u_GPU.allfinite() or not v_GPU.allfinite()):
+ if (np.any(KtransposeU == 0) or
+ np.any(np.isnan(u)) or np.any(np.isnan(v)) or
+ np.any(np.isinf(u)) or np.any(np.isinf(v))):
# we have reached the machine precision
# come back to previous solution and quit loop
print('Warning: numerical errors at iteration', cpt)
- u_GPU = uprev_GPU.copy()
- v_GPU = vprev_GPU.copy()
+ u = uprev
+ v = vprev
break
if cpt % 10 == 0:
# we can speed up the process by checking for the error only all
# the 10th iterations
- K_GPU.mult_by_col(u_GPU, target=tmp_GPU)
- tmp_GPU.mult_by_row(v_GPU.transpose(), target=tmp_GPU)
-
- bcopy_GPU = b_GPU.copy().transpose()
- bcopy_GPU.add_sums(tmp_GPU, axis=0, beta=-1)
- err = bcopy_GPU.euclid_norm()**2
+ if nbb:
+ err = np.sum((u - uprev)**2) / np.sum((u)**2) + \
+ np.sum((v - vprev)**2) / np.sum((v)**2)
+ else:
+ # compute right marginal tmp2= (diag(u)Kdiag(v))^T1
+ tmp2 = np.sum(u[:, None] * K * v[None, :], 0)
+ #tmp2=np.einsum('i,ij,j->j', u, K, v)
+ err = np.linalg.norm(tmp2 - b)**2 # violation of marginal
if log:
log['err'].append(err)
@@ -150,20 +174,32 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
print(
'{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
print('{:5d}|{:8e}|'.format(cpt, err))
- cpt += 1
- if log:
- log['u'] = u_GPU.asarray()
- log['v'] = v_GPU.asarray()
-
- K_GPU.mult_by_col(u_GPU, target=K_GPU)
- K_GPU.mult_by_row(v_GPU.transpose(), target=K_GPU)
-
- if returnAsGPU:
- res = K_GPU
- else:
- res = K_GPU.asarray()
-
+ cpt = cpt + 1
if log:
- return res, log
- else:
- return res
+ log['u'] = u
+ log['v'] = v
+
+ if nbb: # return only loss
+ #res = np.einsum('ik,ij,jk,ij->k', u, K, v, M) (explodes cupy memory)
+ res = np.empty(nbb)
+ for i in range(nbb):
+ res[i] = np.sum(u[:, None, i] * (K * M) * v[None, :, i])
+ if to_numpy:
+ res = utils.to_np(res)
+ if log:
+ return res, log
+ else:
+ return res
+
+ else: # return OT matrix
+ res = u.reshape((-1, 1)) * K * v.reshape((1, -1))
+ if to_numpy:
+ res = utils.to_np(res)
+ if log:
+ return res, log
+ else:
+ return res
+
+
+# define sinkhorn as sinkhorn_knopp
+sinkhorn = sinkhorn_knopp
diff --git a/ot/gpu/da.py b/ot/gpu/da.py
index ac3f8d7..f5e7daa 100644
--- a/ot/gpu/da.py
+++ b/ot/gpu/da.py
@@ -11,79 +11,30 @@ Domain adaptation with optimal transport with GPU implementation
# License: MIT License
-import numpy as np
-#from ..utils import unif
+import cupy as np # np used for matrix computation
+import cupy as cp # cp used for cupy specific operations
+import numpy as npp
+from . import utils
+
from .bregman import sinkhorn
-import cudamat
-def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False):
+def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
+ numInnerItermax=200, stopInnerThr=1e-9, verbose=False,
+ log=False, to_numpy=True):
"""
- Compute the pairwise euclidean distance between matrices a and b.
-
-
- Parameters
- ----------
- a : np.ndarray (n, f)
- first matrice
- b : np.ndarray (m, f)
- second matrice
- returnAsGPU : boolean, optional (default False)
- if True, returns cudamat matrix still on GPU, else return np.ndarray
- squared : boolean, optional (default False)
- if True, return squared euclidean distance matrice
+ Solve the entropic regularization optimal transport problem with nonconvex
+ group lasso regularization on GPU
+ If the input matrix are in numpy format, they will be uploaded to the
+ GPU first which can incur significant time overhead.
- Returns
- -------
- c : (n x m) np.ndarray or cudamat.CUDAMatrix
- pairwise euclidean distance distance matrix
- """
- # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m).
- # First compute in c_GPU the squared euclidean distance. And return its
- # square root. At each cell [i,j] of c, we want to have
- # sum{k in range(f)} ( (a[i,k] - b[j,k])^2 ). We know that
- # (a-b)^2 = a^2 -2ab +b^2. Thus we want to have in each cell of c:
- # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2).
-
- a_GPU = cudamat.CUDAMatrix(a)
- b_GPU = cudamat.CUDAMatrix(b)
-
- # Multiply a by b transpose to obtain in each cell [i,j] of c the
- # value sum{k in range(f)} ( a[i,k]b[j,k] )
- c_GPU = cudamat.dot(a_GPU, b_GPU.transpose())
- # multiply by -2 to have sum{k in range(f)} ( -2a[i,k]b[j,k] )
- c_GPU.mult(-2)
-
- # Compute the vectors of the sum of squared elements.
- a_GPU = cudamat.pow(a_GPU, 2).sum(axis=1)
- b_GPU = cudamat.pow(b_GPU, 2).sum(axis=1)
-
- # Add the vectors in each columns (respectivly rows) of c.
- # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] )
- c_GPU.add_col_vec(a_GPU)
- # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2)
- c_GPU.add_row_vec(b_GPU.transpose())
-
- if not squared:
- c_GPU = cudamat.sqrt(c_GPU)
-
- if returnAsGPU:
- return c_GPU
- else:
- return c_GPU.asarray()
-
-
-def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
- numInnerItermax=200, stopInnerThr=1e-9,
- verbose=False, log=False):
- """
- Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)+ \eta \Omega_g(\gamma)
+ \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)
+ + \eta \Omega_g(\gamma)
s.t. \gamma 1 = a
@@ -93,11 +44,16 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
where :
- M is the (ns,nt) metric cost matrix
- - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - :math:`\Omega_g` is the group lasso regulaization term :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` where :math:`\mathcal{I}_c` are the index of samples from class c in the source domain.
+ - :math:`\Omega_e` is the entropic regularization term
+ :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`\Omega_g` is the group lasso regulaization term
+ :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1`
+ where :math:`\mathcal{I}_c` are the index of samples from class c
+ in the source domain.
- a and b are source and target weights (sum to 1)
- The algorithm used for solving the problem is the generalised conditional gradient as proposed in [5]_ [7]_
+ The algorithm used for solving the problem is the generalised conditional
+ gradient as proposed in [5]_ [7]_
Parameters
@@ -108,7 +64,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
labels of samples in the source domain
b : np.ndarray (nt,)
samples weights in the target domain
- M_GPU : cudamat.CUDAMatrix (ns,nt)
+ M : np.ndarray (ns,nt)
loss matrix
reg : float
Regularization term for entropic regularization >0
@@ -124,6 +80,8 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
Print information along iterations
log : bool, optional
record log if True
+ to_numpy : boolean, optional (default True)
+ If true convert back the GPU array result to numpy format.
Returns
@@ -137,8 +95,13 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
References
----------
- .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
- .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized conditional gradient: analysis of convergence and applications. arXiv preprint arXiv:1510.06567.
+ .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+ "Optimal Transport for Domain Adaptation," in IEEE
+ Transactions on Pattern Analysis and Machine Intelligence ,
+ vol.PP, no.99, pp.1-1
+ .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
+ Generalized conditional gradient: analysis of convergence
+ and applications. arXiv preprint arXiv:1510.06567.
See Also
--------
@@ -147,29 +110,30 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
ot.optim.cg : General regularized OT
"""
+
+ a, labels_a, b, M = utils.to_gpu(a, labels_a, b, M)
+
p = 0.5
epsilon = 1e-3
- Nfin = len(b)
indices_labels = []
- classes = np.unique(labels_a)
+ labels_a2 = cp.asnumpy(labels_a)
+ classes = npp.unique(labels_a2)
for c in classes:
- idxc, = np.where(labels_a == c)
- indices_labels.append(cudamat.CUDAMatrix(idxc.reshape(1, -1)))
+ idxc, = utils.to_gpu(npp.where(labels_a2 == c))
+ indices_labels.append(idxc)
- Mreg_GPU = cudamat.empty(M_GPU.shape)
- W_GPU = cudamat.empty(M_GPU.shape).assign(0)
+ W = np.zeros(M.shape)
for cpt in range(numItermax):
- Mreg_GPU.assign(M_GPU)
- Mreg_GPU.add_mult(W_GPU, eta)
- transp_GPU = sinkhorn(a, b, Mreg_GPU, reg, numItermax=numInnerItermax,
- stopThr=stopInnerThr, returnAsGPU=True)
+ Mreg = M + eta * W
+ transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax,
+ stopThr=stopInnerThr, to_numpy=False)
# the transport has been computed. Check if classes are really
# separated
- W_GPU.assign(1)
- W_GPU = W_GPU.transpose()
+ W = np.ones(M.shape)
for (i, c) in enumerate(classes):
+<<<<<<< HEAD
(_, nbRow) = indices_labels[i].shape
tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0)
transp_GPU.transpose().select_columns(indices_labels[i], tmpC_GPU)
@@ -184,3 +148,13 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
W_GPU = W_GPU.transpose()
return transp_GPU.asarray()
+=======
+ majs = np.sum(transp[indices_labels[i]], axis=0)
+ majs = p * ((majs + epsilon)**(p - 1))
+ W[indices_labels[i]] = majs
+
+ if to_numpy:
+ return utils.to_np(transp)
+ else:
+ return transp
+>>>>>>> master
diff --git a/ot/gpu/utils.py b/ot/gpu/utils.py
new file mode 100644
index 0000000..41e168a
--- /dev/null
+++ b/ot/gpu/utils.py
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+"""
+Utility functions for GPU
+"""
+
+# Author: Remi Flamary <remi.flamary@unice.fr>
+# Nicolas Courty <ncourty@irisa.fr>
+# Leo Gautheron <https://github.com/aje>
+#
+# License: MIT License
+
+import cupy as np # np used for matrix computation
+import cupy as cp # cp used for cupy specific operations
+
+
+def euclidean_distances(a, b, squared=False, to_numpy=True):
+ """
+ Compute the pairwise euclidean distance between matrices a and b.
+
+ If the input matrix are in numpy format, they will be uploaded to the
+ GPU first which can incur significant time overhead.
+
+ Parameters
+ ----------
+ a : np.ndarray (n, f)
+ first matrix
+ b : np.ndarray (m, f)
+ second matrix
+ to_numpy : boolean, optional (default True)
+ If true convert back the GPU array result to numpy format.
+ squared : boolean, optional (default False)
+ if True, return squared euclidean distance matrix
+
+ Returns
+ -------
+ c : (n x m) np.ndarray or cupy.ndarray
+ pairwise euclidean distance distance matrix
+ """
+
+ a, b = to_gpu(a, b)
+
+ a2 = np.sum(np.square(a), 1)
+ b2 = np.sum(np.square(b), 1)
+
+ c = -2 * np.dot(a, b.T)
+ c += a2[:, None]
+ c += b2[None, :]
+
+ if not squared:
+ np.sqrt(c, out=c)
+ if to_numpy:
+ return to_np(c)
+ else:
+ return c
+
+
+def dist(x1, x2=None, metric='sqeuclidean', to_numpy=True):
+ """Compute distance between samples in x1 and x2 on gpu
+
+ Parameters
+ ----------
+
+ x1 : np.array (n1,d)
+ matrix with n1 samples of size d
+ x2 : np.array (n2,d), optional
+ matrix with n2 samples of size d (if None then x2=x1)
+ metric : str
+ Metric from 'sqeuclidean', 'euclidean',
+
+
+ Returns
+ -------
+
+ M : np.array (n1,n2)
+ distance matrix computed with given metric
+
+ """
+ if x2 is None:
+ x2 = x1
+ if metric == "sqeuclidean":
+ return euclidean_distances(x1, x2, squared=True, to_numpy=to_numpy)
+ elif metric == "euclidean":
+ return euclidean_distances(x1, x2, squared=False, to_numpy=to_numpy)
+ else:
+ raise NotImplementedError
+
+
+def to_gpu(*args):
+ """ Upload numpy arrays to GPU and return them"""
+ if len(args) > 1:
+ return (cp.asarray(x) for x in args)
+ else:
+ return cp.asarray(args[0])
+
+
+def to_np(*args):
+ """ convert GPU arras to numpy and return them"""
+ if len(args) > 1:
+ return (cp.asnumpy(x) for x in args)
+ else:
+ return cp.asnumpy(args[0])