path: root/ot/gpu
Diffstat (limited to 'ot/gpu')
-rw-r--r--ot/gpu/__init__.py41
-rw-r--r--ot/gpu/bregman.py194
-rw-r--r--ot/gpu/da.py144
-rw-r--r--ot/gpu/utils.py101
4 files changed, 480 insertions, 0 deletions
diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py
new file mode 100644
index 0000000..1ab95bb
--- /dev/null
+++ b/ot/gpu/__init__.py
@@ -0,0 +1,41 @@
+# -*- coding: utf-8 -*-
+"""
+
+This module provides GPU implementations of several OT solvers and utility
+functions. The GPU backend is handled by `cupy
+<https://cupy.chainer.org/>`_.
+
+.. warning::
+    Note that by default the module is not imported in :mod:`ot`. In order
+    to use it you need to explicitly import :mod:`ot.gpu`.
+
+By default, the functions in this module accept and return numpy arrays
+in order to provide drop-in replacements for the other POT functions, but
+the transfer between CPU and GPU comes with a significant overhead.
+
+In order to get the best performance, we recommend giving only cupy
+arrays to the functions and deactivating the conversion of the result
+back to numpy with the parameter ``to_numpy=False``.
+
+"""
+
+# Author: Remi Flamary <remi.flamary@unice.fr>
+# Leo Gautheron <https://github.com/aje>
+#
+# License: MIT License
+
+from . import bregman
+from . import da
+from .bregman import sinkhorn
+from .da import sinkhorn_lpl1_mm
+
+from . import utils
+from .utils import dist, to_gpu, to_np
+
+
+
+
+
+__all__ = ["utils", "dist", "sinkhorn",
+ "sinkhorn_lpl1_mm", 'bregman', 'da', 'to_gpu', 'to_np']
+
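
The module docstring above recommends keeping data on the GPU and disabling the
conversion back to numpy. A minimal usage sketch of that pattern (assuming a
working cupy/CUDA installation; the sizes and regularization value below are
purely illustrative):

    import cupy as cp
    import ot.gpu  # not imported by "import ot"; must be imported explicitly

    n_source, n_target, d = 1000, 1500, 10

    # generate the data directly on the GPU to avoid CPU -> GPU transfers
    xs = cp.random.randn(n_source, d)
    xt = cp.random.randn(n_target, d)
    a = cp.ones(n_source) / n_source   # uniform source weights
    b = cp.ones(n_target) / n_target   # uniform target weights

    # cost matrix and Sinkhorn solver, keeping every result as a cupy array
    M = ot.gpu.dist(xs, xt, metric='sqeuclidean', to_numpy=False)
    G = ot.gpu.sinkhorn(a, b, M, reg=1e-1, to_numpy=False)

    # convert back to numpy only once, at the very end
    G_np = ot.gpu.to_np(G)
    print(type(G_np), G_np.shape)      # <class 'numpy.ndarray'> (1000, 1500)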
diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py
new file mode 100644
index 0000000..2e2df83
--- /dev/null
+++ b/ot/gpu/bregman.py
@@ -0,0 +1,194 @@
+# -*- coding: utf-8 -*-
+"""
+Bregman projections for regularized OT with GPU
+"""
+
+# Author: Remi Flamary <remi.flamary@unice.fr>
+# Leo Gautheron <https://github.com/aje>
+#
+# License: MIT License
+
+import cupy as np # np used for matrix computation
+import cupy as cp # cp used for cupy specific operations
+from . import utils
+
+
+def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9,
+ verbose=False, log=False, to_numpy=True, **kwargs):
+ """
+    Solve the entropic regularization optimal transport problem on GPU
+
+    If the input matrices are in numpy format, they will be uploaded to the
+    GPU first, which can incur a significant time overhead.
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+ s.t. \gamma 1 = a
+
+ \gamma^T 1= b
+
+ \gamma\geq 0
+ where :
+
+ - M is the (ns,nt) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target weights (sum to 1)
+
+ The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [2]_
+
+
+ Parameters
+ ----------
+ a : np.ndarray (ns,)
+ samples weights in the source domain
+    b : np.ndarray (nt,) or np.ndarray (nt,nbb)
+        samples weights in the target domain; if b is a matrix, compute
+        sinkhorn with multiple targets and a fixed M (return the OT loss and
+        dual variables in the log)
+ M : np.ndarray (ns,nt)
+ loss matrix
+ reg : float
+ Regularization term >0
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+        Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+ to_numpy : boolean, optional (default True)
+        If True, convert the GPU array result back to numpy format.
+
+
+ Returns
+ -------
+ gamma : (ns x nt) ndarray
+ Optimal transportation matrix for the given parameters
+ log : dict
+        log dictionary returned only if log==True in parameters
+
+
+ References
+ ----------
+
+ .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
+
+
+ See Also
+ --------
+ ot.lp.emd : Unregularized OT
+ ot.optim.cg : General regularized OT
+
+ """
+
+ a = cp.asarray(a)
+ b = cp.asarray(b)
+ M = cp.asarray(M)
+
+ if len(a) == 0:
+ a = np.ones((M.shape[0],)) / M.shape[0]
+ if len(b) == 0:
+ b = np.ones((M.shape[1],)) / M.shape[1]
+
+ # init data
+ Nini = len(a)
+ Nfin = len(b)
+
+ if len(b.shape) > 1:
+ nbb = b.shape[1]
+ else:
+ nbb = 0
+
+ if log:
+ log = {'err': []}
+
+ # we assume that no distances are null except those of the diagonal of
+ # distances
+ if nbb:
+ u = np.ones((Nini, nbb)) / Nini
+ v = np.ones((Nfin, nbb)) / Nfin
+ else:
+ u = np.ones(Nini) / Nini
+ v = np.ones(Nfin) / Nfin
+
+ # print(reg)
+
+ # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute
+ K = np.empty(M.shape, dtype=M.dtype)
+ np.divide(M, -reg, out=K)
+ np.exp(K, out=K)
+
+ # print(np.min(K))
+ tmp2 = np.empty(b.shape, dtype=M.dtype)
+
+ Kp = (1 / a).reshape(-1, 1) * K
+ cpt = 0
+ err = 1
+ while (err > stopThr and cpt < numItermax):
+ uprev = u
+ vprev = v
+
+ KtransposeU = np.dot(K.T, u)
+ v = np.divide(b, KtransposeU)
+ u = 1. / np.dot(Kp, v)
+
+ if (np.any(KtransposeU == 0) or
+ np.any(np.isnan(u)) or np.any(np.isnan(v)) or
+ np.any(np.isinf(u)) or np.any(np.isinf(v))):
+ # we have reached the machine precision
+ # come back to previous solution and quit loop
+ print('Warning: numerical errors at iteration', cpt)
+ u = uprev
+ v = vprev
+ break
+ if cpt % 10 == 0:
+            # we can speed up the process by checking for the error only
+            # every 10th iteration
+ if nbb:
+ err = np.sum((u - uprev)**2) / np.sum((u)**2) + \
+ np.sum((v - vprev)**2) / np.sum((v)**2)
+ else:
+ # compute right marginal tmp2= (diag(u)Kdiag(v))^T1
+ tmp2 = np.sum(u[:, None] * K * v[None, :], 0)
+ #tmp2=np.einsum('i,ij,j->j', u, K, v)
+ err = np.linalg.norm(tmp2 - b)**2 # violation of marginal
+ if log:
+ log['err'].append(err)
+
+ if verbose:
+ if cpt % 200 == 0:
+ print(
+ '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err))
+ cpt = cpt + 1
+ if log:
+ log['u'] = u
+ log['v'] = v
+
+ if nbb: # return only loss
+ #res = np.einsum('ik,ij,jk,ij->k', u, K, v, M) (explodes cupy memory)
+ res = np.empty(nbb)
+ for i in range(nbb):
+ res[i] = np.sum(u[:, None, i] * (K * M) * v[None, :, i])
+ if to_numpy:
+ res = utils.to_np(res)
+ if log:
+ return res, log
+ else:
+ return res
+
+ else: # return OT matrix
+ res = u.reshape((-1, 1)) * K * v.reshape((1, -1))
+ if to_numpy:
+ res = utils.to_np(res)
+ if log:
+ return res, log
+ else:
+ return res
+
+
+# define sinkhorn as sinkhorn_knopp
+sinkhorn = sinkhorn_knopp
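
As documented above, sinkhorn_knopp also accepts a (nt, nbb) matrix b and then
returns one regularized OT loss per column of b while reusing the same cost
matrix M. A short sketch of that mode (illustrative sizes, cupy assumed
available):

    import cupy as cp
    import ot.gpu

    ns, nt, nbb = 200, 300, 5
    xs = cp.random.randn(ns, 2)
    xt = cp.random.randn(nt, 2)
    M = ot.gpu.dist(xs, xt, to_numpy=False)   # shared cost matrix

    a = cp.ones(ns) / ns                      # single source histogram
    B = cp.random.rand(nt, nbb)
    B /= B.sum(axis=0, keepdims=True)         # one target histogram per column

    losses = ot.gpu.sinkhorn(a, B, M, reg=1e-1, to_numpy=False)
    print(losses.shape)                       # (5,): one loss per column of B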
diff --git a/ot/gpu/da.py b/ot/gpu/da.py
new file mode 100644
index 0000000..4a98038
--- /dev/null
+++ b/ot/gpu/da.py
@@ -0,0 +1,144 @@
+# -*- coding: utf-8 -*-
+"""
+Domain adaptation with optimal transport with GPU implementation
+"""
+
+# Author: Remi Flamary <remi.flamary@unice.fr>
+# Nicolas Courty <ncourty@irisa.fr>
+# Michael Perrot <michael.perrot@univ-st-etienne.fr>
+# Leo Gautheron <https://github.com/aje>
+#
+# License: MIT License
+
+
+import cupy as np # np used for matrix computation
+import cupy as cp # cp used for cupy specific operations
+import numpy as npp
+from . import utils
+
+from .bregman import sinkhorn
+
+
+def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
+ numInnerItermax=200, stopInnerThr=1e-9, verbose=False,
+ log=False, to_numpy=True):
+ """
+ Solve the entropic regularization optimal transport problem with nonconvex
+ group lasso regularization on GPU
+
+    If the input matrices are in numpy format, they will be uploaded to the
+    GPU first, which can incur a significant time overhead.
+
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)
+ + \eta \Omega_g(\gamma)
+
+ s.t. \gamma 1 = a
+
+ \gamma^T 1= b
+
+ \gamma\geq 0
+ where :
+
+ - M is the (ns,nt) metric cost matrix
+ - :math:`\Omega_e` is the entropic regularization term
+ :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - :math:`\Omega_g` is the group lasso regularization term
+ :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1`
+      where :math:`\mathcal{I}_c` are the indices of samples from class c
+      in the source domain.
+ - a and b are source and target weights (sum to 1)
+
+ The algorithm used for solving the problem is the generalised conditional
+ gradient as proposed in [5]_ [7]_
+
+
+ Parameters
+ ----------
+ a : np.ndarray (ns,)
+ samples weights in the source domain
+ labels_a : np.ndarray (ns,)
+ labels of samples in the source domain
+ b : np.ndarray (nt,)
+ samples weights in the target domain
+ M : np.ndarray (ns,nt)
+ loss matrix
+ reg : float
+ Regularization term for entropic regularization >0
+ eta : float, optional
+ Regularization term for group lasso regularization >0
+ numItermax : int, optional
+ Max number of iterations
+ numInnerItermax : int, optional
+ Max number of iterations (inner sinkhorn solver)
+ stopInnerThr : float, optional
+ Stop threshold on error (inner sinkhorn solver) (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+ to_numpy : boolean, optional (default True)
+        If True, convert the GPU array result back to numpy format.
+
+
+ Returns
+ -------
+ gamma : (ns x nt) ndarray
+ Optimal transportation matrix for the given parameters
+ log : dict
+        log dictionary returned only if log==True in parameters
+
+
+ References
+ ----------
+
+ .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+ "Optimal Transport for Domain Adaptation," in IEEE
+ Transactions on Pattern Analysis and Machine Intelligence ,
+ vol.PP, no.99, pp.1-1
+ .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
+ Generalized conditional gradient: analysis of convergence
+ and applications. arXiv preprint arXiv:1510.06567.
+
+ See Also
+ --------
+ ot.lp.emd : Unregularized OT
+ ot.bregman.sinkhorn : Entropic regularized OT
+ ot.optim.cg : General regularized OT
+
+ """
+
+ a, labels_a, b, M = utils.to_gpu(a, labels_a, b, M)
+
+ p = 0.5
+ epsilon = 1e-3
+
+    # store, for each class, the indices of its source samples (as GPU arrays)
+    indices_labels = []
+ labels_a2 = cp.asnumpy(labels_a)
+ classes = npp.unique(labels_a2)
+ for c in classes:
+ idxc, = utils.to_gpu(npp.where(labels_a2 == c))
+ indices_labels.append(idxc)
+
+ W = np.zeros(M.shape)
+
+ for cpt in range(numItermax):
+ Mreg = M + eta * W
+ transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax,
+ stopThr=stopInnerThr, to_numpy=False)
+ # the transport has been computed. Check if classes are really
+ # separated
+ W = np.ones(M.shape)
+ for (i, c) in enumerate(classes):
+
+ majs = np.sum(transp[indices_labels[i]], axis=0)
+ majs = p * ((majs + epsilon)**(p - 1))
+ W[indices_labels[i]] = majs
+
+ if to_numpy:
+ return utils.to_np(transp)
+ else:
+ return transp
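
A hypothetical usage sketch of sinkhorn_lpl1_mm (random data; the reg and eta
values are illustrative and cupy is assumed available). The source samples
carry class labels, which define the groups of the :math:`\Omega_g` term:

    import numpy as np
    import cupy as cp
    import ot.gpu

    ns, nt, d = 300, 400, 5
    xs = cp.random.randn(ns, d)
    xt = cp.random.randn(nt, d)
    labels_a = np.random.randint(0, 3, ns)   # 3 source classes (numpy labels work)

    a = cp.ones(ns) / ns
    b = cp.ones(nt) / nt
    M = ot.gpu.dist(xs, xt, to_numpy=False)

    G = ot.gpu.sinkhorn_lpl1_mm(a, labels_a, b, M, reg=1e-1, eta=1.0,
                                to_numpy=False)
    print(G.shape)                           # (300, 400), still a cupy array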
diff --git a/ot/gpu/utils.py b/ot/gpu/utils.py
new file mode 100644
index 0000000..41e168a
--- /dev/null
+++ b/ot/gpu/utils.py
@@ -0,0 +1,101 @@
+# -*- coding: utf-8 -*-
+"""
+Utility functions for GPU
+"""
+
+# Author: Remi Flamary <remi.flamary@unice.fr>
+# Nicolas Courty <ncourty@irisa.fr>
+# Leo Gautheron <https://github.com/aje>
+#
+# License: MIT License
+
+import cupy as np # np used for matrix computation
+import cupy as cp # cp used for cupy specific operations
+
+
+def euclidean_distances(a, b, squared=False, to_numpy=True):
+ """
+ Compute the pairwise euclidean distance between matrices a and b.
+
+    If the input matrices are in numpy format, they will be uploaded to the
+    GPU first, which can incur a significant time overhead.
+
+ Parameters
+ ----------
+ a : np.ndarray (n, f)
+ first matrix
+ b : np.ndarray (m, f)
+ second matrix
+    squared : boolean, optional (default False)
+        if True, return the squared euclidean distance matrix
+    to_numpy : boolean, optional (default True)
+        If True, convert the GPU array result back to numpy format.
+
+ Returns
+ -------
+ c : (n x m) np.ndarray or cupy.ndarray
+        pairwise euclidean distance matrix
+ """
+
+ a, b = to_gpu(a, b)
+
+ a2 = np.sum(np.square(a), 1)
+ b2 = np.sum(np.square(b), 1)
+
+ c = -2 * np.dot(a, b.T)
+ c += a2[:, None]
+ c += b2[None, :]
+
+ if not squared:
+ np.sqrt(c, out=c)
+ if to_numpy:
+ return to_np(c)
+ else:
+ return c
+
+
+def dist(x1, x2=None, metric='sqeuclidean', to_numpy=True):
+ """Compute distance between samples in x1 and x2 on gpu
+
+ Parameters
+ ----------
+
+ x1 : np.array (n1,d)
+ matrix with n1 samples of size d
+ x2 : np.array (n2,d), optional
+ matrix with n2 samples of size d (if None then x2=x1)
+    metric : str, optional
+        metric to use, either 'sqeuclidean' (default) or 'euclidean'
+    to_numpy : boolean, optional (default True)
+        If True, convert the GPU array result back to numpy format.
+
+
+ Returns
+ -------
+
+ M : np.array (n1,n2)
+ distance matrix computed with given metric
+
+ """
+ if x2 is None:
+ x2 = x1
+ if metric == "sqeuclidean":
+ return euclidean_distances(x1, x2, squared=True, to_numpy=to_numpy)
+ elif metric == "euclidean":
+ return euclidean_distances(x1, x2, squared=False, to_numpy=to_numpy)
+ else:
+        raise NotImplementedError(
+            "Only 'sqeuclidean' and 'euclidean' metrics are implemented on GPU")
+
+
+def to_gpu(*args):
+    """ Upload numpy arrays to GPU and return them (as a tuple if several)"""
+    if len(args) > 1:
+        return tuple(cp.asarray(x) for x in args)
+    else:
+        return cp.asarray(args[0])
+
+
+def to_np(*args):
+    """ Convert GPU arrays to numpy and return them (as a tuple if several)"""
+    if len(args) > 1:
+        return tuple(cp.asnumpy(x) for x in args)
+    else:
+        return cp.asnumpy(args[0])
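
euclidean_distances above relies on the expansion
:math:`\|x_i - y_j\|^2 = \|x_i\|^2 + \|y_j\|^2 - 2 \langle x_i, y_j \rangle`
instead of materializing the full (n, m, f) difference tensor. A small
CPU-only numpy check of that identity (arbitrary sizes, no GPU required):

    import numpy as np

    a = np.random.randn(4, 3)
    b = np.random.randn(5, 3)

    # expanded form, mirroring what euclidean_distances computes with cupy
    c = -2 * a.dot(b.T)
    c += np.square(a).sum(axis=1)[:, None]
    c += np.square(b).sum(axis=1)[None, :]

    # naive reference using the full difference tensor
    ref = ((a[:, None, :] - b[None, :, :]) ** 2).sum(axis=-1)
    assert np.allclose(c, ref)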