From ca08b788af38a076f45f000003eb0e2f227d7fd5 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Mon, 24 Sep 2018 11:09:48 +0200 Subject: deprecate ot.gpu and remove OTDA classes from it --- ot/gpu/__init__.py | 5 ++++ ot/gpu/da.py | 69 ------------------------------------------------------ 2 files changed, 5 insertions(+), 69 deletions(-) (limited to 'ot/gpu') diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py index a2fdd3d..ed6dcc4 100644 --- a/ot/gpu/__init__.py +++ b/ot/gpu/__init__.py @@ -9,4 +9,9 @@ from .bregman import sinkhorn # # License: MIT License +import warnings + +warnings.warn("the ot.gpu module is deprecated because cudamat in no longer maintained", DeprecationWarning, + stacklevel=2) + __all__ = ["bregman", "da", "sinkhorn"] diff --git a/ot/gpu/da.py b/ot/gpu/da.py index 71a485a..85d43e6 100644 --- a/ot/gpu/da.py +++ b/ot/gpu/da.py @@ -13,7 +13,6 @@ Domain adaptation with optimal transport with GPU implementation import numpy as np from ..utils import unif -from ..da import OTDA from .bregman import sinkhorn import cudamat @@ -185,71 +184,3 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, W_GPU = W_GPU.transpose() return transp_GPU.asarray() - - -class OTDA_GPU(OTDA): - - def normalizeM(self, norm): - if norm == "median": - self.M_GPU.divide(float(np.median(self.M_GPU.asarray()))) - elif norm == "max": - self.M_GPU.divide(float(np.max(self.M_GPU.asarray()))) - elif norm == "log": - self.M_GPU.add(1) - cudamat.log(self.M_GPU) - elif norm == "loglog": - self.M_GPU.add(1) - cudamat.log(self.M_GPU) - self.M_GPU.add(1) - cudamat.log(self.M_GPU) - - -class OTDA_sinkhorn(OTDA_GPU): - - def fit(self, xs, xt, reg=1, ws=None, wt=None, norm=None, **kwargs): - cudamat.init() - xs = np.asarray(xs, dtype=np.float64) - xt = np.asarray(xt, dtype=np.float64) - - self.xs = xs - self.xt = xt - - if wt is None: - wt = unif(xt.shape[0]) - if ws is None: - ws = unif(xs.shape[0]) - - self.ws = ws - self.wt = wt - - self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True, - squared=True) - self.normalizeM(norm) - self.G = sinkhorn(ws, wt, self.M_GPU, reg, **kwargs) - self.computed = True - - -class OTDA_lpl1(OTDA_GPU): - - def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None, - **kwargs): - cudamat.init() - xs = np.asarray(xs, dtype=np.float64) - xt = np.asarray(xt, dtype=np.float64) - - self.xs = xs - self.xt = xt - - if wt is None: - wt = unif(xt.shape[0]) - if ws is None: - ws = unif(xs.shape[0]) - - self.ws = ws - self.wt = wt - - self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True, - squared=True) - self.normalizeM(norm) - self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M_GPU, reg, eta, **kwargs) - self.computed = True -- cgit v1.2.3 From d258c7d6936410cd78189445a0260d983f7684d6 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Mon, 24 Sep 2018 13:57:42 +0200 Subject: convert ot.gpu to cupy --- ot/gpu/__init__.py | 6 +- ot/gpu/bregman.py | 143 +++++++++++++++++++++-------------- ot/gpu/da.py | 214 ++++++++++++----------------------------------------- ot/gpu/utils.py | 100 +++++++++++++++++++++++++ 4 files changed, 239 insertions(+), 224 deletions(-) create mode 100644 ot/gpu/utils.py (limited to 'ot/gpu') diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py index a2fdd3d..de4825d 100644 --- a/ot/gpu/__init__.py +++ b/ot/gpu/__init__.py @@ -4,9 +4,13 @@ from . import bregman from . import da from .bregman import sinkhorn +from . 
import utils +from .utils import dist, to_gpu, to_np + + # Author: Remi Flamary # Leo Gautheron # # License: MIT License -__all__ = ["bregman", "da", "sinkhorn"] +__all__ = ["utils", "dist", "sinkhorn"] diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py index 47939c4..912104c 100644 --- a/ot/gpu/bregman.py +++ b/ot/gpu/bregman.py @@ -8,14 +8,16 @@ Bregman projections for regularized OT with GPU # # License: MIT License -import numpy as np -import cudamat +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations +from . import utils -def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, - log=False, returnAsGPU=False): - r""" - Solve the entropic regularization optimal transport problem on GPU + +def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, + verbose=False, log=False, to_numpy=True, **kwargs): + """ + Solve the entropic regularization optimal transport problem and return the OT matrix The function solves the following optimization problem: @@ -40,9 +42,10 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, ---------- a : np.ndarray (ns,) samples weights in the source domain - b : np.ndarray (nt,) - samples in the target domain - M_GPU : cudamat.CUDAMatrix (ns,nt) + b : np.ndarray (nt,) or np.ndarray (nt,nbb) + samples in the target domain, compute sinkhorn with multiple targets + and fixed M if b is a matrix (return OT loss + dual variables in log) + M : np.ndarray (ns,nt) loss matrix reg : float Regularization term >0 @@ -54,8 +57,7 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, Print information along iterations log : bool, optional record log if True - returnAsGPU : bool, optional - return the OT matrix as a cudamat.CUDAMatrix + Returns ------- @@ -88,60 +90,78 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, ot.optim.cg : General regularized OT """ + + a = cp.asarray(a, dtype=np.float64) + b = cp.asarray(b, dtype=np.float64) + M = cp.asarray(M, dtype=np.float64) + + if len(a) == 0: + a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0] + if len(b) == 0: + b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] + # init data Nini = len(a) Nfin = len(b) + if len(b.shape) > 1: + nbb = b.shape[1] + else: + nbb = 0 + if log: log = {'err': []} # we assume that no distances are null except those of the diagonal of # distances - u = (np.ones(Nini) / Nini).reshape((Nini, 1)) - u_GPU = cudamat.CUDAMatrix(u) - a_GPU = cudamat.CUDAMatrix(a.reshape((Nini, 1))) - ones_GPU = cudamat.empty(u_GPU.shape).assign(1) - v = (np.ones(Nfin) / Nfin).reshape((Nfin, 1)) - v_GPU = cudamat.CUDAMatrix(v) - b_GPU = cudamat.CUDAMatrix(b.reshape((Nfin, 1))) - - M_GPU.divide(-reg) + if nbb: + u = np.ones((Nini, nbb)) / Nini + v = np.ones((Nfin, nbb)) / Nfin + else: + u = np.ones(Nini) / Nini + v = np.ones(Nfin) / Nfin - K_GPU = cudamat.exp(M_GPU) + # print(reg) - ones_GPU.divide(a_GPU, target=a_GPU) - Kp_GPU = cudamat.empty(K_GPU.shape) - K_GPU.mult_by_col(a_GPU, target=Kp_GPU) + # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute + K = np.empty(M.shape, dtype=M.dtype) + np.divide(M, -reg, out=K) + np.exp(K, out=K) - tmp_GPU = cudamat.empty(K_GPU.shape) + # print(np.min(K)) + tmp2 = np.empty(b.shape, dtype=M.dtype) + Kp = (1 / a).reshape(-1, 1) * K cpt = 0 err = 1 while (err > stopThr and cpt < numItermax): - uprev_GPU = u_GPU.copy() - vprev_GPU = v_GPU.copy() + uprev = u + vprev = v - KtransposeU_GPU = 
K_GPU.transpose().dot(u_GPU) - b_GPU.divide(KtransposeU_GPU, target=v_GPU) - ones_GPU.divide(Kp_GPU.dot(v_GPU), target=u_GPU) + KtransposeU = np.dot(K.T, u) + v = np.divide(b, KtransposeU) + u = 1. / np.dot(Kp, v) - if (np.any(KtransposeU_GPU.asarray() == 0) or - not u_GPU.allfinite() or not v_GPU.allfinite()): + if (np.any(KtransposeU == 0) or + np.any(np.isnan(u)) or np.any(np.isnan(v)) or + np.any(np.isinf(u)) or np.any(np.isinf(v))): # we have reached the machine precision # come back to previous solution and quit loop print('Warning: numerical errors at iteration', cpt) - u_GPU = uprev_GPU.copy() - v_GPU = vprev_GPU.copy() + u = uprev + v = vprev break if cpt % 10 == 0: # we can speed up the process by checking for the error only all # the 10th iterations - K_GPU.mult_by_col(u_GPU, target=tmp_GPU) - tmp_GPU.mult_by_row(v_GPU.transpose(), target=tmp_GPU) - - bcopy_GPU = b_GPU.copy().transpose() - bcopy_GPU.add_sums(tmp_GPU, axis=0, beta=-1) - err = bcopy_GPU.euclid_norm()**2 + if nbb: + err = np.sum((u - uprev)**2) / np.sum((u)**2) + \ + np.sum((v - vprev)**2) / np.sum((v)**2) + else: + # compute right marginal tmp2= (diag(u)Kdiag(v))^T1 + tmp2=np.sum(u[:,None]*K*v[None,:],0) + #tmp2=np.einsum('i,ij,j->j', u, K, v) + err = np.linalg.norm(tmp2 - b)**2 # violation of marginal if log: log['err'].append(err) @@ -150,20 +170,31 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, print( '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19) print('{:5d}|{:8e}|'.format(cpt, err)) - cpt += 1 - if log: - log['u'] = u_GPU.asarray() - log['v'] = v_GPU.asarray() - - K_GPU.mult_by_col(u_GPU, target=K_GPU) - K_GPU.mult_by_row(v_GPU.transpose(), target=K_GPU) - - if returnAsGPU: - res = K_GPU - else: - res = K_GPU.asarray() - + cpt = cpt + 1 if log: - return res, log - else: - return res + log['u'] = u + log['v'] = v + + if nbb: # return only loss + #res = np.einsum('ik,ij,jk,ij->k', u, K, v, M) (explodes cupy memory) + res=np.empty(nbb) + for i in range(nbb): + res[i]=np.sum(u[:,None,i]*(K*M)*v[None,:,i]) + if to_numpy: + res=utils.to_np(res) + if log: + return res, log + else: + return res + + else: # return OT matrix + res=u.reshape((-1, 1)) * K * v.reshape((1, -1)) + if to_numpy: + res=utils.to_np(res) + if log: + return res, log + else: + return res + +# define sinkhorn as sinkhorn_knopp +sinkhorn=sinkhorn_knopp \ No newline at end of file diff --git a/ot/gpu/da.py b/ot/gpu/da.py index 71a485a..8bcc2aa 100644 --- a/ot/gpu/da.py +++ b/ot/gpu/da.py @@ -10,81 +10,24 @@ Domain adaptation with optimal transport with GPU implementation # # License: MIT License - -import numpy as np -from ..utils import unif -from ..da import OTDA +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations +import numpy as npp +from . import utils from .bregman import sinkhorn -import cudamat - - -def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False): - """ - Compute the pairwise euclidean distance between matrices a and b. 
- - - Parameters - ---------- - a : np.ndarray (n, f) - first matrice - b : np.ndarray (m, f) - second matrice - returnAsGPU : boolean, optional (default False) - if True, returns cudamat matrix still on GPU, else return np.ndarray - squared : boolean, optional (default False) - if True, return squared euclidean distance matrice - - Returns - ------- - c : (n x m) np.ndarray or cudamat.CUDAMatrix - pairwise euclidean distance distance matrix +def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, + numInnerItermax=200, stopInnerThr=1e-9, verbose=False, + log=False,to_numpy=True): """ - # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m). - # First compute in c_GPU the squared euclidean distance. And return its - # square root. At each cell [i,j] of c, we want to have - # sum{k in range(f)} ( (a[i,k] - b[j,k])^2 ). We know that - # (a-b)^2 = a^2 -2ab +b^2. Thus we want to have in each cell of c: - # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2). - - a_GPU = cudamat.CUDAMatrix(a) - b_GPU = cudamat.CUDAMatrix(b) - - # Multiply a by b transpose to obtain in each cell [i,j] of c the - # value sum{k in range(f)} ( a[i,k]b[j,k] ) - c_GPU = cudamat.dot(a_GPU, b_GPU.transpose()) - # multiply by -2 to have sum{k in range(f)} ( -2a[i,k]b[j,k] ) - c_GPU.mult(-2) - - # Compute the vectors of the sum of squared elements. - a_GPU = cudamat.pow(a_GPU, 2).sum(axis=1) - b_GPU = cudamat.pow(b_GPU, 2).sum(axis=1) - - # Add the vectors in each columns (respectivly rows) of c. - # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] ) - c_GPU.add_col_vec(a_GPU) - # sum{k in range(f)} ( a[i,k]^2 -2a[i,k]b[j,k] +b[j,k]^2) - c_GPU.add_row_vec(b_GPU.transpose()) - - if not squared: - c_GPU = cudamat.sqrt(c_GPU) - - if returnAsGPU: - return c_GPU - else: - return c_GPU.asarray() - - -def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, - numInnerItermax=200, stopInnerThr=1e-9, - verbose=False, log=False): - """ - Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization + Solve the entropic regularization optimal transport problem with nonconvex + group lasso regularization The function solves the following optimization problem: .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)+ \eta \Omega_g(\gamma) + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma) + + \eta \Omega_g(\gamma) s.t. \gamma 1 = a @@ -94,11 +37,16 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, where : - M is the (ns,nt) metric cost matrix - - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - :math:`\Omega_g` is the group lasso regulaization term :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` where :math:`\mathcal{I}_c` are the index of samples from class c in the source domain. + - :math:`\Omega_e` is the entropic regularization term + :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`\Omega_g` is the group lasso regulaization term + :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` + where :math:`\mathcal{I}_c` are the index of samples from class c + in the source domain. 
- a and b are source and target weights (sum to 1) - The algorithm used for solving the problem is the generalised conditional gradient as proposed in [5]_ [7]_ + The algorithm used for solving the problem is the generalised conditional + gradient as proposed in [5]_ [7]_ Parameters @@ -109,7 +57,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, labels of samples in the source domain b : np.ndarray (nt,) samples weights in the target domain - M_GPU : cudamat.CUDAMatrix (ns,nt) + M : np.ndarray (ns,nt) loss matrix reg : float Regularization term for entropic regularization >0 @@ -138,8 +86,13 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, References ---------- - .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 - .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized conditional gradient: analysis of convergence and applications. arXiv preprint arXiv:1510.06567. + .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE + Transactions on Pattern Analysis and Machine Intelligence , + vol.PP, no.99, pp.1-1 + .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). + Generalized conditional gradient: analysis of convergence + and applications. arXiv preprint arXiv:1510.06567. See Also -------- @@ -148,108 +101,35 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, ot.optim.cg : General regularized OT """ + + a, labels_a, b, M = utils.to_gpu(a, labels_a, b, M) + + p = 0.5 epsilon = 1e-3 - Nfin = len(b) indices_labels = [] - classes = np.unique(labels_a) + labels_a2=cp.asnumpy(labels_a) + classes = npp.unique(labels_a2) for c in classes: - idxc, = np.where(labels_a == c) - indices_labels.append(cudamat.CUDAMatrix(idxc.reshape(1, -1))) + idxc, = utils.to_gpu(npp.where(labels_a2 == c)) + indices_labels.append(idxc) - Mreg_GPU = cudamat.empty(M_GPU.shape) - W_GPU = cudamat.empty(M_GPU.shape).assign(0) + W = np.zeros(M.shape) for cpt in range(numItermax): - Mreg_GPU.assign(M_GPU) - Mreg_GPU.add_mult(W_GPU, eta) - transp_GPU = sinkhorn(a, b, Mreg_GPU, reg, numItermax=numInnerItermax, - stopThr=stopInnerThr, returnAsGPU=True) + Mreg = M + eta * W + transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax, + stopThr=stopInnerThr,to_numpy=False) # the transport has been computed. 
Check if classes are really # separated - W_GPU.assign(1) - W_GPU = W_GPU.transpose() + W = np.ones(M.shape) for (i, c) in enumerate(classes): - (_, nbRow) = indices_labels[i].shape - tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0) - transp_GPU.transpose().select_columns(indices_labels[i], tmpC_GPU) - majs_GPU = tmpC_GPU.sum(axis=1).add(epsilon) - cudamat.pow(majs_GPU, (p - 1)) - majs_GPU.mult(p) - - tmpC_GPU.assign(0) - tmpC_GPU.add_col_vec(majs_GPU) - W_GPU.set_selected_columns(indices_labels[i], tmpC_GPU) - - W_GPU = W_GPU.transpose() - - return transp_GPU.asarray() + majs = np.sum(transp[indices_labels[i]], axis=0) + majs = p * ((majs + epsilon)**(p - 1)) + W[indices_labels[i]] = majs - -class OTDA_GPU(OTDA): - - def normalizeM(self, norm): - if norm == "median": - self.M_GPU.divide(float(np.median(self.M_GPU.asarray()))) - elif norm == "max": - self.M_GPU.divide(float(np.max(self.M_GPU.asarray()))) - elif norm == "log": - self.M_GPU.add(1) - cudamat.log(self.M_GPU) - elif norm == "loglog": - self.M_GPU.add(1) - cudamat.log(self.M_GPU) - self.M_GPU.add(1) - cudamat.log(self.M_GPU) - - -class OTDA_sinkhorn(OTDA_GPU): - - def fit(self, xs, xt, reg=1, ws=None, wt=None, norm=None, **kwargs): - cudamat.init() - xs = np.asarray(xs, dtype=np.float64) - xt = np.asarray(xt, dtype=np.float64) - - self.xs = xs - self.xt = xt - - if wt is None: - wt = unif(xt.shape[0]) - if ws is None: - ws = unif(xs.shape[0]) - - self.ws = ws - self.wt = wt - - self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True, - squared=True) - self.normalizeM(norm) - self.G = sinkhorn(ws, wt, self.M_GPU, reg, **kwargs) - self.computed = True - - -class OTDA_lpl1(OTDA_GPU): - - def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None, - **kwargs): - cudamat.init() - xs = np.asarray(xs, dtype=np.float64) - xt = np.asarray(xt, dtype=np.float64) - - self.xs = xs - self.xt = xt - - if wt is None: - wt = unif(xt.shape[0]) - if ws is None: - ws = unif(xs.shape[0]) - - self.ws = ws - self.wt = wt - - self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True, - squared=True) - self.normalizeM(norm) - self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M_GPU, reg, eta, **kwargs) - self.computed = True + if to_numpy: + return utils.to_np(transp) + else: + return transp diff --git a/ot/gpu/utils.py b/ot/gpu/utils.py new file mode 100644 index 0000000..6d0c853 --- /dev/null +++ b/ot/gpu/utils.py @@ -0,0 +1,100 @@ +# -*- coding: utf-8 -*- +""" +Utility functions for GPU +""" + +# Author: Remi Flamary +# Nicolas Courty +# Leo Gautheron +# +# License: MIT License + +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations + + + +def euclidean_distances(a, b, squared=False, to_numpy=True): + """ + Compute the pairwise euclidean distance between matrices a and b. + Parameters + ---------- + a : np.ndarray (n, f) + first matrix + b : np.ndarray (m, f) + second matrix + gpu : boolean, optional (default False) + if True and the module cupy is available, the computation is done + on the GPU and the type of the matrix returned is cupy.ndarray. + Otherwise, compute on the CPU and returns np.ndarray. 
+ squared : boolean, optional (default False) + if True, return squared euclidean distance matrix + Returns + ------- + c : (n x m) np.ndarray or cupy.ndarray + pairwise euclidean distance distance matrix + """ + + a, b = to_gpu(a, b) + + a2=np.sum(np.square(a),1) + b2=np.sum(np.square(b),1) + + c=-2*np.dot(a,b.T) + c+=a2[:,None] + c+=b2[None,:] + + if not squared: + np.sqrt(c, out=c) + if to_numpy: + return to_np(c) + else: + return c + +def dist(x1, x2=None, metric='sqeuclidean', to_numpy=True): + """Compute distance between samples in x1 and x2 on gpu + + Parameters + ---------- + + x1 : np.array (n1,d) + matrix with n1 samples of size d + x2 : np.array (n2,d), optional + matrix with n2 samples of size d (if None then x2=x1) + metric : str + Metric from 'sqeuclidean', 'euclidean', + + + Returns + ------- + + M : np.array (n1,n2) + distance matrix computed with given metric + + """ + if x2 is None: + x2 = x1 + if metric == "sqeuclidean": + return euclidean_distances(x1, x2, squared=True, to_numpy=to_numpy) + elif metric == "euclidean": + return euclidean_distances(x1, x2, squared=False, to_numpy=to_numpy) + else: + raise NotImplementedError + + + +def to_gpu(*args): + """ Upload numpy arrays to GPU and return them""" + if len(args) > 1: + return (cp.asarray(x) for x in args) + else: + return cp.asarray(args[0]) + + + +def to_np(*args): + """ convert GPU arras to numpy and return them""" + if len(args) > 1: + return (cp.asnumpy(x) for x in args) + else: + return cp.asnumpy(args[0]) \ No newline at end of file -- cgit v1.2.3 From f45f7a68b221ec5b619b8fd8de797815a1eecf43 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Mon, 24 Sep 2018 14:30:44 +0200 Subject: pep8 --- ot/gpu/bregman.py | 20 ++++++++++---------- ot/gpu/da.py | 16 ++++++++-------- ot/gpu/utils.py | 34 ++++++++++++++++------------------ 3 files changed, 34 insertions(+), 36 deletions(-) (limited to 'ot/gpu') diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py index 912104c..6714098 100644 --- a/ot/gpu/bregman.py +++ b/ot/gpu/bregman.py @@ -8,12 +8,11 @@ Bregman projections for regularized OT with GPU # # License: MIT License -import cupy as np # np used for matrix computation -import cupy as cp # cp used for cupy specific operations +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations from . 
import utils - def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, log=False, to_numpy=True, **kwargs): """ @@ -159,7 +158,7 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, np.sum((v - vprev)**2) / np.sum((v)**2) else: # compute right marginal tmp2= (diag(u)Kdiag(v))^T1 - tmp2=np.sum(u[:,None]*K*v[None,:],0) + tmp2 = np.sum(u[:, None] * K * v[None, :], 0) #tmp2=np.einsum('i,ij,j->j', u, K, v) err = np.linalg.norm(tmp2 - b)**2 # violation of marginal if log: @@ -177,24 +176,25 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, if nbb: # return only loss #res = np.einsum('ik,ij,jk,ij->k', u, K, v, M) (explodes cupy memory) - res=np.empty(nbb) + res = np.empty(nbb) for i in range(nbb): - res[i]=np.sum(u[:,None,i]*(K*M)*v[None,:,i]) + res[i] = np.sum(u[:, None, i] * (K * M) * v[None, :, i]) if to_numpy: - res=utils.to_np(res) + res = utils.to_np(res) if log: return res, log else: return res else: # return OT matrix - res=u.reshape((-1, 1)) * K * v.reshape((1, -1)) + res = u.reshape((-1, 1)) * K * v.reshape((1, -1)) if to_numpy: - res=utils.to_np(res) + res = utils.to_np(res) if log: return res, log else: return res + # define sinkhorn as sinkhorn_knopp -sinkhorn=sinkhorn_knopp \ No newline at end of file +sinkhorn = sinkhorn_knopp diff --git a/ot/gpu/da.py b/ot/gpu/da.py index 8bcc2aa..8c63870 100644 --- a/ot/gpu/da.py +++ b/ot/gpu/da.py @@ -10,15 +10,16 @@ Domain adaptation with optimal transport with GPU implementation # # License: MIT License -import cupy as np # np used for matrix computation -import cupy as cp # cp used for cupy specific operations +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations import numpy as npp from . import utils from .bregman import sinkhorn + def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, numInnerItermax=200, stopInnerThr=1e-9, verbose=False, - log=False,to_numpy=True): + log=False, to_numpy=True): """ Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization @@ -101,15 +102,14 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, ot.optim.cg : General regularized OT """ - + a, labels_a, b, M = utils.to_gpu(a, labels_a, b, M) - - + p = 0.5 epsilon = 1e-3 indices_labels = [] - labels_a2=cp.asnumpy(labels_a) + labels_a2 = cp.asnumpy(labels_a) classes = npp.unique(labels_a2) for c in classes: idxc, = utils.to_gpu(npp.where(labels_a2 == c)) @@ -120,7 +120,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, for cpt in range(numItermax): Mreg = M + eta * W transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax, - stopThr=stopInnerThr,to_numpy=False) + stopThr=stopInnerThr, to_numpy=False) # the transport has been computed. 
Check if classes are really # separated W = np.ones(M.shape) diff --git a/ot/gpu/utils.py b/ot/gpu/utils.py index 6d0c853..d349a6d 100644 --- a/ot/gpu/utils.py +++ b/ot/gpu/utils.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- """ -Utility functions for GPU +Utility functions for GPU """ # Author: Remi Flamary @@ -9,9 +9,8 @@ Utility functions for GPU # # License: MIT License -import cupy as np # np used for matrix computation -import cupy as cp # cp used for cupy specific operations - +import cupy as np # np used for matrix computation +import cupy as cp # cp used for cupy specific operations def euclidean_distances(a, b, squared=False, to_numpy=True): @@ -34,16 +33,16 @@ def euclidean_distances(a, b, squared=False, to_numpy=True): c : (n x m) np.ndarray or cupy.ndarray pairwise euclidean distance distance matrix """ - + a, b = to_gpu(a, b) - - a2=np.sum(np.square(a),1) - b2=np.sum(np.square(b),1) - - c=-2*np.dot(a,b.T) - c+=a2[:,None] - c+=b2[None,:] - + + a2 = np.sum(np.square(a), 1) + b2 = np.sum(np.square(b), 1) + + c = -2 * np.dot(a, b.T) + c += a2[:, None] + c += b2[None, :] + if not squared: np.sqrt(c, out=c) if to_numpy: @@ -51,6 +50,7 @@ def euclidean_distances(a, b, squared=False, to_numpy=True): else: return c + def dist(x1, x2=None, metric='sqeuclidean', to_numpy=True): """Compute distance between samples in x1 and x2 on gpu @@ -61,8 +61,8 @@ def dist(x1, x2=None, metric='sqeuclidean', to_numpy=True): matrix with n1 samples of size d x2 : np.array (n2,d), optional matrix with n2 samples of size d (if None then x2=x1) - metric : str - Metric from 'sqeuclidean', 'euclidean', + metric : str + Metric from 'sqeuclidean', 'euclidean', Returns @@ -80,7 +80,6 @@ def dist(x1, x2=None, metric='sqeuclidean', to_numpy=True): return euclidean_distances(x1, x2, squared=False, to_numpy=to_numpy) else: raise NotImplementedError - def to_gpu(*args): @@ -91,10 +90,9 @@ def to_gpu(*args): return cp.asarray(args[0]) - def to_np(*args): """ convert GPU arras to numpy and return them""" if len(args) > 1: return (cp.asnumpy(x) for x in args) else: - return cp.asnumpy(args[0]) \ No newline at end of file + return cp.asnumpy(args[0]) -- cgit v1.2.3 From 75e78022d2df350ea220cee1b5e759ef9fc35a5b Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Mon, 24 Sep 2018 15:05:09 +0200 Subject: update tests --- ot/gpu/bregman.py | 10 +++---- test/test_gpu.py | 89 ++++++++++++++++++++++++++++++------------------------- 2 files changed, 53 insertions(+), 46 deletions(-) (limited to 'ot/gpu') diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py index 6714098..600ead4 100644 --- a/ot/gpu/bregman.py +++ b/ot/gpu/bregman.py @@ -90,14 +90,14 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, """ - a = cp.asarray(a, dtype=np.float64) - b = cp.asarray(b, dtype=np.float64) - M = cp.asarray(M, dtype=np.float64) + a = cp.asarray(a) + b = cp.asarray(b) + M = cp.asarray(M) if len(a) == 0: - a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0] + a = np.ones((M.shape[0],)) / M.shape[0] if len(b) == 0: - b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] + b = np.ones((M.shape[1],)) / M.shape[1] # init data Nini = len(a) diff --git a/test/test_gpu.py b/test/test_gpu.py index 1e97c45..51a0cff 100644 --- a/test/test_gpu.py +++ b/test/test_gpu.py @@ -17,63 +17,70 @@ except ImportError: @pytest.mark.skipif(nogpu, reason="No GPU available") -def test_gpu_sinkhorn(): +def test_gpu_dist(): rng = np.random.RandomState(0) - def describe_res(r): - print("min:{:.3E}, max::{:.3E}, mean::{:.3E}, std::{:.3E}".format( - 
np.min(r), np.max(r), np.mean(r), np.std(r))) - for n_samples in [50, 100, 500, 1000]: print(n_samples) a = rng.rand(n_samples // 4, 100) b = rng.rand(n_samples, 100) - time1 = time.time() - transport = ot.da.OTDA_sinkhorn() - transport.fit(a, b) - G1 = transport.G - time2 = time.time() - transport = ot.gpu.da.OTDA_sinkhorn() - transport.fit(a, b) - G2 = transport.G - time3 = time.time() - print("Normal sinkhorn, time: {:6.2f} sec ".format(time2 - time1)) - describe_res(G1) - print(" GPU sinkhorn, time: {:6.2f} sec ".format(time3 - time2)) - describe_res(G2) - - np.testing.assert_allclose(G1, G2, rtol=1e-5, atol=1e-5) + + M = ot.dist(a.copy(), b.copy()) + M2 = ot.gpu.dist(a.copy(), b.copy()) + + np.testing.assert_allclose(M, M2, rtol=1e-10) + + M2 = ot.gpu.dist(a.copy(), b.copy(), to_numpy=False) @pytest.mark.skipif(nogpu, reason="No GPU available") -def test_gpu_sinkhorn_lpl1(): +def test_gpu_sinkhorn(): rng = np.random.RandomState(0) - def describe_res(r): - print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}" - .format(np.min(r), np.max(r), np.mean(r), np.std(r))) + for n_samples in [50, 100, 500, 1000]: + a = rng.rand(n_samples // 4, 100) + b = rng.rand(n_samples, 100) + + wa = ot.unif(n_samples // 4) + wb = ot.unif(n_samples) + + M = ot.dist(a.copy(), b.copy()) + M2 = ot.gpu.dist(a.copy(), b.copy(), to_numpy=False) + + reg = 1 + + G = ot.sinkhorn(wa, wb, M, reg) + G1 = ot.gpu.sinkhorn(wa, wb, M, reg) + + np.testing.assert_allclose(G1, G, rtol=1e-10) + + G2 = ot.gpu.sinkhorn(wa, wb, M2, reg, to_numpy=False) + + +@pytest.mark.skipif(nogpu, reason="No GPU available") +def test_gpu_sinkhorn_lpl1(): + + rng = np.random.RandomState(0) for n_samples in [50, 100, 500]: print(n_samples) a = rng.rand(n_samples // 4, 100) labels_a = np.random.randint(10, size=(n_samples // 4)) b = rng.rand(n_samples, 100) - time1 = time.time() - transport = ot.da.OTDA_lpl1() - transport.fit(a, labels_a, b) - G1 = transport.G - time2 = time.time() - transport = ot.gpu.da.OTDA_lpl1() - transport.fit(a, labels_a, b) - G2 = transport.G - time3 = time.time() - print("Normal sinkhorn lpl1, time: {:6.2f} sec ".format( - time2 - time1)) - describe_res(G1) - print(" GPU sinkhorn lpl1, time: {:6.2f} sec ".format( - time3 - time2)) - describe_res(G2) - - np.testing.assert_allclose(G1, G2, rtol=1e-3, atol=1e-3) + + wa = ot.unif(n_samples // 4) + wb = ot.unif(n_samples) + + M = ot.dist(a.copy(), b.copy()) + M2 = ot.gpu.dist(a.copy(), b.copy(), to_numpy=False) + + reg = 1 + + G = ot.da.sinkhorn_lpl1_mm(wa, labels_a, wb, M, reg) + G1 = ot.gpu.da.sinkhorn_lpl1_mm(wa, labels_a, wb, M, reg) + + np.testing.assert_allclose(G1, G, rtol=1e-10) + + G2 = ot.gpu.da.sinkhorn_lpl1_mm(wa, labels_a, wb, M2, reg, to_numpy=False) -- cgit v1.2.3 From c8c397b6e1747d6593ba139d0a4f825fe39f5cc6 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Mon, 24 Sep 2018 15:35:40 +0200 Subject: removed unused import --- ot/gpu/da.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'ot/gpu') diff --git a/ot/gpu/da.py b/ot/gpu/da.py index 85d43e6..ac3f8d7 100644 --- a/ot/gpu/da.py +++ b/ot/gpu/da.py @@ -12,7 +12,7 @@ Domain adaptation with optimal transport with GPU implementation import numpy as np -from ..utils import unif +#from ..utils import unif from .bregman import sinkhorn import cudamat -- cgit v1.2.3 From ee8ed4fa101861eec9e578f09aee4367af593af1 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Fri, 28 Sep 2018 09:41:22 +0200 Subject: update documentation --- README.md | 10 +++------- docs/source/all.rst | 6 ++++++ 
docs/source/conf.py | 2 +- ot/gpu/__init__.py | 22 +++++++++++++++++++++- ot/gpu/bregman.py | 7 ++++++- ot/gpu/da.py | 8 +++++++- ot/gpu/utils.py | 15 +++++++++------ 7 files changed, 53 insertions(+), 17 deletions(-) (limited to 'ot/gpu') diff --git a/README.md b/README.md index 5f37ad6..e49c8da 100644 --- a/README.md +++ b/README.md @@ -14,7 +14,7 @@ This open source Python library provide several solvers for optimization problem It provides the following solvers: * OT Network Flow solver for the linear program/ Earth Movers Distance [1]. -* Entropic regularization OT solver with Sinkhorn Knopp Algorithm [2] and stabilized version [9][10] with optional GPU implementation (requires cudamat). +* Entropic regularization OT solver with Sinkhorn Knopp Algorithm [2] and stabilized version [9][10] with optional GPU implementation (requires cupy). * Smooth optimal transport solvers (dual and semi-dual) for KL and squared L2 regularizations [17]. * Non regularized Wasserstein barycenters [16] with LP solver (only small scale). * Bregman projections for Wasserstein barycenter [3] and unmixing [4]. @@ -83,12 +83,8 @@ Some sub-modules require additional dependences which are discussed below ``` pip install pymanopt autograd ``` -* **ot.gpu** (GPU accelerated OT) depends on cudamat that have to be installed with: -``` -git clone https://github.com/cudamat/cudamat.git -cd cudamat -python setup.py install --user # for user install (no root) -``` +* **ot.gpu** (GPU accelerated OT) depends on cupy that have to be installed following instructions on [this page](https://docs-cupy.chainer.org/en/stable/install.html). + obviously you need CUDA installed and a compatible GPU. diff --git a/docs/source/all.rst b/docs/source/all.rst index 9459023..1eaf3b1 100644 --- a/docs/source/all.rst +++ b/docs/source/all.rst @@ -48,6 +48,12 @@ ot.da .. automodule:: ot.da :members: + +ot.gpu +-------- + +.. automodule:: ot.gpu + :members: ot.dr -------- diff --git a/docs/source/conf.py b/docs/source/conf.py index 114245d..433eca6 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -31,7 +31,7 @@ class Mock(MagicMock): @classmethod def __getattr__(cls, name): return MagicMock() -MOCK_MODULES = ['ot.lp.emd_wrap','autograd','pymanopt','cudamat','autograd.numpy','pymanopt.manifolds','pymanopt.solvers'] +MOCK_MODULES = ['ot.lp.emd_wrap','autograd','pymanopt','cupy','autograd.numpy','pymanopt.manifolds','pymanopt.solvers'] # 'autograd.numpy','pymanopt.manifolds','pymanopt.solvers', sys.modules.update((mod_name, Mock()) for mod_name in MOCK_MODULES) # !!!! diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py index de4825d..9de2c40 100644 --- a/ot/gpu/__init__.py +++ b/ot/gpu/__init__.py @@ -1,8 +1,28 @@ # -*- coding: utf-8 -*- +""" + + +This module implement GPU ilmplementation for several OT solvers and utility +functions. The GPU backend in handled by `cupy +`_. + +By default, the functions in this module accept and return numpy arrays +in order to proide drop-in replacement for the other POT function but +the transfer between CPU en GPU comes with a significant overhead. + +In order to get the best erformances, we recommend to given only cupy +arrays to the functions and desactivate the conversion to numpy of the +result of the function with parameter ``to_numpy=False``. + + + + +""" from . import bregman from . import da from .bregman import sinkhorn +from .da from . 
import utils from .utils import dist, to_gpu, to_np @@ -13,4 +33,4 @@ from .utils import dist, to_gpu, to_np # # License: MIT License -__all__ = ["utils", "dist", "sinkhorn"] +__all__ = ["utils", "dist", "sinkhorn", 'bregman', 'da', 'to_gpu', 'to_np'] diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py index 600ead4..978b307 100644 --- a/ot/gpu/bregman.py +++ b/ot/gpu/bregman.py @@ -16,7 +16,10 @@ from . import utils def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, log=False, to_numpy=True, **kwargs): """ - Solve the entropic regularization optimal transport problem and return the OT matrix + Solve the entropic regularization optimal transport on GPU + + If the input matrix are in numpy format, they will be uploaded to the + GPU first which can incur significant time overhead. The function solves the following optimization problem: @@ -56,6 +59,8 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, Print information along iterations log : bool, optional record log if True + to_numpy : boolean, optional (default True) + If true convert back the GPU array result to numpy format. Returns diff --git a/ot/gpu/da.py b/ot/gpu/da.py index 8c63870..6aba29c 100644 --- a/ot/gpu/da.py +++ b/ot/gpu/da.py @@ -22,7 +22,11 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, log=False, to_numpy=True): """ Solve the entropic regularization optimal transport problem with nonconvex - group lasso regularization + group lasso regularization on GPU + + If the input matrix are in numpy format, they will be uploaded to the + GPU first which can incur significant time overhead. + The function solves the following optimization problem: @@ -74,6 +78,8 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, Print information along iterations log : bool, optional record log if True + to_numpy : boolean, optional (default True) + If true convert back the GPU array result to numpy format. Returns diff --git a/ot/gpu/utils.py b/ot/gpu/utils.py index d349a6d..41e168a 100644 --- a/ot/gpu/utils.py +++ b/ot/gpu/utils.py @@ -16,19 +16,22 @@ import cupy as cp # cp used for cupy specific operations def euclidean_distances(a, b, squared=False, to_numpy=True): """ Compute the pairwise euclidean distance between matrices a and b. - Parameters + + If the input matrix are in numpy format, they will be uploaded to the + GPU first which can incur significant time overhead. + + Parameters ---------- a : np.ndarray (n, f) first matrix b : np.ndarray (m, f) second matrix - gpu : boolean, optional (default False) - if True and the module cupy is available, the computation is done - on the GPU and the type of the matrix returned is cupy.ndarray. - Otherwise, compute on the CPU and returns np.ndarray. + to_numpy : boolean, optional (default True) + If true convert back the GPU array result to numpy format. 
squared : boolean, optional (default False) if True, return squared euclidean distance matrix - Returns + + Returns ------- c : (n x m) np.ndarray or cupy.ndarray pairwise euclidean distance distance matrix -- cgit v1.2.3 From fa7f3ddbed0267edf634e359ce5b3a335807af3c Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Fri, 28 Sep 2018 09:45:21 +0200 Subject: correction import in ot.gpu --- ot/gpu/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'ot/gpu') diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py index 9de2c40..0187a4f 100644 --- a/ot/gpu/__init__.py +++ b/ot/gpu/__init__.py @@ -1,8 +1,7 @@ # -*- coding: utf-8 -*- """ - -This module implement GPU ilmplementation for several OT solvers and utility +This module provides GPU implementation for several OT solvers and utility functions. The GPU backend in handled by `cupy `_. @@ -22,7 +21,7 @@ result of the function with parameter ``to_numpy=False``. from . import bregman from . import da from .bregman import sinkhorn -from .da +from .da import sinkhorn_lpl1_mm from . import utils from .utils import dist, to_gpu, to_np @@ -33,4 +32,5 @@ from .utils import dist, to_gpu, to_np # # License: MIT License -__all__ = ["utils", "dist", "sinkhorn", 'bregman', 'da', 'to_gpu', 'to_np'] +__all__ = ["utils", "dist", "sinkhorn", + "sinkhorn_lpl1_mm", 'bregman', 'da', 'to_gpu', 'to_np'] -- cgit v1.2.3 From 287c4c0747ea5341c989c0c85daf0ed1d5a7cdf0 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Fri, 28 Sep 2018 15:43:03 +0200 Subject: update realease.md --- RELEASES.md | 11 +++++++---- ot/__init__.py | 2 +- ot/gpu/da.py | 4 +++- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'ot/gpu') diff --git a/RELEASES.md b/RELEASES.md index 68abcb3..57ea61d 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -1,10 +1,10 @@ # POT Releases -## 0.5.0b +## 0.5.0 Year 2 *Sep 2018* -*This is a beta release and is still a work in progress* +POT is 2 years old! This release bring both numerous new features to the toolbox as listed below but also several #### Features @@ -12,7 +12,7 @@ * Add non regularized Gromov-Wasserstein solver (PR #41) * Linear OT mapping between empirical distributions and 90\% test coverage (PR #42) * Add log parameter in class EMDTransport and SinkhornLpL1Transport (PR #44) -* Add Marddown format for Pipy (PR #45) +* Add Markdown format for Pipy (PR #45) * Test for Python 3.5 and 3.6 on Travis (PR #46) * Non regularized Wasserstein barycenter with scipy linear solver and/or cvxopt (PR #47) * Rename dataset functions to be more sklearn compliant (PR #49) @@ -22,10 +22,13 @@ * Speed-up Sinkhorn function (PR #57 and PR #58) * Add convolutional Wassersein barycenters for 2D images (PR #64) * Add Greedy Sinkhorn variant (Greenkhorn) (PR #66) +* Big ot.gpu update with cupy implementation (instead of un-maintained cudamat) (PR #67) #### Deprecation -Deprecated OTDA Classes were removed for version 0.5 (PR #48), it has been a year and the deprecation message. +Deprecated OTDA Classes were removed for version 0.5 (PR #48 and PR #67). The +deprecation messagehas been for a year here since 0.4 and it is time to pull +the plug. 
#### Closed issues diff --git a/ot/__init__.py b/ot/__init__.py index fa6600d..b27541d 100644 --- a/ot/__init__.py +++ b/ot/__init__.py @@ -29,7 +29,7 @@ from .da import sinkhorn_lpl1_mm # utils functions from .utils import dist, unif, tic, toc, toq -__version__ = "0.5.0b" +__version__ = "0.5.0" __all__ = ["emd", "emd2", "sinkhorn", "sinkhorn2", "utils", 'datasets', 'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov', diff --git a/ot/gpu/da.py b/ot/gpu/da.py index f5e7daa..7f7b2b0 100644 --- a/ot/gpu/da.py +++ b/ot/gpu/da.py @@ -133,7 +133,9 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, # separated W = np.ones(M.shape) for (i, c) in enumerate(classes): -<<<<<<< HEAD + + +<< << << < HEAD (_, nbRow) = indices_labels[i].shape tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0) transp_GPU.transpose().select_columns(indices_labels[i], tmpC_GPU) -- cgit v1.2.3 From 49af12288b4a6e45d1ff85f3e39d8d76839b2d5f Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Fri, 28 Sep 2018 15:46:31 +0200 Subject: correction bug merge ot.gpu.da --- ot/gpu/da.py | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'ot/gpu') diff --git a/ot/gpu/da.py b/ot/gpu/da.py index 7f7b2b0..4a98038 100644 --- a/ot/gpu/da.py +++ b/ot/gpu/da.py @@ -134,23 +134,6 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, W = np.ones(M.shape) for (i, c) in enumerate(classes): - -<< << << < HEAD - (_, nbRow) = indices_labels[i].shape - tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0) - transp_GPU.transpose().select_columns(indices_labels[i], tmpC_GPU) - majs_GPU = tmpC_GPU.sum(axis=1).add(epsilon) - cudamat.pow(majs_GPU, (p - 1)) - majs_GPU.mult(p) - - tmpC_GPU.assign(0) - tmpC_GPU.add_col_vec(majs_GPU) - W_GPU.set_selected_columns(indices_labels[i], tmpC_GPU) - - W_GPU = W_GPU.transpose() - - return transp_GPU.asarray() -======= majs = np.sum(transp[indices_labels[i]], axis=0) majs = p * ((majs + epsilon)**(p - 1)) W[indices_labels[i]] = majs @@ -159,4 +142,3 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, return utils.to_np(transp) else: return transp ->>>>>>> master -- cgit v1.2.3 From b6977571c1788152b85b756f2a09b1f47b99aac5 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Wed, 3 Oct 2018 08:27:36 +0200 Subject: update releases.md --- RELEASES.md | 31 ++++++++++++++++++++++++++----- ot/gpu/__init__.py | 2 +- 2 files changed, 27 insertions(+), 6 deletions(-) (limited to 'ot/gpu') diff --git a/RELEASES.md b/RELEASES.md index 57ea61d..93af09c 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -4,8 +4,29 @@ ## 0.5.0 Year 2 *Sep 2018* -POT is 2 years old! This release bring both numerous new features to the toolbox as listed below but also several - +POT is 2 years old! This release brings numerous new features to the +toolbox as listed below but also several bug correction. 
+ +Among the new features, we can highlight a [non-regularized Gromov-Wasserstein +solver](https://github.com/rflamary/POT/blob/master/notebooks/plot_gromov.ipynb), +a new [greedy variant of sinkhorn](https://pot.readthedocs.io/en/latest/all.html#ot.bregman.greenkhorn), +[non-regularized](https://pot.readthedocs.io/en/latest/all.html#ot.lp.barycenter), +[convolutional (2D)](https://github.com/rflamary/POT/blob/master/notebooks/plot_convolutional_barycenter.ipynb) +and [free support](https://github.com/rflamary/POT/blob/master/notebooks/plot_free_support_barycenter.ipynb) + Wasserstein barycenters and [smooth](https://github.com/rflamary/POT/blob/prV0.5/notebooks/plot_OT_1D_smooth.ipynb) + and [stochastic](https://pot.readthedocs.io/en/latest/all.html#ot.stochastic.sgd_entropic_regularization) +implementation of entropic OT. + +POT 0.5 also comes with a rewriting of ot.gpu using the cupy framework instead of +the unmaintained cudamat. Note that while we tried to keed changes to the +minimum, the OTDA classes were deprecated. + +The code quality has also improved with 92% code coverage in tests that is now +printed to the log in the Travis builds. The documentation has also been +greatly improved with new modules and examples/notebooks. + +This new release is so full of new stuff and corrections thanks to the old +and new POT contributors (you can see the list in the readme). #### Features @@ -26,9 +47,9 @@ POT is 2 years old! This release bring both numerous new features to the toolbox #### Deprecation -Deprecated OTDA Classes were removed for version 0.5 (PR #48 and PR #67). The -deprecation messagehas been for a year here since 0.4 and it is time to pull -the plug. +Deprecated OTDA Classes were removed from ot.da and ot.gpu for version 0.5 +(PR #48 and PR #67). The deprecation message has been for a year here since +0.4 and it is time to pull the plug. #### Closed issues diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py index 213578c..deda6b1 100644 --- a/ot/gpu/__init__.py +++ b/ot/gpu/__init__.py @@ -9,7 +9,7 @@ By default, the functions in this module accept and return numpy arrays in order to proide drop-in replacement for the other POT function but the transfer between CPU en GPU comes with a significant overhead. -In order to get the best erformances, we recommend to given only cupy +In order to get the best erformances, we recommend to give only cupy arrays to the functions and desactivate the conversion to numpy of the result of the function with parameter ``to_numpy=False``. -- cgit v1.2.3 From 93db239e1156ad1db8edbb13c1ecde973ce009c0 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Mon, 19 Nov 2018 11:17:07 +0100 Subject: remove W605 errors --- ot/bregman.py | 18 +++++++++--------- ot/externals/funcsigs.py | 46 +++++++++++++++++++++++----------------------- ot/gpu/bregman.py | 6 +++--- ot/stochastic.py | 20 ++++++++++---------- setup.cfg | 2 +- 5 files changed, 46 insertions(+), 46 deletions(-) (limited to 'ot/gpu') diff --git a/ot/bregman.py b/ot/bregman.py index d1057ff..43340f7 100644 --- a/ot/bregman.py +++ b/ot/bregman.py @@ -370,9 +370,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, v = np.divide(b, KtransposeU) u = 1. 
/ np.dot(Kp, v) - if (np.any(KtransposeU == 0) or - np.any(np.isnan(u)) or np.any(np.isnan(v)) or - np.any(np.isinf(u)) or np.any(np.isinf(v))): + if (np.any(KtransposeU == 0) + or np.any(np.isnan(u)) or np.any(np.isnan(v)) + or np.any(np.isinf(u)) or np.any(np.isinf(v))): # we have reached the machine precision # come back to previous solution and quit loop print('Warning: numerical errors at iteration', cpt) @@ -683,13 +683,13 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, def get_K(alpha, beta): """log space computation""" - return np.exp(-(M - alpha.reshape((na, 1)) - - beta.reshape((1, nb))) / reg) + return np.exp(-(M - alpha.reshape((na, 1)) + - beta.reshape((1, nb))) / reg) def get_Gamma(alpha, beta, u, v): """log space gamma computation""" - return np.exp(-(M - alpha.reshape((na, 1)) - beta.reshape((1, nb))) / - reg + np.log(u.reshape((na, 1))) + np.log(v.reshape((1, nb)))) + return np.exp(-(M - alpha.reshape((na, 1)) - beta.reshape((1, nb))) + / reg + np.log(u.reshape((na, 1))) + np.log(v.reshape((1, nb)))) # print(np.min(K)) @@ -899,8 +899,8 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne def get_K(alpha, beta): """log space computation""" - return np.exp(-(M - alpha.reshape((na, 1)) - - beta.reshape((1, nb))) / reg) + return np.exp(-(M - alpha.reshape((na, 1)) + - beta.reshape((1, nb))) / reg) # print(np.min(K)) def get_reg(n): # exponential decreasing diff --git a/ot/externals/funcsigs.py b/ot/externals/funcsigs.py index c73fdc9..106bde7 100644 --- a/ot/externals/funcsigs.py +++ b/ot/externals/funcsigs.py @@ -126,8 +126,8 @@ def signature(obj): new_params[arg_name] = param.replace(default=arg_value, _partial_kwarg=True) - elif (param.kind not in (_VAR_KEYWORD, _VAR_POSITIONAL) and - not param._partial_kwarg): + elif (param.kind not in (_VAR_KEYWORD, _VAR_POSITIONAL) + and not param._partial_kwarg): new_params.pop(arg_name) return sig.replace(parameters=new_params.values()) @@ -333,11 +333,11 @@ class Parameter(object): raise TypeError(msg) def __eq__(self, other): - return (issubclass(other.__class__, Parameter) and - self._name == other._name and - self._kind == other._kind and - self._default == other._default and - self._annotation == other._annotation) + return (issubclass(other.__class__, Parameter) + and self._name == other._name + and self._kind == other._kind + and self._default == other._default + and self._annotation == other._annotation) def __ne__(self, other): return not self.__eq__(other) @@ -372,8 +372,8 @@ class BoundArguments(object): def args(self): args = [] for param_name, param in self._signature.parameters.items(): - if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) or - param._partial_kwarg): + if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) + or param._partial_kwarg): # Keyword arguments mapped by 'functools.partial' # (Parameter._partial_kwarg is True) are mapped # in 'BoundArguments.kwargs', along with VAR_KEYWORD & @@ -402,8 +402,8 @@ class BoundArguments(object): kwargs_started = False for param_name, param in self._signature.parameters.items(): if not kwargs_started: - if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) or - param._partial_kwarg): + if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) + or param._partial_kwarg): kwargs_started = True else: if param_name not in self.arguments: @@ -432,9 +432,9 @@ class BoundArguments(object): raise TypeError(msg) def __eq__(self, other): - return (issubclass(other.__class__, BoundArguments) and - self.signature == other.signature and - 
self.arguments == other.arguments) + return (issubclass(other.__class__, BoundArguments) + and self.signature == other.signature + and self.arguments == other.arguments) def __ne__(self, other): return not self.__eq__(other) @@ -612,9 +612,9 @@ class Signature(object): raise TypeError(msg) def __eq__(self, other): - if (not issubclass(type(other), Signature) or - self.return_annotation != other.return_annotation or - len(self.parameters) != len(other.parameters)): + if (not issubclass(type(other), Signature) + or self.return_annotation != other.return_annotation + or len(self.parameters) != len(other.parameters)): return False other_positions = dict((param, idx) @@ -635,8 +635,8 @@ class Signature(object): except KeyError: return False else: - if (idx != other_idx or - param != other.parameters[param_name]): + if (idx != other_idx + or param != other.parameters[param_name]): return False return True @@ -688,8 +688,8 @@ class Signature(object): raise TypeError(msg) parameters_ex = (param,) break - elif (param.kind == _VAR_KEYWORD or - param.default is not _empty): + elif (param.kind == _VAR_KEYWORD + or param.default is not _empty): # That's fine too - we have a default value for this # parameter. So, lets start parsing `kwargs`, starting # with the current parameter @@ -755,8 +755,8 @@ class Signature(object): # if it has a default value, or it is an '*args'-like # parameter, left alone by the processing of positional # arguments. - if (not partial and param.kind != _VAR_POSITIONAL and - param.default is _empty): + if (not partial and param.kind != _VAR_POSITIONAL + and param.default is _empty): raise TypeError('{arg!r} parameter lacking default value'. format(arg=param_name)) diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py index 978b307..3031ed9 100644 --- a/ot/gpu/bregman.py +++ b/ot/gpu/bregman.py @@ -146,9 +146,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, v = np.divide(b, KtransposeU) u = 1. / np.dot(Kp, v) - if (np.any(KtransposeU == 0) or - np.any(np.isnan(u)) or np.any(np.isnan(v)) or - np.any(np.isinf(u)) or np.any(np.isinf(v))): + if (np.any(KtransposeU == 0) + or np.any(np.isnan(u)) or np.any(np.isnan(v)) + or np.any(np.isinf(u)) or np.any(np.isinf(v))): # we have reached the machine precision # come back to previous solution and quit loop print('Warning: numerical errors at iteration', cpt) diff --git a/ot/stochastic.py b/ot/stochastic.py index ec53015..1376884 100644 --- a/ot/stochastic.py +++ b/ot/stochastic.py @@ -418,8 +418,8 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None, return None opt_alpha = c_transform_entropic(b, M, reg, opt_beta) - pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) * - a[:, None] * b[None, :]) + pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) + * a[:, None] * b[None, :]) if log: log = {} @@ -520,15 +520,15 @@ def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha, arXiv preprint arxiv:1711.02283. 
''' - G = - (np.exp((alpha[batch_alpha, None] + beta[None, batch_beta] - - M[batch_alpha, :][:, batch_beta]) / reg) * + G = - (np.exp((alpha[batch_alpha, None] + beta[None, batch_beta] + - M[batch_alpha, :][:, batch_beta]) / reg) * a[batch_alpha, None] * b[None, batch_beta]) grad_beta = np.zeros(np.shape(M)[1]) grad_alpha = np.zeros(np.shape(M)[0]) - grad_beta[batch_beta] = (b[batch_beta] * len(batch_alpha) / np.shape(M)[0] + - G.sum(0)) - grad_alpha[batch_alpha] = (a[batch_alpha] * len(batch_beta) / - np.shape(M)[1] + G.sum(1)) + grad_beta[batch_beta] = (b[batch_beta] * len(batch_alpha) / np.shape(M)[0] + + G.sum(0)) + grad_alpha[batch_alpha] = (a[batch_alpha] * len(batch_beta) + / np.shape(M)[1] + G.sum(1)) return grad_alpha, grad_beta @@ -702,8 +702,8 @@ def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1, opt_alpha, opt_beta = sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr) - pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) * - a[:, None] * b[None, :]) + pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) + * a[:, None] * b[None, :]) if log: log = {} log['alpha'] = opt_alpha diff --git a/setup.cfg b/setup.cfg index b2a2415..24512d2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,4 +3,4 @@ description-file = README.md [flake8] exclude = __init__.py -ignore = E265,E501 +ignore = E265,E501,W605 -- cgit v1.2.3 From de04afc0f9f01fc09a3a8138865eacc0b6f4415d Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Mon, 19 Nov 2018 11:18:29 +0100 Subject: update flake8 parameters --- ot/gpu/bregman.py | 6 +++--- ot/stochastic.py | 12 ++++++------ setup.cfg | 2 +- 3 files changed, 10 insertions(+), 10 deletions(-) (limited to 'ot/gpu') diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py index 3031ed9..978b307 100644 --- a/ot/gpu/bregman.py +++ b/ot/gpu/bregman.py @@ -146,9 +146,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, v = np.divide(b, KtransposeU) u = 1. / np.dot(Kp, v) - if (np.any(KtransposeU == 0) - or np.any(np.isnan(u)) or np.any(np.isnan(v)) - or np.any(np.isinf(u)) or np.any(np.isinf(v))): + if (np.any(KtransposeU == 0) or + np.any(np.isnan(u)) or np.any(np.isnan(v)) or + np.any(np.isinf(u)) or np.any(np.isinf(v))): # we have reached the machine precision # come back to previous solution and quit loop print('Warning: numerical errors at iteration', cpt) diff --git a/ot/stochastic.py b/ot/stochastic.py index 1376884..959c6fa 100644 --- a/ot/stochastic.py +++ b/ot/stochastic.py @@ -418,8 +418,8 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None, return None opt_alpha = c_transform_entropic(b, M, reg, opt_beta) - pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) - * a[:, None] * b[None, :]) + pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) * + a[:, None] * b[None, :]) if log: log = {} @@ -520,8 +520,8 @@ def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha, arXiv preprint arxiv:1711.02283. 
''' - G = - (np.exp((alpha[batch_alpha, None] + beta[None, batch_beta] - - M[batch_alpha, :][:, batch_beta]) / reg) * + G = - (np.exp((alpha[batch_alpha, None] + beta[None, batch_beta] - + M[batch_alpha, :][:, batch_beta]) / reg) * a[batch_alpha, None] * b[None, batch_beta]) grad_beta = np.zeros(np.shape(M)[1]) grad_alpha = np.zeros(np.shape(M)[0]) @@ -702,8 +702,8 @@ def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1, opt_alpha, opt_beta = sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr) - pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) - * a[:, None] * b[None, :]) + pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) * + a[:, None] * b[None, :]) if log: log = {} log['alpha'] = opt_alpha diff --git a/setup.cfg b/setup.cfg index 24512d2..aa0ff62 100644 --- a/setup.cfg +++ b/setup.cfg @@ -3,4 +3,4 @@ description-file = README.md [flake8] exclude = __init__.py -ignore = E265,E501,W605 +ignore = E265,E501,W605,W503,W504 -- cgit v1.2.3