-rw-r--r--   ot/__init__.py     |  2
-rw-r--r--   ot/bregman.py      |  6
-rw-r--r--   ot/da.py           | 47
-rw-r--r--   ot/gpu/__init__.py |  7
-rw-r--r--   ot/gpu/bregman.py  | 83
-rw-r--r--   ot/gpu/da.py       | 86
6 files changed, 213 insertions, 18 deletions
diff --git a/ot/__init__.py b/ot/__init__.py
index d89b3a3..13abcda 100644
--- a/ot/__init__.py
+++ b/ot/__init__.py
@@ -20,6 +20,6 @@ from .utils import dist, unif, tic, toc, toq
 
 __version__ = "0.2"
 
-__all__ = ["emd", "emd2", "sinkhorn", "utils", 'datasets', 'bregman', 'lp',
+__all__ = ["emd", "emd2", "sinkhorn", "utils", 'datasets', 'bregman', 'lp',
            'plot', 'tic', 'toc', 'toq', 'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim']
diff --git a/ot/bregman.py b/ot/bregman.py
index 0453f14..c46e5dc 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -112,9 +112,11 @@ def sinkhorn(a,b, M, reg, numItermax = 1000, stopThr=1e-9, verbose=False, log=Fa
     while (err>stopThr and cpt<numItermax):
         uprev = u
         vprev = v
-        v = np.divide(b,np.dot(K.T,u))
+        KtransposeU = np.dot(K.T, u)
+        v = np.divide(b, KtransposeU)
         u = 1./np.dot(Kp,v)
-        if (np.any(np.dot(K.T,u)==0) or
+
+        if (np.any(KtransposeU==0) or
             np.any(np.isnan(u)) or np.any(np.isnan(v)) or
             np.any(np.isinf(u)) or np.any(np.isinf(v))):
             # we have reached the machine precision
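For context, the ot/bregman.py change stores the matrix-vector product K^T u in KtransposeU so that the numerical-precision guard checks the exact denominator just used to update v, instead of recomputing np.dot(K.T, u) after u has already changed. A minimal sketch of the patched entry point (toy sizes and a random cost matrix, purely illustrative):

    import numpy as np
    import ot

    a = ot.unif(50)                 # uniform source weights
    b = ot.unif(60)                 # uniform target weights
    M = np.random.rand(50, 60)      # toy cost matrix

    # Small reg values are exactly where the KtransposeU == 0 guard matters.
    G = ot.sinkhorn(a, b, M, reg=1e-1)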
+ """ + if norm == "median": + self.M /= float(np.median(self.M)) + elif norm == "max": + self.M /= float(np.max(self.M)) + elif norm == "log": + self.M = np.log(1 + self.M) + elif norm == "loglog": + self.M = np.log(1 + np.log(1 + self.M)) class OTDA_sinkhorn(OTDA): """Class for domain adaptation with optimal transport with entropic regularization""" - def fit(self,xs,xt,reg=1,ws=None,wt=None,**kwargs): + def fit(self,xs,xt,reg=1,ws=None,wt=None,norm=None,**kwargs): """ Fit regularized domain adaptation between samples is xs and xt (with optional weights)""" self.xs=xs self.xt=xt @@ -703,6 +717,7 @@ class OTDA_sinkhorn(OTDA): self.wt=wt self.M=dist(xs,xt,metric=self.metric) + self.normalizeM(norm) self.G=sinkhorn(ws,wt,self.M,reg,**kwargs) self.computed=True @@ -711,7 +726,7 @@ class OTDA_lpl1(OTDA): """Class for domain adaptation with optimal transport with entropic and group regularization""" - def fit(self,xs,ys,xt,reg=1,eta=1,ws=None,wt=None,**kwargs): + def fit(self,xs,ys,xt,reg=1,eta=1,ws=None,wt=None,norm=None,**kwargs): """ Fit regularized domain adaptation between samples is xs and xt (with optional weights), See ot.da.sinkhorn_lpl1_mm for fit parameters""" self.xs=xs self.xt=xt @@ -725,14 +740,15 @@ class OTDA_lpl1(OTDA): self.wt=wt self.M=dist(xs,xt,metric=self.metric) + self.normalizeM(norm) self.G=sinkhorn_lpl1_mm(ws,ys,wt,self.M,reg,eta,**kwargs) self.computed=True - + class OTDA_l1l2(OTDA): """Class for domain adaptation with optimal transport with entropic and group lasso regularization""" - def fit(self,xs,ys,xt,reg=1,eta=1,ws=None,wt=None,**kwargs): + def fit(self,xs,ys,xt,reg=1,eta=1,ws=None,wt=None,norm=None,**kwargs): """ Fit regularized domain adaptation between samples is xs and xt (with optional weights), See ot.da.sinkhorn_lpl1_gl for fit parameters""" self.xs=xs self.xt=xt @@ -746,6 +762,7 @@ class OTDA_l1l2(OTDA): self.wt=wt self.M=dist(xs,xt,metric=self.metric) + self.normalizeM(norm) self.G=sinkhorn_l1l2_gl(ws,ys,wt,self.M,reg,eta,**kwargs) self.computed=True diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py new file mode 100644 index 0000000..40b11c0 --- /dev/null +++ b/ot/gpu/__init__.py @@ -0,0 +1,7 @@ +# -*- coding: utf-8 -*- + +from . import bregman +from . 
diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py
new file mode 100644
index 0000000..40b11c0
--- /dev/null
+++ b/ot/gpu/__init__.py
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+
+from . import bregman
+from . import da
+from .bregman import sinkhorn
+
+__all__ = ["bregman", "da", "sinkhorn"]
diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py
new file mode 100644
index 0000000..f91f15f
--- /dev/null
+++ b/ot/gpu/bregman.py
@@ -0,0 +1,83 @@
+# -*- coding: utf-8 -*-
+"""
+Bregman projections for regularized OT with GPU
+"""
+
+import numpy as np
+import cudamat
+
+
+def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
+             log=False):
+    # init data
+    Nini = len(a)
+    Nfin = len(b)
+
+    if log:
+        log = {'err': []}
+
+    # we assume that no distances are null except those of the diagonal of
+    # distances
+    u = (np.ones(Nini)/Nini).reshape((Nini, 1))
+    u_GPU = cudamat.CUDAMatrix(u)
+    a_GPU = cudamat.CUDAMatrix(a.reshape((Nini, 1)))
+    ones_GPU = cudamat.empty(u_GPU.shape).assign(1)
+    v = (np.ones(Nfin)/Nfin).reshape((Nfin, 1))
+    v_GPU = cudamat.CUDAMatrix(v)
+    b_GPU = cudamat.CUDAMatrix(b.reshape((Nfin, 1)))
+
+    M_GPU.divide(-reg)
+
+    K_GPU = cudamat.exp(M_GPU)
+
+    ones_GPU.divide(a_GPU, target=a_GPU)
+    Kp_GPU = cudamat.empty(K_GPU.shape)
+    K_GPU.mult_by_col(a_GPU, target=Kp_GPU)
+
+    tmp_GPU = cudamat.empty(K_GPU.shape)
+
+    cpt = 0
+    err = 1
+    while (err > stopThr and cpt < numItermax):
+        uprev_GPU = u_GPU.copy()
+        vprev_GPU = v_GPU.copy()
+
+        KtransposeU_GPU = K_GPU.transpose().dot(u_GPU)
+        b_GPU.divide(KtransposeU_GPU, target=v_GPU)
+        ones_GPU.divide(Kp_GPU.dot(v_GPU), target=u_GPU)
+
+        if (np.any(KtransposeU_GPU.asarray() == 0) or
+                not u_GPU.allfinite() or not v_GPU.allfinite()):
+            # we have reached the machine precision
+            # come back to previous solution and quit loop
+            print('Warning: numerical errors at iteration', cpt)
+            u_GPU = uprev_GPU.copy()
+            v_GPU = vprev_GPU.copy()
+            break
+        if cpt % 10 == 0:
+            # we can speed up the process by checking the error only every
+            # 10th iteration
+            K_GPU.mult_by_col(u_GPU, target=tmp_GPU)
+            tmp_GPU.mult_by_row(v_GPU.transpose(), target=tmp_GPU)
+
+            bcopy_GPU = b_GPU.copy().transpose()
+            bcopy_GPU.add_sums(tmp_GPU, axis=0, beta=-1)
+            err = bcopy_GPU.euclid_norm()**2
+            if log:
+                log['err'].append(err)
+
+            if verbose:
+                if cpt % 200 == 0:
+                    print('{:5s}|{:12s}'.format('It.', 'Err')+'\n'+'-'*19)
+                print('{:5d}|{:8e}|'.format(cpt, err))
+        cpt += 1
+    if log:
+        log['u'] = u_GPU.asarray()
+        log['v'] = v_GPU.asarray()
+
+    K_GPU.mult_by_col(u_GPU, target=K_GPU)
+    K_GPU.mult_by_row(v_GPU.transpose(), target=K_GPU)
+    if log:
+        return K_GPU.asarray(), log
+    else:
+        return K_GPU.asarray()
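For context, the GPU solver mirrors the CPU loop in ot/bregman.py but takes the cost matrix as a cudamat.CUDAMatrix and rescales it in place via M_GPU.divide(-reg). A minimal sketch of a direct call (assumes a working cudamat/CUDA setup; sizes and reg are illustrative):

    import numpy as np
    import cudamat
    import ot
    import ot.gpu

    a = ot.unif(1000)                   # uniform source weights
    b = ot.unif(1000)                   # uniform target weights
    M = np.random.rand(1000, 1000)      # toy cost matrix

    cudamat.init()
    M_GPU = cudamat.CUDAMatrix(M)       # note: sinkhorn modifies M_GPU in place
    G = ot.gpu.sinkhorn(a, b, M_GPU, reg=1e-1)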
diff --git a/ot/gpu/da.py b/ot/gpu/da.py
new file mode 100644
index 0000000..0121e7b
--- /dev/null
+++ b/ot/gpu/da.py
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+"""
+Domain adaptation with optimal transport and GPU
+"""
+
+import numpy as np
+from ..utils import unif
+from ..da import OTDA
+from .bregman import sinkhorn
+import cudamat
+
+
+def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False):
+    # a has shape (n, f) and b has shape (m, f); return a matrix c of shape
+    # (n, m). First compute in c_GPU the squared Euclidean distance and
+    # return its square root unless squared=True. In each cell [i,j] of c
+    # we want sum{k in range(f)} ( (a[i,k] - b[j,k])^2 ). Since
+    # (a-b)^2 = a^2 - 2ab + b^2, each cell of c becomes:
+    # sum{k in range(f)} ( a[i,k]^2 - 2a[i,k]b[j,k] + b[j,k]^2 ).
+    a_GPU = cudamat.CUDAMatrix(a)
+    b_GPU = cudamat.CUDAMatrix(b)
+
+    # Multiply a by b transpose to obtain in each cell [i,j] of c the
+    # value sum{k in range(f)} ( a[i,k]b[j,k] )
+    c_GPU = cudamat.dot(a_GPU, b_GPU.transpose())
+    # multiply by -2 to have sum{k in range(f)} ( -2a[i,k]b[j,k] )
+    c_GPU.mult(-2)
+
+    # Compute the vectors of the sum of squared elements.
+    a_GPU = cudamat.pow(a_GPU, 2).sum(axis=1)
+    b_GPU = cudamat.pow(b_GPU, 2).sum(axis=1)
+
+    # Add the vectors in each column (respectively row) of c:
+    # sum{k in range(f)} ( a[i,k]^2 - 2a[i,k]b[j,k] )
+    c_GPU.add_col_vec(a_GPU)
+    # sum{k in range(f)} ( a[i,k]^2 - 2a[i,k]b[j,k] + b[j,k]^2 )
+    c_GPU.add_row_vec(b_GPU.transpose())
+
+    if not squared:
+        c_GPU = cudamat.sqrt(c_GPU)
+
+    if returnAsGPU:
+        return c_GPU
+    else:
+        return c_GPU.asarray()
+
+
+class OTDA_GPU(OTDA):
+    def normalizeM(self, norm):
+        if norm == "median":
+            self.M_GPU.divide(float(np.median(self.M_GPU.asarray())))
+        elif norm == "max":
+            self.M_GPU.divide(float(np.max(self.M_GPU.asarray())))
+        elif norm == "log":
+            self.M_GPU.add(1)
+            cudamat.log(self.M_GPU)
+        elif norm == "loglog":
+            self.M_GPU.add(1)
+            cudamat.log(self.M_GPU)
+            self.M_GPU.add(1)
+            cudamat.log(self.M_GPU)
+
+
+class OTDA_sinkhorn(OTDA_GPU):
+    def fit(self, xs, xt, reg=1, ws=None, wt=None, norm=None, **kwargs):
+        cudamat.init()
+        xs = np.asarray(xs, dtype=np.float64)
+        xt = np.asarray(xt, dtype=np.float64)
+
+        self.xs = xs
+        self.xt = xt
+
+        if wt is None:
+            wt = unif(xt.shape[0])
+        if ws is None:
+            ws = unif(xs.shape[0])
+
+        self.ws = ws
+        self.wt = wt
+
+        self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True,
+                                          squared=True)
+        self.normalizeM(norm)
+        self.G = sinkhorn(ws, wt, self.M_GPU, reg, **kwargs)
+        self.computed = True
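For reference, pairwiseEuclideanGPU implements the expansion ||a_i - b_j||^2 = ||a_i||^2 - 2<a_i, b_j> + ||b_j||^2 entirely on the GPU. A NumPy sketch of the same computation (pairwise_euclidean_cpu is a hypothetical helper, not part of this patch):

    import numpy as np

    def pairwise_euclidean_cpu(a, b, squared=False):
        # c[i, j] = ||a[i]||^2 - 2 <a[i], b[j]> + ||b[j]||^2
        c = -2 * np.dot(a, b.T)
        c += np.sum(a ** 2, axis=1)[:, None]   # add ||a[i]||^2 down each row
        c += np.sum(b ** 2, axis=1)[None, :]   # add ||b[j]||^2 across each column
        c = np.maximum(c, 0)                   # guard tiny negatives from round-off
        return c if squared else np.sqrt(c)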