-rw-r--r--  ot/gpu/bregman.py                12
-rw-r--r--  ot/gpu/da.py                    109
-rw-r--r--  test/test_gpu_sinkhorn_lpl1.py   28
3 files changed, 146 insertions, 3 deletions
diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py
index f91f15f..cc610b7 100644
--- a/ot/gpu/bregman.py
+++ b/ot/gpu/bregman.py
@@ -8,7 +8,7 @@ import cudamat
def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
- log=False):
+ log=False, returnAsGPU=False):
# init data
Nini = len(a)
Nfin = len(b)
@@ -77,7 +77,13 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
K_GPU.mult_by_col(u_GPU, target=K_GPU)
K_GPU.mult_by_row(v_GPU.transpose(), target=K_GPU)
+
+ if returnAsGPU:
+ res = K_GPU
+ else:
+ res = K_GPU.asarray()
+
if log:
- return K_GPU.asarray(), log
+ return res, log
else:
- return K_GPU.asarray()
+ return res
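The returnAsGPU flag added above lets the Sinkhorn coupling stay on the GPU, so callers such as sinkhorn_lpl1_mm (added below in ot/gpu/da.py) can keep operating on it with cudamat without a device-to-host copy. A minimal usage sketch, assuming cudamat is installed and initialised; the sizes and random data here are purely illustrative:

    import numpy as np
    import cudamat
    from ot.gpu.bregman import sinkhorn

    cudamat.init()
    n, m = 200, 300
    a = np.full(n, 1.0 / n)                             # uniform source weights
    b = np.full(m, 1.0 / m)                             # uniform target weights
    M_GPU = cudamat.CUDAMatrix(np.random.rand(n, m))    # cost matrix on the GPU

    # coupling stays on the GPU; copy back to the host only when needed
    G_GPU = sinkhorn(a, b, M_GPU, reg=0.1, returnAsGPU=True)
    G = G_GPU.asarray()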
diff --git a/ot/gpu/da.py b/ot/gpu/da.py
index 0121e7b..700a5e4 100644
--- a/ot/gpu/da.py
+++ b/ot/gpu/da.py
@@ -11,6 +11,27 @@ import cudamat
def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False):
+ """
+ Compute the pairwise euclidean distance between matrices a and b.
+
+
+ Parameters
+ ----------
+ a : np.ndarray (n, f)
+        first matrix
+ b : np.ndarray (m, f)
+        second matrix
+ returnAsGPU : boolean, optional (default False)
+        if True, return a cudamat matrix still on GPU, else an np.ndarray
+ squared : boolean, optional (default False)
+        if True, return the squared euclidean distance matrix
+
+
+ Returns
+ -------
+ c : (n x m) np.ndarray or cudamat.CUDAMatrix
+        pairwise euclidean distance matrix
+ """
# a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m).
# First compute in c_GPU the squared euclidean distance. And return its
# square root. At each cell [i,j] of c, we want to have
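The comment truncated by the hunk context above refers to the standard expansion ||a_i - b_j||^2 = ||a_i||^2 + ||b_j||^2 - 2 a_i.b_j, which lets the whole distance matrix be built from one matrix product plus two vectors of squared row norms. A CPU-side NumPy sketch of the same idea, handy for checking the GPU result (the helper name is illustrative, not part of this change):

    import numpy as np

    def pairwise_euclidean_cpu(a, b, squared=False):
        # c[i, j] = ||a[i]||^2 + ||b[j]||^2 - 2 * <a[i], b[j]>
        sq_a = np.sum(a ** 2, axis=1)[:, None]      # (n, 1)
        sq_b = np.sum(b ** 2, axis=1)[None, :]      # (1, m)
        c = sq_a + sq_b - 2.0 * a.dot(b.T)
        np.maximum(c, 0.0, out=c)                   # clip tiny negative round-off
        return c if squared else np.sqrt(c)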
@@ -46,6 +67,69 @@ def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False):
return c_GPU.asarray()
+def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
+ numInnerItermax=200, stopInnerThr=1e-9,
+ unlabelledValue=-99, verbose=False, log=False):
+ p = 0.5
+ epsilon = 1e-3
+
+ # init data
+ Nfin = len(b)
+
+ indices_labels = []
+ classes = np.unique(labels_a)
+ for c in classes:
+ idxc, = np.where(labels_a == c)
+ indices_labels.append(cudamat.CUDAMatrix(idxc.reshape(1, -1)))
+
+ Mreg_GPU = cudamat.empty(M_GPU.shape)
+ W_GPU = cudamat.empty(M_GPU.shape).assign(0)
+
+ for cpt in range(numItermax):
+ Mreg_GPU.assign(M_GPU)
+ Mreg_GPU.add_mult(W_GPU, eta)
+ transp_GPU = sinkhorn(a, b, Mreg_GPU, reg, numItermax=numInnerItermax,
+ stopThr=stopInnerThr, returnAsGPU=True)
+ # the transport has been computed. Check if classes are really
+ # separated
+ W_GPU.assign(1)
+ W_GPU = W_GPU.transpose()
+ all_majs_GPU = []
+ idx_unlabelled = -1
+ for (i, c) in enumerate(classes):
+ if c != unlabelledValue:
+ (_, nbRow) = indices_labels[i].shape
+ tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0)
+ transp_GPU.transpose().select_columns(indices_labels[i],
+ tmpC_GPU)
+ majs_GPU = tmpC_GPU.sum(axis=1).add(epsilon)
+ cudamat.pow(majs_GPU, (p-1))
+ majs_GPU.mult(p)
+ all_majs_GPU.append(majs_GPU)
+
+ tmpC_GPU.assign(0)
+ tmpC_GPU.add_col_vec(majs_GPU)
+ W_GPU.set_selected_columns(indices_labels[i], tmpC_GPU)
+ else:
+ idx_unlabelled = i
+
+ # now we majorize the unlabelled (if there are any) by the min of
+        # the majorizations. Do it only for unlabelled data
+ if idx_unlabelled != -1:
+ all_majs = np.array([m_GPU.asarray() for m_GPU in all_majs_GPU])
+ minMaj_GPU = (cudamat.CUDAMatrix(all_majs).min(axis=0)
+ .transpose())
+ (_, nbRow) = indices_labels[idx_unlabelled].shape
+ tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0)
+
+ tmpC_GPU.add_col_vec(minMaj_GPU)
+ W_GPU.set_selected_columns(indices_labels[idx_unlabelled],
+ tmpC_GPU)
+ W_GPU = W_GPU.transpose()
+
+ return transp_GPU.asarray()
+
+
class OTDA_GPU(OTDA):
def normalizeM(self, norm):
if norm == "median":
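The sinkhorn_lpl1_mm loop added in the hunk above is a majorization-minimization scheme for the class-based lpl1 regulariser: each outer iteration runs an entropic Sinkhorn solve on the surrogate cost Mreg = M + eta * W, then rebuilds W from the gradient of the concave penalty sum_j sum_c (sum_{i in c} T_ij + epsilon)^p with p = 1/2, so every source sample of class c receives the weight p * (class mass on target j + epsilon)^(p-1). A CPU-style NumPy sketch of that W update (names are illustrative; the special handling of the unlabelled class is left out):

    import numpy as np

    def lpl1_weight_update(transp, labels_a, p=0.5, epsilon=1e-3):
        # transp has shape (n_source, n_target); one weight per (sample, target) pair
        W = np.ones(transp.shape)
        for c in np.unique(labels_a):
            idx = np.where(labels_a == c)[0]
            class_mass = transp[idx].sum(axis=0)        # mass of class c sent to each target
            W[idx, :] = p * (class_mass + epsilon) ** (p - 1)
        return W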
@@ -84,3 +168,28 @@ class OTDA_sinkhorn(OTDA_GPU):
self.normalizeM(norm)
self.G = sinkhorn(ws, wt, self.M_GPU, reg, **kwargs)
self.computed = True
+
+
+class OTDA_lpl1(OTDA_GPU):
+ def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None,
+ **kwargs):
+ cudamat.init()
+ xs = np.asarray(xs, dtype=np.float64)
+ xt = np.asarray(xt, dtype=np.float64)
+
+ self.xs = xs
+ self.xt = xt
+
+ if wt is None:
+ wt = unif(xt.shape[0])
+ if ws is None:
+ ws = unif(xs.shape[0])
+
+ self.ws = ws
+ self.wt = wt
+
+ self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True,
+ squared=True)
+ self.normalizeM(norm)
+ self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M_GPU, reg, eta, **kwargs)
+ self.computed = True
diff --git a/test/test_gpu_sinkhorn_lpl1.py b/test/test_gpu_sinkhorn_lpl1.py
new file mode 100644
index 0000000..e6cdd31
--- /dev/null
+++ b/test/test_gpu_sinkhorn_lpl1.py
@@ -0,0 +1,28 @@
+import ot
+import numpy as np
+import time
+import ot.gpu
+
+
+def describeRes(r):
+ print("min:{:.3E}, max:{:.3E}, mean:{:.3E}, std:{:.3E}"
+ .format(np.min(r), np.max(r), np.mean(r), np.std(r)))
+
+for n in [5000, 10000, 15000, 20000]:
+ print(n)
+ a = np.random.rand(n // 4, 100)
+ labels_a = np.random.randint(10, size=(n // 4))
+ b = np.random.rand(n, 100)
+ time1 = time.time()
+ transport = ot.da.OTDA_lpl1()
+ transport.fit(a, labels_a, b)
+ G1 = transport.G
+ time2 = time.time()
+ transport = ot.gpu.da.OTDA_lpl1()
+ transport.fit(a, labels_a, b)
+ G2 = transport.G
+ time3 = time.time()
+ print("Normal sinkhorn lpl1, time: {:6.2f} sec ".format(time2 - time1))
+ describeRes(G1)
+ print(" GPU sinkhorn lpl1, time: {:6.2f} sec ".format(time3 - time2))
+ describeRes(G2)