add GPU implementation sinkhorn lpl1

author: Leo gautheron <gautheron@iv-cm-359.creatis.insa-lyon.fr> 2017-04-24 10:43:44 +0200
committer: Leo gautheron <gautheron@iv-cm-359.creatis.insa-lyon.fr> 2017-04-24 10:43:44 +0200
commit: e2cf8538b05f026d73c6033777699af77e7508b5 (patch)
tree: 65e3537011f45cb4200aea2e9fdc5d68ffe1de90 /ot/gpu/da.py
parent: 8fc74ed792cda0fc0073dab218f7fdc08bf3c1ba (diff)
1 files changed, 109 insertions, 0 deletions
diff --git a/ot/gpu/da.py b/ot/gpu/da.py
index 0121e7b..700a5e4 100644
--- a/ot/gpu/da.py
+++ b/ot/gpu/da.py
@@ -11,6 +11,27 @@ import cudamat
 
 
 def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False):
+    """
+    Compute the pairwise euclidean distance between matrices a and b.
+
+
+    Parameters
+    ----------
+    a : np.ndarray (n, f)
+        first matrice
+    b : np.ndarray (m, f)
+        second matrice
+    returnAsGPU : boolean, optional (default False)
+        if True, returns cudamat matrix still on GPU, else return np.ndarray
+    squared : boolean, optional (default False)
+        if True, return squared euclidean distance matrice
+
+
+    Returns
+    -------
+    c : (n x m) np.ndarray or cudamat.CUDAMatrix
+        pairwise euclidean distance distance matrix
+    """
     # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m).
     # First compute in c_GPU the squared euclidean distance. And return its
     # square root. At each cell [i,j] of c, we want to have
@@ -46,6 +67,69 @@ def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False):
         return c_GPU.asarray()
 
 
+def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10,
+                     numInnerItermax=200, stopInnerThr=1e-9,
+                     unlabelledValue=-99, verbose=False, log=False):
+    p = 0.5
+    epsilon = 1e-3
+
+    # init data
+    Nfin = len(b)
+
+    indices_labels = []
+    classes = np.unique(labels_a)
+    for c in classes:
+        idxc, = np.where(labels_a == c)
+        indices_labels.append(cudamat.CUDAMatrix(idxc.reshape(1, -1)))
+
+    Mreg_GPU = cudamat.empty(M_GPU.shape)
+    W_GPU = cudamat.empty(M_GPU.shape).assign(0)
+
+    for cpt in range(numItermax):
+        Mreg_GPU.assign(M_GPU)
+        Mreg_GPU.add_mult(W_GPU, eta)
+        transp_GPU = sinkhorn(a, b, Mreg_GPU, reg, numItermax=numInnerItermax,
+                              stopThr=stopInnerThr, returnAsGPU=True)
+        # the transport has been computed. Check if classes are really
+        # separated
+        W_GPU.assign(1)
+        W_GPU = W_GPU.transpose()
+        all_majs_GPU = []
+        idx_unlabelled = -1
+        for (i, c) in enumerate(classes):
+            if c != unlabelledValue:
+                (_, nbRow) = indices_labels[i].shape
+                tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0)
+                transp_GPU.transpose().select_columns(indices_labels[i],
+                                                      tmpC_GPU)
+                majs_GPU = tmpC_GPU.sum(axis=1).add(epsilon)
+                cudamat.pow(majs_GPU, (p-1))
+                majs_GPU.mult(p)
+                all_majs_GPU.append(majs_GPU)
+
+                tmpC_GPU.assign(0)
+                tmpC_GPU.add_col_vec(majs_GPU)
+                W_GPU.set_selected_columns(indices_labels[i], tmpC_GPU)
+            else:
+                idx_unlabelled = i
+
+        # now we majorize the unlabelled (if there are any) by the min of
+        # the majorizations. do it only for unlabbled data
+        if idx_unlabelled != -1:
+            all_majs = np.array([m_GPU.asarray() for m_GPU in all_majs_GPU])
+            minMaj_GPU = (cudamat.CUDAMatrix(all_majs).min(axis=0)
+                          .transpose())
+            (_, nbRow) = indices_labels[idx_unlabelled].shape
+            tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0)
+
+            tmpC_GPU.add_col_vec(minMaj_GPU)
+            W_GPU.set_selected_columns(indices_labels[idx_unlabelled],
+                                       tmpC_GPU)
+        W_GPU = W_GPU.transpose()
+
+    return transp_GPU.asarray()
+
+
 class OTDA_GPU(OTDA):
     def normalizeM(self, norm):
         if norm == "median":
@@ -84,3 +168,28 @@ class OTDA_sinkhorn(OTDA_GPU):
         self.normalizeM(norm)
         self.G = sinkhorn(ws, wt, self.M_GPU, reg, **kwargs)
         self.computed = True
+
+
+class OTDA_lpl1(OTDA_GPU):
+    def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None,
+            **kwargs):
+        cudamat.init()
+        xs = np.asarray(xs, dtype=np.float64)
+        xt = np.asarray(xt, dtype=np.float64)
+
+        self.xs = xs
+        self.xt = xt
+
+        if wt is None:
+            wt = unif(xt.shape[0])
+        if ws is None:
+            ws = unif(xs.shape[0])
+
+        self.ws = ws
+        self.wt = wt
+
+        self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True,
+                                          squared=True)
+        self.normalizeM(norm)
+        self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M_GPU, reg, eta, **kwargs)
+        self.computed = True
author	Leo gautheron <gautheron@iv-cm-359.creatis.insa-lyon.fr>	2017-04-24 10:43:44 +0200
committer	Leo gautheron <gautheron@iv-cm-359.creatis.insa-lyon.fr>	2017-04-24 10:43:44 +0200
commit	e2cf8538b05f026d73c6033777699af77e7508b5 (patch)
tree	65e3537011f45cb4200aea2e9fdc5d68ffe1de90 /ot/gpu/da.py
parent	8fc74ed792cda0fc0073dab218f7fdc08bf3c1ba (diff)