diff options
author | Leo gautheron <gautheron@iv-cm-359.creatis.insa-lyon.fr> | 2017-04-24 10:43:44 +0200 |
---|---|---|
committer | Leo gautheron <gautheron@iv-cm-359.creatis.insa-lyon.fr> | 2017-04-24 10:43:44 +0200 |
commit | e2cf8538b05f026d73c6033777699af77e7508b5 (patch) | |
tree | 65e3537011f45cb4200aea2e9fdc5d68ffe1de90 /ot/gpu/da.py | |
parent | 8fc74ed792cda0fc0073dab218f7fdc08bf3c1ba (diff) |
add GPU implementation sinkhorn lpl1
Diffstat (limited to 'ot/gpu/da.py')
-rw-r--r-- | ot/gpu/da.py | 109 |
1 files changed, 109 insertions, 0 deletions
diff --git a/ot/gpu/da.py b/ot/gpu/da.py index 0121e7b..700a5e4 100644 --- a/ot/gpu/da.py +++ b/ot/gpu/da.py @@ -11,6 +11,27 @@ import cudamat def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False): + """ + Compute the pairwise euclidean distance between matrices a and b. + + + Parameters + ---------- + a : np.ndarray (n, f) + first matrice + b : np.ndarray (m, f) + second matrice + returnAsGPU : boolean, optional (default False) + if True, returns cudamat matrix still on GPU, else return np.ndarray + squared : boolean, optional (default False) + if True, return squared euclidean distance matrice + + + Returns + ------- + c : (n x m) np.ndarray or cudamat.CUDAMatrix + pairwise euclidean distance distance matrix + """ # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m). # First compute in c_GPU the squared euclidean distance. And return its # square root. At each cell [i,j] of c, we want to have @@ -46,6 +67,69 @@ def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False): return c_GPU.asarray() +def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, + numInnerItermax=200, stopInnerThr=1e-9, + unlabelledValue=-99, verbose=False, log=False): + p = 0.5 + epsilon = 1e-3 + + # init data + Nfin = len(b) + + indices_labels = [] + classes = np.unique(labels_a) + for c in classes: + idxc, = np.where(labels_a == c) + indices_labels.append(cudamat.CUDAMatrix(idxc.reshape(1, -1))) + + Mreg_GPU = cudamat.empty(M_GPU.shape) + W_GPU = cudamat.empty(M_GPU.shape).assign(0) + + for cpt in range(numItermax): + Mreg_GPU.assign(M_GPU) + Mreg_GPU.add_mult(W_GPU, eta) + transp_GPU = sinkhorn(a, b, Mreg_GPU, reg, numItermax=numInnerItermax, + stopThr=stopInnerThr, returnAsGPU=True) + # the transport has been computed. Check if classes are really + # separated + W_GPU.assign(1) + W_GPU = W_GPU.transpose() + all_majs_GPU = [] + idx_unlabelled = -1 + for (i, c) in enumerate(classes): + if c != unlabelledValue: + (_, nbRow) = indices_labels[i].shape + tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0) + transp_GPU.transpose().select_columns(indices_labels[i], + tmpC_GPU) + majs_GPU = tmpC_GPU.sum(axis=1).add(epsilon) + cudamat.pow(majs_GPU, (p-1)) + majs_GPU.mult(p) + all_majs_GPU.append(majs_GPU) + + tmpC_GPU.assign(0) + tmpC_GPU.add_col_vec(majs_GPU) + W_GPU.set_selected_columns(indices_labels[i], tmpC_GPU) + else: + idx_unlabelled = i + + # now we majorize the unlabelled (if there are any) by the min of + # the majorizations. do it only for unlabbled data + if idx_unlabelled != -1: + all_majs = np.array([m_GPU.asarray() for m_GPU in all_majs_GPU]) + minMaj_GPU = (cudamat.CUDAMatrix(all_majs).min(axis=0) + .transpose()) + (_, nbRow) = indices_labels[idx_unlabelled].shape + tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0) + + tmpC_GPU.add_col_vec(minMaj_GPU) + W_GPU.set_selected_columns(indices_labels[idx_unlabelled], + tmpC_GPU) + W_GPU = W_GPU.transpose() + + return transp_GPU.asarray() + + class OTDA_GPU(OTDA): def normalizeM(self, norm): if norm == "median": @@ -84,3 +168,28 @@ class OTDA_sinkhorn(OTDA_GPU): self.normalizeM(norm) self.G = sinkhorn(ws, wt, self.M_GPU, reg, **kwargs) self.computed = True + + +class OTDA_lpl1(OTDA_GPU): + def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None, + **kwargs): + cudamat.init() + xs = np.asarray(xs, dtype=np.float64) + xt = np.asarray(xt, dtype=np.float64) + + self.xs = xs + self.xt = xt + + if wt is None: + wt = unif(xt.shape[0]) + if ws is None: + ws = unif(xs.shape[0]) + + self.ws = ws + self.wt = wt + + self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True, + squared=True) + self.normalizeM(norm) + self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M_GPU, reg, eta, **kwargs) + self.computed = True |