diff options
author | ncourty <ncourty@irisa.fr> | 2017-04-27 11:17:59 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-04-27 11:17:59 +0200 |
commit | 55ff888aab4e3a5f0a4a3180887d40e96fbea3d8 (patch) | |
tree | de0ea03345a1e4192cd404d52fecce5b38e53176 /ot | |
parent | bd325a391aa5156faec0446c455be1f8f94eb80b (diff) | |
parent | 9df60d467ddd3316334578adac8a80667cfa8759 (diff) |
Merge pull request #10 from aje/master
performance improvement sinkhorn lpl1
Diffstat (limited to 'ot')
-rw-r--r-- | ot/da.py | 46 | ||||
-rw-r--r-- | ot/gpu/bregman.py | 12 | ||||
-rw-r--r-- | ot/gpu/da.py | 88 |
3 files changed, 112 insertions, 34 deletions
@@ -11,10 +11,6 @@ from .optim import cg from .optim import gcg -def indices(a, func): - return [i for (i, val) in enumerate(a) if func(val)] - - def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerItermax = 200,stopInnerThr=1e-9,verbose=False,log=False): """ Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization @@ -46,7 +42,7 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter labels_a : np.ndarray (ns,) labels of samples in the source domain b : np.ndarray (nt,) - samples in the target domain + samples weights in the target domain M : np.ndarray (ns,nt) loss matrix reg : float @@ -86,40 +82,28 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter ot.optim.cg : General regularized OT """ - p=0.5 + p = 0.5 epsilon = 1e-3 - # init data - Nini = len(a) - Nfin = len(b) - indices_labels = [] - idx_begin = np.min(labels_a) - for c in range(idx_begin,np.max(labels_a)+1): - idxc = indices(labels_a, lambda x: x==c) + classes = np.unique(labels_a) + for c in classes: + idxc, = np.where(labels_a == c) indices_labels.append(idxc) - W=np.zeros(M.shape) + W = np.zeros(M.shape) for cpt in range(numItermax): Mreg = M + eta*W - transp=sinkhorn(a,b,Mreg,reg,numItermax=numInnerItermax, stopThr=stopInnerThr) - # the transport has been computed. Check if classes are really separated - W = np.ones((Nini,Nfin)) - for t in range(Nfin): - column = transp[:,t] - all_maj = [] - for c in range(idx_begin,np.max(labels_a)+1): - col_c = column[indices_labels[c-idx_begin]] - if c!=-1: - maj = p*((sum(col_c)+epsilon)**(p-1)) - W[indices_labels[c-idx_begin],t]=maj - all_maj.append(maj) - - # now we majorize the unlabelled by the min of the majorizations - # do it only for unlabbled data - if idx_begin==-1: - W[indices_labels[0],t]=np.min(all_maj) + transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax, + stopThr=stopInnerThr) + # the transport has been computed. Check if classes are really + # separated + W = np.ones(M.shape) + for (i, c) in enumerate(classes): + majs = np.sum(transp[indices_labels[i]], axis=0) + majs = p*((majs+epsilon)**(p-1)) + W[indices_labels[i]] = majs return transp diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py index f91f15f..cc610b7 100644 --- a/ot/gpu/bregman.py +++ b/ot/gpu/bregman.py @@ -8,7 +8,7 @@ import cudamat def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, - log=False): + log=False, returnAsGPU=False): # init data Nini = len(a) Nfin = len(b) @@ -77,7 +77,13 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, K_GPU.mult_by_col(u_GPU, target=K_GPU) K_GPU.mult_by_row(v_GPU.transpose(), target=K_GPU) + + if returnAsGPU: + res = K_GPU + else: + res = K_GPU.asarray() + if log: - return K_GPU.asarray(), log + return res, log else: - return K_GPU.asarray() + return res diff --git a/ot/gpu/da.py b/ot/gpu/da.py index 0121e7b..399e769 100644 --- a/ot/gpu/da.py +++ b/ot/gpu/da.py @@ -11,6 +11,27 @@ import cudamat def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False): + """ + Compute the pairwise euclidean distance between matrices a and b. + + + Parameters + ---------- + a : np.ndarray (n, f) + first matrice + b : np.ndarray (m, f) + second matrice + returnAsGPU : boolean, optional (default False) + if True, returns cudamat matrix still on GPU, else return np.ndarray + squared : boolean, optional (default False) + if True, return squared euclidean distance matrice + + + Returns + ------- + c : (n x m) np.ndarray or cudamat.CUDAMatrix + pairwise euclidean distance distance matrix + """ # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m). # First compute in c_GPU the squared euclidean distance. And return its # square root. At each cell [i,j] of c, we want to have @@ -46,6 +67,48 @@ def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False): return c_GPU.asarray() +def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, + numInnerItermax=200, stopInnerThr=1e-9, + verbose=False, log=False): + p = 0.5 + epsilon = 1e-3 + Nfin = len(b) + + indices_labels = [] + classes = np.unique(labels_a) + for c in classes: + idxc, = np.where(labels_a == c) + indices_labels.append(cudamat.CUDAMatrix(idxc.reshape(1, -1))) + + Mreg_GPU = cudamat.empty(M_GPU.shape) + W_GPU = cudamat.empty(M_GPU.shape).assign(0) + + for cpt in range(numItermax): + Mreg_GPU.assign(M_GPU) + Mreg_GPU.add_mult(W_GPU, eta) + transp_GPU = sinkhorn(a, b, Mreg_GPU, reg, numItermax=numInnerItermax, + stopThr=stopInnerThr, returnAsGPU=True) + # the transport has been computed. Check if classes are really + # separated + W_GPU.assign(1) + W_GPU = W_GPU.transpose() + for (i, c) in enumerate(classes): + (_, nbRow) = indices_labels[i].shape + tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0) + transp_GPU.transpose().select_columns(indices_labels[i], tmpC_GPU) + majs_GPU = tmpC_GPU.sum(axis=1).add(epsilon) + cudamat.pow(majs_GPU, (p-1)) + majs_GPU.mult(p) + + tmpC_GPU.assign(0) + tmpC_GPU.add_col_vec(majs_GPU) + W_GPU.set_selected_columns(indices_labels[i], tmpC_GPU) + + W_GPU = W_GPU.transpose() + + return transp_GPU.asarray() + + class OTDA_GPU(OTDA): def normalizeM(self, norm): if norm == "median": @@ -84,3 +147,28 @@ class OTDA_sinkhorn(OTDA_GPU): self.normalizeM(norm) self.G = sinkhorn(ws, wt, self.M_GPU, reg, **kwargs) self.computed = True + + +class OTDA_lpl1(OTDA_GPU): + def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None, + **kwargs): + cudamat.init() + xs = np.asarray(xs, dtype=np.float64) + xt = np.asarray(xt, dtype=np.float64) + + self.xs = xs + self.xt = xt + + if wt is None: + wt = unif(xt.shape[0]) + if ws is None: + ws = unif(xs.shape[0]) + + self.ws = ws + self.wt = wt + + self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True, + squared=True) + self.normalizeM(norm) + self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M_GPU, reg, eta, **kwargs) + self.computed = True |