From fb95157f867d749551fad9a4988eb3dd2813096e Mon Sep 17 00:00:00 2001
From: Leo gautheron
Date: Thu, 20 Apr 2017 15:05:53 +0200
Subject: more changes from feedback

In addition, add the possibility to normalize the cost matrix through the
fit function.
---
 ot/da.py           | 26 ++++++++++++++++++++++----
 ot/gpu/__init__.py |  3 ++-
 ot/gpu/bregman.py  |  4 ++--
 ot/gpu/da.py       | 39 ++++++++++++++++++++++-----------------
 4 files changed, 48 insertions(+), 24 deletions(-)

diff --git a/ot/da.py b/ot/da.py
index d7b8492..d607e50 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -606,7 +606,7 @@ class OTDA(object):
         self.computed=False
 
-    def fit(self,xs,xt,ws=None,wt=None):
+    def fit(self,xs,xt,ws=None,wt=None,norm=None):
         """ Fit domain adaptation between samples is xs and xt (with optional weights)"""
         self.xs=xs
         self.xt=xt
@@ -620,6 +620,7 @@ class OTDA(object):
         self.wt=wt
 
         self.M=dist(xs,xt,metric=self.metric)
+        self.normalizeM(norm)
         self.G=emd(ws,wt,self.M)
         self.computed=True
 
@@ -684,11 +685,25 @@ class OTDA(object):
         xf=self.interp(direction)# interp the source samples
         return xf[idx,:]+x-x0[idx,:] # aply the delta to the interpolation
 
+    def normalizeM(self, norm):
+        """
+        It may help to normalize the cost matrix self.M if there are numerical
+        errors during the Sinkhorn-based algorithms.
+        """
+        if norm == "median":
+            self.M /= float(np.median(self.M))
+        elif norm == "max":
+            self.M /= float(np.max(self.M))
+        elif norm == "log":
+            self.M = np.log(1 + self.M)
+        elif norm == "loglog":
+            self.M = np.log(1 + np.log(1 + self.M))
+
 
 class OTDA_sinkhorn(OTDA):
     """Class for domain adaptation with optimal transport with entropic regularization"""
 
-    def fit(self,xs,xt,reg=1,ws=None,wt=None,**kwargs):
+    def fit(self,xs,xt,reg=1,ws=None,wt=None,norm=None,**kwargs):
         """ Fit regularized domain adaptation between samples is xs and xt (with optional weights)"""
         self.xs=xs
         self.xt=xt
@@ -702,6 +717,7 @@ class OTDA_sinkhorn(OTDA):
         self.wt=wt
 
         self.M=dist(xs,xt,metric=self.metric)
+        self.normalizeM(norm)
         self.G=sinkhorn(ws,wt,self.M,reg,**kwargs)
         self.computed=True
 
@@ -710,7 +726,7 @@ class OTDA_lpl1(OTDA):
     """Class for domain adaptation with optimal transport with entropic and group regularization"""
 
-    def fit(self,xs,ys,xt,reg=1,eta=1,ws=None,wt=None,**kwargs):
+    def fit(self,xs,ys,xt,reg=1,eta=1,ws=None,wt=None,norm=None,**kwargs):
         """ Fit regularized domain adaptation between samples is xs and xt (with optional weights), See ot.da.sinkhorn_lpl1_mm for fit parameters"""
         self.xs=xs
         self.xt=xt
@@ -724,6 +740,7 @@ class OTDA_lpl1(OTDA):
         self.wt=wt
 
         self.M=dist(xs,xt,metric=self.metric)
+        self.normalizeM(norm)
         self.G=sinkhorn_lpl1_mm(ws,ys,wt,self.M,reg,eta,**kwargs)
         self.computed=True
 
@@ -731,7 +748,7 @@ class OTDA_l1l2(OTDA):
     """Class for domain adaptation with optimal transport with entropic and group lasso regularization"""
 
-    def fit(self,xs,ys,xt,reg=1,eta=1,ws=None,wt=None,**kwargs):
+    def fit(self,xs,ys,xt,reg=1,eta=1,ws=None,wt=None,norm=None,**kwargs):
         """ Fit regularized domain adaptation between samples is xs and xt (with optional weights), See ot.da.sinkhorn_lpl1_gl for fit parameters"""
         self.xs=xs
         self.xt=xt
@@ -745,6 +762,7 @@ class OTDA_l1l2(OTDA):
         self.wt=wt
 
         self.M=dist(xs,xt,metric=self.metric)
+        self.normalizeM(norm)
         self.G=sinkhorn_l1l2_gl(ws,ys,wt,self.M,reg,eta,**kwargs)
         self.computed=True
 
diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py
index 9319208..40b11c0 100644
--- a/ot/gpu/__init__.py
+++ b/ot/gpu/__init__.py
@@ -2,5 +2,6 @@
 
 from . import bregman
 from . import da
+from .bregman import sinkhorn
 
-__all__ = ["bregman", "da"]
+__all__ = ["bregman", "da", "sinkhorn"]
diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py
index 4d4a8b7..f91f15f 100644
--- a/ot/gpu/bregman.py
+++ b/ot/gpu/bregman.py
@@ -4,10 +4,11 @@ Bregman projections for regularized OT with GPU
 """
 
 import numpy as np
+import cudamat
 
 
 def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
-             log=False, cudamat=None):
+             log=False):
     # init data
     Nini = len(a)
     Nfin = len(b)
@@ -74,7 +75,6 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False,
         log['u'] = u_GPU.asarray()
         log['v'] = v_GPU.asarray()
 
-    # print('err=',err,' cpt=',cpt)
     K_GPU.mult_by_col(u_GPU, target=K_GPU)
     K_GPU.mult_by_row(v_GPU.transpose(), target=K_GPU)
     if log:
diff --git a/ot/gpu/da.py b/ot/gpu/da.py
index 4c583a7..0121e7b 100644
--- a/ot/gpu/da.py
+++ b/ot/gpu/da.py
@@ -7,9 +7,10 @@ import numpy as np
 from ..utils import unif
 from ..da import OTDA
 from .bregman import sinkhorn
+import cudamat
 
 
-def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False, cudamat=None):
+def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False):
     # a is shape (n, f) and b shape (m, f). Return matrix c of shape (n, m).
     # First compute in c_GPU the squared euclidean distance. And return its
     # square root. At each cell [i,j] of c, we want to have
@@ -45,9 +46,24 @@ def pairwiseEuclideanGPU(a, b, returnAsGPU=False, squared=False, cudamat=None):
         return c_GPU.asarray()
 
 
-class OTDA_sinkhorn_GPU(OTDA):
+class OTDA_GPU(OTDA):
+    def normalizeM(self, norm):
+        if norm == "median":
+            self.M_GPU.divide(float(np.median(self.M_GPU.asarray())))
+        elif norm == "max":
+            self.M_GPU.divide(float(np.max(self.M_GPU.asarray())))
+        elif norm == "log":
+            self.M_GPU.add(1)
+            cudamat.log(self.M_GPU)
+        elif norm == "loglog":
+            self.M_GPU.add(1)
+            cudamat.log(self.M_GPU)
+            self.M_GPU.add(1)
+            cudamat.log(self.M_GPU)
+
+
+class OTDA_sinkhorn(OTDA_GPU):
     def fit(self, xs, xt, reg=1, ws=None, wt=None, norm=None, **kwargs):
-        import cudamat
         cudamat.init()
         xs = np.asarray(xs, dtype=np.float64)
         xt = np.asarray(xt, dtype=np.float64)
@@ -64,18 +80,7 @@ class OTDA_sinkhorn_GPU(OTDA):
         self.wt = wt
 
         self.M_GPU = pairwiseEuclideanGPU(xs, xt, returnAsGPU=True,
-                                          squared=True, cudamat=cudamat)
-
-        if norm == "median":
-            self.M_GPU.divide(float(np.median(self.M_GPU.asarray())))
-        elif norm == "max":
-            self.M_GPU.divide(float(np.max(self.M_GPU.asarray())))
-        elif norm == "log":
-            M = np.log(1 + self.M_GPU.asarray())
-            self.M_GPU = cudamat.CUDAMatrix(M)
-        elif norm == "loglog":
-            M = np.log(1 + np.log(1 + self.M_GPU.asarray()))
-            self.M_GPU = cudamat.CUDAMatrix(M)
-
-        self.G = sinkhorn(ws, wt, self.M_GPU, reg, cudamat=cudamat, **kwargs)
+                                          squared=True)
+        self.normalizeM(norm)
+        self.G = sinkhorn(ws, wt, self.M_GPU, reg, **kwargs)
         self.computed = True
--
cgit v1.2.3
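
A minimal usage sketch of the norm option introduced by this patch (the toy
arrays and the "median" choice are illustrative only; it assumes the patched
ot.da module from above):

    import numpy as np
    from ot.da import OTDA_sinkhorn

    xs = np.random.randn(50, 2)        # toy source samples
    xt = np.random.randn(60, 2) + 1.0  # toy target samples

    da = OTDA_sinkhorn()
    # norm in {"median", "max", "log", "loglog"} rescales the cost matrix
    # before the Sinkhorn iterations, which can help avoid numerical errors
    da.fit(xs, xt, reg=1e-1, norm="median")
    G = da.G  # coupling computed on the normalized cost matrix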