author     Antoine Rolet <antoine.rolet@gmail.com>    2017-09-05 15:30:50 +0900
committer  Antoine Rolet <antoine.rolet@gmail.com>    2017-09-05 15:30:50 +0900
commit     13dfb3ddbbd8926b4751b82dd41c5570253b1f07 (patch)
tree       b28098e98640c64483a599103e2fdb5df46d2c79 /ot
parent     185eb3e2ef34b5ce6b8f90a28a5bcc78432b7fd3 (diff)
parent     16697047eff9326a0ecb483317c13a854a3d3a71 (diff)
Merge remote-tracking branch 'upstream/master'
Diffstat (limited to 'ot')
 ot/__init__.py        |    6
 ot/bregman.py         |  512
 ot/da.py              | 1506
 ot/datasets.py        |  111
 ot/dr.py              |  201
 ot/gpu/__init__.py    |    5
 ot/gpu/bregman.py     |   18
 ot/gpu/da.py          |   10
 ot/lp/EMD.h           |    8
 ot/lp/EMD_wrapper.cpp |    9
 ot/lp/__init__.py     |   48
 ot/lp/emd_wrap.pyx    |  101
 ot/optim.py           |  139
 ot/plot.py            |  103
 ot/utils.py           |  346
15 files changed, 2202 insertions(+), 921 deletions(-)
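The bulk of this merge is a PEP 8 cleanup of ot/bregman.py and a rewrite of ot/da.py around a deprecation-aware, scikit-learn-style estimator API. For reference, a minimal sketch of the ot.sinkhorn entry points whose method dispatch is reformatted below, using the toy problem taken from the sinkhorn2 docstring in this diff:

import numpy as np
import ot  # POT: Python Optimal Transport

# toy problem from the sinkhorn2 docstring in this diff
a = np.array([.5, .5])               # source histogram (sums to 1)
b = np.array([.5, .5])               # target histogram (sums to 1)
M = np.array([[0., 1.], [1., 0.]])   # ground cost matrix

# OT plan; 'method' dispatches to sinkhorn_knopp, sinkhorn_stabilized
# or sinkhorn_epsilon_scaling, as in the dispatch code below
G = ot.sinkhorn(a, b, M, reg=1, method='sinkhorn')

# sinkhorn2 returns the regularized transport loss instead of the plan
loss = ot.sinkhorn2(a, b, M, 1)      # array([ 0.26894142]) per the docstring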
diff --git a/ot/__init__.py b/ot/__init__.py index a79a5ce..6d4c4c6 100644 --- a/ot/__init__.py +++ b/ot/__init__.py @@ -4,6 +4,10 @@ """ +# Author: Remi Flamary <remi.flamary@unice.fr> +# +# License: MIT License + # All submodules and packages from . import lp @@ -24,6 +28,6 @@ from .utils import dist, unif, tic, toc, toq __version__ = "0.3.1" -__all__ = ["emd", "emd2", "sinkhorn","sinkhorn2", "utils", 'datasets', +__all__ = ["emd", "emd2", "sinkhorn", "sinkhorn2", "utils", 'datasets', 'bregman', 'lp', 'plot', 'tic', 'toc', 'toq', 'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim'] diff --git a/ot/bregman.py b/ot/bregman.py index 0d68602..d63c51d 100644 --- a/ot/bregman.py +++ b/ot/bregman.py @@ -3,9 +3,15 @@ Bregman projections for regularized OT """ +# Author: Remi Flamary <remi.flamary@unice.fr> +# Nicolas Courty <ncourty@irisa.fr> +# +# License: MIT License + import numpy as np -def sinkhorn(a,b, M, reg,method='sinkhorn', numItermax = 1000, stopThr=1e-9, verbose=False, log=False,**kwargs): + +def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs): u""" Solve the entropic regularization optimal transport problem and return the OT matrix @@ -92,22 +98,29 @@ def sinkhorn(a,b, M, reg,method='sinkhorn', numItermax = 1000, stopThr=1e-9, ver """ - if method.lower()=='sinkhorn': - sink= lambda: sinkhorn_knopp(a,b, M, reg,numItermax=numItermax, - stopThr=stopThr, verbose=verbose, log=log,**kwargs) - elif method.lower()=='sinkhorn_stabilized': - sink= lambda: sinkhorn_stabilized(a,b, M, reg,numItermax=numItermax, - stopThr=stopThr, verbose=verbose, log=log, **kwargs) - elif method.lower()=='sinkhorn_epsilon_scaling': - sink= lambda: sinkhorn_epsilon_scaling(a,b, M, reg,numItermax=numItermax, - stopThr=stopThr, verbose=verbose, log=log, **kwargs) + if method.lower() == 'sinkhorn': + def sink(): + return sinkhorn_knopp(a, b, M, reg, numItermax=numItermax, + stopThr=stopThr, verbose=verbose, log=log, **kwargs) + elif method.lower() == 'sinkhorn_stabilized': + def sink(): + return sinkhorn_stabilized(a, b, M, reg, numItermax=numItermax, + stopThr=stopThr, verbose=verbose, log=log, **kwargs) + elif method.lower() == 'sinkhorn_epsilon_scaling': + def sink(): + return sinkhorn_epsilon_scaling( + a, b, M, reg, numItermax=numItermax, + stopThr=stopThr, verbose=verbose, log=log, **kwargs) else: print('Warning : unknown method using classic Sinkhorn Knopp') - sink= lambda: sinkhorn_knopp(a,b, M, reg, **kwargs) + + def sink(): + return sinkhorn_knopp(a, b, M, reg, **kwargs) return sink() -def sinkhorn2(a,b, M, reg,method='sinkhorn', numItermax = 1000, stopThr=1e-9, verbose=False, log=False,**kwargs): + +def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs): u""" Solve the entropic regularization optimal transport problem and return the loss @@ -170,7 +183,7 @@ def sinkhorn2(a,b, M, reg,method='sinkhorn', numItermax = 1000, stopThr=1e-9, ve >>> M=[[0.,1.],[1.,0.]] >>> ot.sinkhorn2(a,b,M,1) array([ 0.26894142]) - + References @@ -194,27 +207,33 @@ def sinkhorn2(a,b, M, reg,method='sinkhorn', numItermax = 1000, stopThr=1e-9, ve """ - if method.lower()=='sinkhorn': - sink= lambda: sinkhorn_knopp(a,b, M, reg,numItermax=numItermax, - stopThr=stopThr, verbose=verbose, log=log,**kwargs) - elif method.lower()=='sinkhorn_stabilized': - sink= lambda: sinkhorn_stabilized(a,b, M, reg,numItermax=numItermax, - stopThr=stopThr, verbose=verbose, log=log, **kwargs) - elif 
method.lower()=='sinkhorn_epsilon_scaling': - sink= lambda: sinkhorn_epsilon_scaling(a,b, M, reg,numItermax=numItermax, - stopThr=stopThr, verbose=verbose, log=log, **kwargs) + if method.lower() == 'sinkhorn': + def sink(): + return sinkhorn_knopp(a, b, M, reg, numItermax=numItermax, + stopThr=stopThr, verbose=verbose, log=log, **kwargs) + elif method.lower() == 'sinkhorn_stabilized': + def sink(): + return sinkhorn_stabilized(a, b, M, reg, numItermax=numItermax, + stopThr=stopThr, verbose=verbose, log=log, **kwargs) + elif method.lower() == 'sinkhorn_epsilon_scaling': + def sink(): + return sinkhorn_epsilon_scaling( + a, b, M, reg, numItermax=numItermax, + stopThr=stopThr, verbose=verbose, log=log, **kwargs) else: print('Warning : unknown method using classic Sinkhorn Knopp') - sink= lambda: sinkhorn_knopp(a,b, M, reg, **kwargs) - - b=np.asarray(b,dtype=np.float64) - if len(b.shape)<2: - b=b.reshape((-1,1)) + + def sink(): + return sinkhorn_knopp(a, b, M, reg, **kwargs) + + b = np.asarray(b, dtype=np.float64) + if len(b.shape) < 2: + b = b.reshape((-1, 1)) return sink() -def sinkhorn_knopp(a,b, M, reg, numItermax = 1000, stopThr=1e-9, verbose=False, log=False,**kwargs): +def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs): """ Solve the entropic regularization optimal transport problem and return the OT matrix @@ -290,100 +309,101 @@ def sinkhorn_knopp(a,b, M, reg, numItermax = 1000, stopThr=1e-9, verbose=False, """ - a=np.asarray(a,dtype=np.float64) - b=np.asarray(b,dtype=np.float64) - M=np.asarray(M,dtype=np.float64) - - - if len(a)==0: - a=np.ones((M.shape[0],),dtype=np.float64)/M.shape[0] - if len(b)==0: - b=np.ones((M.shape[1],),dtype=np.float64)/M.shape[1] + a = np.asarray(a, dtype=np.float64) + b = np.asarray(b, dtype=np.float64) + M = np.asarray(M, dtype=np.float64) + if len(a) == 0: + a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0] + if len(b) == 0: + b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] # init data Nini = len(a) Nfin = len(b) - if len(b.shape)>1: - nbb=b.shape[1] + if len(b.shape) > 1: + nbb = b.shape[1] else: - nbb=0 - + nbb = 0 if log: - log={'err':[]} + log = {'err': []} - # we assume that no distances are null except those of the diagonal of distances + # we assume that no distances are null except those of the diagonal of + # distances if nbb: - u = np.ones((Nini,nbb))/Nini - v = np.ones((Nfin,nbb))/Nfin + u = np.ones((Nini, nbb)) / Nini + v = np.ones((Nfin, nbb)) / Nfin else: - u = np.ones(Nini)/Nini - v = np.ones(Nfin)/Nfin + u = np.ones(Nini) / Nini + v = np.ones(Nfin) / Nfin + # print(reg) - #print(reg) + K = np.exp(-M / reg) + # print(np.min(K)) - K = np.exp(-M/reg) - #print(np.min(K)) - - Kp = (1/a).reshape(-1, 1) * K + Kp = (1 / a).reshape(-1, 1) * K cpt = 0 - err=1 - while (err>stopThr and cpt<numItermax): + err = 1 + while (err > stopThr and cpt < numItermax): uprev = u vprev = v KtransposeU = np.dot(K.T, u) v = np.divide(b, KtransposeU) - u = 1./np.dot(Kp,v) + u = 1. 
/ np.dot(Kp, v) - if (np.any(KtransposeU==0) or - np.any(np.isnan(u)) or np.any(np.isnan(v)) or - np.any(np.isinf(u)) or np.any(np.isinf(v))): + if (np.any(KtransposeU == 0) or + np.any(np.isnan(u)) or np.any(np.isnan(v)) or + np.any(np.isinf(u)) or np.any(np.isinf(v))): # we have reached the machine precision # come back to previous solution and quit loop print('Warning: numerical errors at iteration', cpt) u = uprev v = vprev break - if cpt%10==0: - # we can speed up the process by checking for the error only all the 10th iterations + if cpt % 10 == 0: + # we can speed up the process by checking for the error only all + # the 10th iterations if nbb: - err = np.sum((u-uprev)**2)/np.sum((u)**2)+np.sum((v-vprev)**2)/np.sum((v)**2) + err = np.sum((u - uprev)**2) / np.sum((u)**2) + \ + np.sum((v - vprev)**2) / np.sum((v)**2) else: transp = u.reshape(-1, 1) * (K * v) - err = np.linalg.norm((np.sum(transp,axis=0)-b))**2 + err = np.linalg.norm((np.sum(transp, axis=0) - b))**2 if log: log['err'].append(err) if verbose: - if cpt%200 ==0: - print('{:5s}|{:12s}'.format('It.','Err')+'\n'+'-'*19) - print('{:5d}|{:8e}|'.format(cpt,err)) - cpt = cpt +1 + if cpt % 200 == 0: + print( + '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19) + print('{:5d}|{:8e}|'.format(cpt, err)) + cpt = cpt + 1 if log: - log['u']=u - log['v']=v + log['u'] = u + log['v'] = v - if nbb: #return only loss - res=np.zeros((nbb)) + if nbb: # return only loss + res = np.zeros((nbb)) for i in range(nbb): - res[i]=np.sum(u[:,i].reshape((-1,1))*K*v[:,i].reshape((1,-1))*M) + res[i] = np.sum( + u[:, i].reshape((-1, 1)) * K * v[:, i].reshape((1, -1)) * M) if log: - return res,log + return res, log else: return res - else: # return OT matrix + else: # return OT matrix if log: - return u.reshape((-1,1))*K*v.reshape((1,-1)),log + return u.reshape((-1, 1)) * K * v.reshape((1, -1)), log else: - return u.reshape((-1,1))*K*v.reshape((1,-1)) + return u.reshape((-1, 1)) * K * v.reshape((1, -1)) -def sinkhorn_stabilized(a,b, M, reg, numItermax = 1000,tau=1e3, stopThr=1e-9,warmstart=None, verbose=False,print_period=20, log=False,**kwargs): +def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, warmstart=None, verbose=False, print_period=20, log=False, **kwargs): """ Solve the entropic regularization OT problem with log stabilization @@ -468,21 +488,21 @@ def sinkhorn_stabilized(a,b, M, reg, numItermax = 1000,tau=1e3, stopThr=1e-9,war """ - a=np.asarray(a,dtype=np.float64) - b=np.asarray(b,dtype=np.float64) - M=np.asarray(M,dtype=np.float64) + a = np.asarray(a, dtype=np.float64) + b = np.asarray(b, dtype=np.float64) + M = np.asarray(M, dtype=np.float64) - if len(a)==0: - a=np.ones((M.shape[0],),dtype=np.float64)/M.shape[0] - if len(b)==0: - b=np.ones((M.shape[1],),dtype=np.float64)/M.shape[1] + if len(a) == 0: + a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0] + if len(b) == 0: + b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] # test if multiple target - if len(b.shape)>1: - nbb=b.shape[1] - a=a[:,np.newaxis] + if len(b.shape) > 1: + nbb = b.shape[1] + a = a[:, np.newaxis] else: - nbb=0 + nbb = 0 # init data na = len(a) @@ -490,81 +510,80 @@ def sinkhorn_stabilized(a,b, M, reg, numItermax = 1000,tau=1e3, stopThr=1e-9,war cpt = 0 if log: - log={'err':[]} + log = {'err': []} - # we assume that no distances are null except those of the diagonal of distances + # we assume that no distances are null except those of the diagonal of + # distances if warmstart is None: - alpha,beta=np.zeros(na),np.zeros(nb) + 
alpha, beta = np.zeros(na), np.zeros(nb) else: - alpha,beta=warmstart + alpha, beta = warmstart if nbb: - u,v = np.ones((na,nbb))/na,np.ones((nb,nbb))/nb + u, v = np.ones((na, nbb)) / na, np.ones((nb, nbb)) / nb else: - u,v = np.ones(na)/na,np.ones(nb)/nb + u, v = np.ones(na) / na, np.ones(nb) / nb - def get_K(alpha,beta): + def get_K(alpha, beta): """log space computation""" - return np.exp(-(M-alpha.reshape((na,1))-beta.reshape((1,nb)))/reg) + return np.exp(-(M - alpha.reshape((na, 1)) - beta.reshape((1, nb))) / reg) - def get_Gamma(alpha,beta,u,v): + def get_Gamma(alpha, beta, u, v): """log space gamma computation""" - return np.exp(-(M-alpha.reshape((na,1))-beta.reshape((1,nb)))/reg+np.log(u.reshape((na,1)))+np.log(v.reshape((1,nb)))) + return np.exp(-(M - alpha.reshape((na, 1)) - beta.reshape((1, nb))) / reg + np.log(u.reshape((na, 1))) + np.log(v.reshape((1, nb)))) - #print(np.min(K)) + # print(np.min(K)) - K=get_K(alpha,beta) + K = get_K(alpha, beta) transp = K - loop=1 + loop = 1 cpt = 0 - err=1 + err = 1 while loop: - - uprev = u vprev = v # sinkhorn update - v = b/(np.dot(K.T,u)+1e-16) - u = a/(np.dot(K,v)+1e-16) - + v = b / (np.dot(K.T, u) + 1e-16) + u = a / (np.dot(K, v) + 1e-16) # remove numerical problems and store them in K - if np.abs(u).max()>tau or np.abs(v).max()>tau: + if np.abs(u).max() > tau or np.abs(v).max() > tau: if nbb: - alpha,beta=alpha+reg*np.max(np.log(u),1),beta+reg*np.max(np.log(v)) + alpha, beta = alpha + reg * \ + np.max(np.log(u), 1), beta + reg * np.max(np.log(v)) else: - alpha,beta=alpha+reg*np.log(u),beta+reg*np.log(v) + alpha, beta = alpha + reg * np.log(u), beta + reg * np.log(v) if nbb: - u,v = np.ones((na,nbb))/na,np.ones((nb,nbb))/nb + u, v = np.ones((na, nbb)) / na, np.ones((nb, nbb)) / nb else: - u,v = np.ones(na)/na,np.ones(nb)/nb - K=get_K(alpha,beta) - + u, v = np.ones(na) / na, np.ones(nb) / nb + K = get_K(alpha, beta) - if cpt%print_period==0: - # we can speed up the process by checking for the error only all the 10th iterations + if cpt % print_period == 0: + # we can speed up the process by checking for the error only all + # the 10th iterations if nbb: - err = np.sum((u-uprev)**2)/np.sum((u)**2)+np.sum((v-vprev)**2)/np.sum((v)**2) + err = np.sum((u - uprev)**2) / np.sum((u)**2) + \ + np.sum((v - vprev)**2) / np.sum((v)**2) else: - transp = get_Gamma(alpha,beta,u,v) - err = np.linalg.norm((np.sum(transp,axis=0)-b))**2 + transp = get_Gamma(alpha, beta, u, v) + err = np.linalg.norm((np.sum(transp, axis=0) - b))**2 if log: log['err'].append(err) if verbose: - if cpt%(print_period*20) ==0: - print('{:5s}|{:12s}'.format('It.','Err')+'\n'+'-'*19) - print('{:5d}|{:8e}|'.format(cpt,err)) + if cpt % (print_period * 20) == 0: + print( + '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19) + print('{:5d}|{:8e}|'.format(cpt, err)) + if err <= stopThr: + loop = False - if err<=stopThr: - loop=False - - if cpt>=numItermax: - loop=False - + if cpt >= numItermax: + loop = False if np.any(np.isnan(u)) or np.any(np.isnan(v)): # we have reached the machine precision @@ -574,34 +593,34 @@ def sinkhorn_stabilized(a,b, M, reg, numItermax = 1000,tau=1e3, stopThr=1e-9,war v = vprev break - cpt = cpt +1 + cpt = cpt + 1 - - #print('err=',err,' cpt=',cpt) + # print('err=',err,' cpt=',cpt) if log: - log['logu']=alpha/reg+np.log(u) - log['logv']=beta/reg+np.log(v) - log['alpha']=alpha+reg*np.log(u) - log['beta']=beta+reg*np.log(v) - log['warmstart']=(log['alpha'],log['beta']) + log['logu'] = alpha / reg + np.log(u) + log['logv'] = beta / reg + np.log(v) + 
log['alpha'] = alpha + reg * np.log(u) + log['beta'] = beta + reg * np.log(v) + log['warmstart'] = (log['alpha'], log['beta']) if nbb: - res=np.zeros((nbb)) + res = np.zeros((nbb)) for i in range(nbb): - res[i]=np.sum(get_Gamma(alpha,beta,u[:,i],v[:,i])*M) - return res,log + res[i] = np.sum(get_Gamma(alpha, beta, u[:, i], v[:, i]) * M) + return res, log else: - return get_Gamma(alpha,beta,u,v),log + return get_Gamma(alpha, beta, u, v), log else: if nbb: - res=np.zeros((nbb)) + res = np.zeros((nbb)) for i in range(nbb): - res[i]=np.sum(get_Gamma(alpha,beta,u[:,i],v[:,i])*M) + res[i] = np.sum(get_Gamma(alpha, beta, u[:, i], v[:, i]) * M) return res else: - return get_Gamma(alpha,beta,u,v) + return get_Gamma(alpha, beta, u, v) + -def sinkhorn_epsilon_scaling(a,b, M, reg, numItermax = 100, epsilon0=1e4, numInnerItermax = 100,tau=1e3, stopThr=1e-9,warmstart=None, verbose=False,print_period=10, log=False,**kwargs): +def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInnerItermax=100, tau=1e3, stopThr=1e-9, warmstart=None, verbose=False, print_period=10, log=False, **kwargs): """ Solve the entropic regularization optimal transport problem with log stabilization and epsilon scaling. @@ -690,14 +709,14 @@ def sinkhorn_epsilon_scaling(a,b, M, reg, numItermax = 100, epsilon0=1e4, numInn """ - a=np.asarray(a,dtype=np.float64) - b=np.asarray(b,dtype=np.float64) - M=np.asarray(M,dtype=np.float64) + a = np.asarray(a, dtype=np.float64) + b = np.asarray(b, dtype=np.float64) + M = np.asarray(M, dtype=np.float64) - if len(a)==0: - a=np.ones((M.shape[0],),dtype=np.float64)/M.shape[0] - if len(b)==0: - b=np.ones((M.shape[1],),dtype=np.float64)/M.shape[1] + if len(a) == 0: + a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0] + if len(b) == 0: + b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] # init data na = len(a) @@ -705,88 +724,94 @@ def sinkhorn_epsilon_scaling(a,b, M, reg, numItermax = 100, epsilon0=1e4, numInn # nrelative umerical precision with 64 bits numItermin = 35 - numItermax=max(numItermin,numItermax) # ensure that last velue is exact - + numItermax = max(numItermin, numItermax) # ensure that last velue is exact cpt = 0 if log: - log={'err':[]} + log = {'err': []} - # we assume that no distances are null except those of the diagonal of distances + # we assume that no distances are null except those of the diagonal of + # distances if warmstart is None: - alpha,beta=np.zeros(na),np.zeros(nb) + alpha, beta = np.zeros(na), np.zeros(nb) else: - alpha,beta=warmstart - + alpha, beta = warmstart - def get_K(alpha,beta): + def get_K(alpha, beta): """log space computation""" - return np.exp(-(M-alpha.reshape((na,1))-beta.reshape((1,nb)))/reg) + return np.exp(-(M - alpha.reshape((na, 1)) - beta.reshape((1, nb))) / reg) - #print(np.min(K)) - def get_reg(n): # exponential decreasing - return (epsilon0-reg)*np.exp(-n)+reg + # print(np.min(K)) + def get_reg(n): # exponential decreasing + return (epsilon0 - reg) * np.exp(-n) + reg - loop=1 + loop = 1 cpt = 0 - err=1 + err = 1 while loop: - regi=get_reg(cpt) + regi = get_reg(cpt) - G,logi=sinkhorn_stabilized(a,b, M, regi, numItermax = numInnerItermax, stopThr=1e-9,warmstart=(alpha,beta), verbose=False,print_period=20,tau=tau, log=True) + G, logi = sinkhorn_stabilized(a, b, M, regi, numItermax=numInnerItermax, stopThr=1e-9, warmstart=( + alpha, beta), verbose=False, print_period=20, tau=tau, log=True) - alpha=logi['alpha'] - beta=logi['beta'] + alpha = logi['alpha'] + beta = logi['beta'] - if cpt>=numItermax: - loop=False + 
if cpt >= numItermax: + loop = False - if cpt%(print_period)==0: # spsion nearly converged - # we can speed up the process by checking for the error only all the 10th iterations + if cpt % (print_period) == 0: # spsion nearly converged + # we can speed up the process by checking for the error only all + # the 10th iterations transp = G - err = np.linalg.norm((np.sum(transp,axis=0)-b))**2+np.linalg.norm((np.sum(transp,axis=1)-a))**2 + err = np.linalg.norm( + (np.sum(transp, axis=0) - b))**2 + np.linalg.norm((np.sum(transp, axis=1) - a))**2 if log: log['err'].append(err) if verbose: - if cpt%(print_period*10) ==0: - print('{:5s}|{:12s}'.format('It.','Err')+'\n'+'-'*19) - print('{:5d}|{:8e}|'.format(cpt,err)) + if cpt % (print_period * 10) == 0: + print( + '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19) + print('{:5d}|{:8e}|'.format(cpt, err)) - if err<=stopThr and cpt>numItermin: - loop=False + if err <= stopThr and cpt > numItermin: + loop = False - cpt = cpt +1 - #print('err=',err,' cpt=',cpt) + cpt = cpt + 1 + # print('err=',err,' cpt=',cpt) if log: - log['alpha']=alpha - log['beta']=beta - log['warmstart']=(log['alpha'],log['beta']) - return G,log + log['alpha'] = alpha + log['beta'] = beta + log['warmstart'] = (log['alpha'], log['beta']) + return G, log else: return G -def geometricBar(weights,alldistribT): +def geometricBar(weights, alldistribT): """return the weighted geometric mean of distributions""" - assert(len(weights)==alldistribT.shape[1]) - return np.exp(np.dot(np.log(alldistribT),weights.T)) + assert(len(weights) == alldistribT.shape[1]) + return np.exp(np.dot(np.log(alldistribT), weights.T)) + def geometricMean(alldistribT): """return the geometric mean of distributions""" - return np.exp(np.mean(np.log(alldistribT),axis=1)) + return np.exp(np.mean(np.log(alldistribT), axis=1)) -def projR(gamma,p): + +def projR(gamma, p): """return the KL projection on the row constrints """ - return np.multiply(gamma.T,p/np.maximum(np.sum(gamma,axis=1),1e-10)).T + return np.multiply(gamma.T, p / np.maximum(np.sum(gamma, axis=1), 1e-10)).T + -def projC(gamma,q): +def projC(gamma, q): """return the KL projection on the column constrints """ - return np.multiply(gamma,q/np.maximum(np.sum(gamma,axis=0),1e-10)) + return np.multiply(gamma, q / np.maximum(np.sum(gamma, axis=0), 1e-10)) -def barycenter(A,M,reg, weights=None, numItermax = 1000, stopThr=1e-4,verbose=False,log=False): +def barycenter(A, M, reg, weights=None, numItermax=1000, stopThr=1e-4, verbose=False, log=False): """Compute the entropic regularized wasserstein barycenter of distributions A The function solves the following optimization problem: @@ -837,49 +862,49 @@ def barycenter(A,M,reg, weights=None, numItermax = 1000, stopThr=1e-4,verbose=Fa """ - if weights is None: - weights=np.ones(A.shape[1])/A.shape[1] + weights = np.ones(A.shape[1]) / A.shape[1] else: - assert(len(weights)==A.shape[1]) + assert(len(weights) == A.shape[1]) if log: - log={'err':[]} + log = {'err': []} - #M = M/np.median(M) # suggested by G. Peyre - K = np.exp(-M/reg) + # M = M/np.median(M) # suggested by G. 
Peyre + K = np.exp(-M / reg) cpt = 0 - err=1 + err = 1 - UKv=np.dot(K,np.divide(A.T,np.sum(K,axis=0)).T) - u = (geometricMean(UKv)/UKv.T).T + UKv = np.dot(K, np.divide(A.T, np.sum(K, axis=0)).T) + u = (geometricMean(UKv) / UKv.T).T - while (err>stopThr and cpt<numItermax): - cpt = cpt +1 - UKv=u*np.dot(K,np.divide(A,np.dot(K,u))) - u = (u.T*geometricBar(weights,UKv)).T/UKv + while (err > stopThr and cpt < numItermax): + cpt = cpt + 1 + UKv = u * np.dot(K, np.divide(A, np.dot(K, u))) + u = (u.T * geometricBar(weights, UKv)).T / UKv - if cpt%10==1: - err=np.sum(np.std(UKv,axis=1)) + if cpt % 10 == 1: + err = np.sum(np.std(UKv, axis=1)) # log and verbose print if log: log['err'].append(err) if verbose: - if cpt%200 ==0: - print('{:5s}|{:12s}'.format('It.','Err')+'\n'+'-'*19) - print('{:5d}|{:8e}|'.format(cpt,err)) + if cpt % 200 == 0: + print( + '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19) + print('{:5d}|{:8e}|'.format(cpt, err)) if log: - log['niter']=cpt - return geometricBar(weights,UKv),log + log['niter'] = cpt + return geometricBar(weights, UKv), log else: - return geometricBar(weights,UKv) + return geometricBar(weights, UKv) -def unmix(a,D,M,M0,h0,reg,reg0,alpha,numItermax = 1000, stopThr=1e-3,verbose=False,log=False): +def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000, stopThr=1e-3, verbose=False, log=False): """ Compute the unmixing of an observation with a given dictionary using Wasserstein distance @@ -942,44 +967,45 @@ def unmix(a,D,M,M0,h0,reg,reg0,alpha,numItermax = 1000, stopThr=1e-3,verbose=Fal """ - #M = M/np.median(M) - K = np.exp(-M/reg) + # M = M/np.median(M) + K = np.exp(-M / reg) - #M0 = M0/np.median(M0) - K0 = np.exp(-M0/reg0) + # M0 = M0/np.median(M0) + K0 = np.exp(-M0 / reg0) old = h0 - err=1 - cpt=0 - #log = {'niter':0, 'all_err':[]} + err = 1 + cpt = 0 + # log = {'niter':0, 'all_err':[]} if log: - log={'err':[]} - - - while (err>stopThr and cpt<numItermax): - K = projC(K,a) - K0 = projC(K0,h0) - new = np.sum(K0,axis=1) - inv_new = np.dot(D,new) # we recombine the current selection from dictionnary - other = np.sum(K,axis=1) - delta = np.exp(alpha*np.log(other)+(1-alpha)*np.log(inv_new)) # geometric interpolation - K = projR(K,delta) - K0 = np.dot(np.diag(np.dot(D.T,delta/inv_new)),K0) - - err=np.linalg.norm(np.sum(K0,axis=1)-old) + log = {'err': []} + + while (err > stopThr and cpt < numItermax): + K = projC(K, a) + K0 = projC(K0, h0) + new = np.sum(K0, axis=1) + # we recombine the current selection from dictionnary + inv_new = np.dot(D, new) + other = np.sum(K, axis=1) + # geometric interpolation + delta = np.exp(alpha * np.log(other) + (1 - alpha) * np.log(inv_new)) + K = projR(K, delta) + K0 = np.dot(np.diag(np.dot(D.T, delta / inv_new)), K0) + + err = np.linalg.norm(np.sum(K0, axis=1) - old) old = new if log: log['err'].append(err) if verbose: - if cpt%200 ==0: - print('{:5s}|{:12s}'.format('It.','Err')+'\n'+'-'*19) - print('{:5d}|{:8e}|'.format(cpt,err)) + if cpt % 200 == 0: + print('{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19) + print('{:5d}|{:8e}|'.format(cpt, err)) - cpt = cpt+1 + cpt = cpt + 1 if log: - log['niter']=cpt - return np.sum(K0,axis=1),log + log['niter'] = cpt + return np.sum(K0, axis=1), log else: - return np.sum(K0,axis=1) + return np.sum(K0, axis=1) @@ -3,22 +3,34 @@ Domain adaptation with optimal transport """ +# Author: Remi Flamary <remi.flamary@unice.fr> +# Nicolas Courty <ncourty@irisa.fr> +# Michael Perrot <michael.perrot@univ-st-etienne.fr> +# +# License: MIT License + import numpy as np + from .bregman 
import sinkhorn from .lp import emd -from .utils import unif,dist,kernel +from .utils import unif, dist, kernel, cost_normalization +from .utils import check_params, deprecated, BaseEstimator from .optim import cg from .optim import gcg -def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerItermax = 200,stopInnerThr=1e-9,verbose=False,log=False): +def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, + numInnerItermax=200, stopInnerThr=1e-9, verbose=False, + log=False): """ - Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization + Solve the entropic regularization optimal transport problem with nonconvex + group lasso regularization The function solves the following optimization problem: .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)+ \eta \Omega_g(\gamma) + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma) + + \eta \Omega_g(\gamma) s.t. \gamma 1 = a @@ -28,11 +40,16 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter where : - M is the (ns,nt) metric cost matrix - - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - :math:`\Omega_g` is the group lasso regulaization term :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` where :math:`\mathcal{I}_c` are the index of samples from class c in the source domain. + - :math:`\Omega_e` is the entropic regularization term + :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`\Omega_g` is the group lasso regulaization term + :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` + where :math:`\mathcal{I}_c` are the index of samples from class c + in the source domain. - a and b are source and target weights (sum to 1) - The algorithm used for solving the problem is the generalised conditional gradient as proposed in [5]_ [7]_ + The algorithm used for solving the problem is the generalised conditional + gradient as proposed in [5]_ [7]_ Parameters @@ -72,8 +89,13 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter References ---------- - .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 - .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized conditional gradient: analysis of convergence and applications. arXiv preprint arXiv:1510.06567. + .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE + Transactions on Pattern Analysis and Machine Intelligence , + vol.PP, no.99, pp.1-1 + .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). + Generalized conditional gradient: analysis of convergence + and applications. arXiv preprint arXiv:1510.06567. See Also -------- @@ -94,7 +116,7 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter W = np.zeros(M.shape) for cpt in range(numItermax): - Mreg = M + eta*W + Mreg = M + eta * W transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax, stopThr=stopInnerThr) # the transport has been computed. 
Check if classes are really @@ -102,19 +124,24 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter W = np.ones(M.shape) for (i, c) in enumerate(classes): majs = np.sum(transp[indices_labels[i]], axis=0) - majs = p*((majs+epsilon)**(p-1)) + majs = p * ((majs + epsilon)**(p - 1)) W[indices_labels[i]] = majs return transp -def sinkhorn_l1l2_gl(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerItermax = 200,stopInnerThr=1e-9,verbose=False,log=False): + +def sinkhorn_l1l2_gl(a, labels_a, b, M, reg, eta=0.1, numItermax=10, + numInnerItermax=200, stopInnerThr=1e-9, verbose=False, + log=False): """ - Solve the entropic regularization optimal transport problem with group lasso regularization + Solve the entropic regularization optimal transport problem with group + lasso regularization The function solves the following optimization problem: .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)+ \eta \Omega_g(\gamma) + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)+ + \eta \Omega_g(\gamma) s.t. \gamma 1 = a @@ -124,11 +151,16 @@ def sinkhorn_l1l2_gl(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter where : - M is the (ns,nt) metric cost matrix - - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - :math:`\Omega_g` is the group lasso regulaization term :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^2` where :math:`\mathcal{I}_c` are the index of samples from class c in the source domain. + - :math:`\Omega_e` is the entropic regularization term + :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`\Omega_g` is the group lasso regulaization term + :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^2` + where :math:`\mathcal{I}_c` are the index of samples from class + c in the source domain. - a and b are source and target weights (sum to 1) - The algorithm used for solving the problem is the generalised conditional gradient as proposed in [5]_ [7]_ + The algorithm used for solving the problem is the generalised conditional + gradient as proposed in [5]_ [7]_ Parameters @@ -168,46 +200,54 @@ def sinkhorn_l1l2_gl(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter References ---------- - .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 - .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized conditional gradient: analysis of convergence and applications. arXiv preprint arXiv:1510.06567. + .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE Transactions + on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 + .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). + Generalized conditional gradient: analysis of convergence and + applications. arXiv preprint arXiv:1510.06567. 
See Also -------- ot.optim.gcg : Generalized conditional gradient for OT problems """ - lstlab=np.unique(labels_a) + lstlab = np.unique(labels_a) def f(G): - res=0 + res = 0 for i in range(G.shape[1]): for lab in lstlab: - temp=G[labels_a==lab,i] - res+=np.linalg.norm(temp) + temp = G[labels_a == lab, i] + res += np.linalg.norm(temp) return res def df(G): - W=np.zeros(G.shape) + W = np.zeros(G.shape) for i in range(G.shape[1]): for lab in lstlab: - temp=G[labels_a==lab,i] - n=np.linalg.norm(temp) + temp = G[labels_a == lab, i] + n = np.linalg.norm(temp) if n: - W[labels_a==lab,i]=temp/n + W[labels_a == lab, i] = temp / n return W - - return gcg(a,b,M,reg,eta,f,df,G0=None,numItermax = numItermax,numInnerItermax=numInnerItermax, stopThr=stopInnerThr,verbose=verbose,log=log) + return gcg(a, b, M, reg, eta, f, df, G0=None, numItermax=numItermax, + numInnerItermax=numInnerItermax, stopThr=stopInnerThr, + verbose=verbose, log=log) - -def joint_OT_mapping_linear(xs,xt,mu=1,eta=0.001,bias=False,verbose=False,verbose2=False,numItermax = 100,numInnerItermax = 10,stopInnerThr=1e-6,stopThr=1e-5,log=False,**kwargs): +def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False, + verbose2=False, numItermax=100, numInnerItermax=10, + stopInnerThr=1e-6, stopThr=1e-5, log=False, + **kwargs): """Joint OT and linear mapping estimation as proposed in [8] The function solves the following optimization problem: .. math:: - \min_{\gamma,L}\quad \|L(X_s) -n_s\gamma X_t\|^2_F + \mu<\gamma,M>_F + \eta \|L -I\|^2_F + \min_{\gamma,L}\quad \|L(X_s) -n_s\gamma X_t\|^2_F + + \mu<\gamma,M>_F + \eta \|L -I\|^2_F s.t. \gamma 1 = a @@ -216,8 +256,10 @@ def joint_OT_mapping_linear(xs,xt,mu=1,eta=0.001,bias=False,verbose=False,verbos \gamma\geq 0 where : - - M is the (ns,nt) squared euclidean cost matrix between samples in Xs and Xt (scaled by ns) - - :math:`L` is a dxd linear operator that approximates the barycentric mapping + - M is the (ns,nt) squared euclidean cost matrix between samples in + Xs and Xt (scaled by ns) + - :math:`L` is a dxd linear operator that approximates the barycentric + mapping - :math:`I` is the identity matrix (neutral linear mapping) - a and b are uniform source and target weights @@ -272,7 +314,9 @@ def joint_OT_mapping_linear(xs,xt,mu=1,eta=0.001,bias=False,verbose=False,verbos References ---------- - .. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard, "Mapping estimation for discrete optimal transport", Neural Information Processing Systems (NIPS), 2016. + .. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard, + "Mapping estimation for discrete optimal transport", + Neural Information Processing Systems (NIPS), 2016. 
See Also -------- @@ -281,103 +325,116 @@ def joint_OT_mapping_linear(xs,xt,mu=1,eta=0.001,bias=False,verbose=False,verbos """ - ns,nt,d=xs.shape[0],xt.shape[0],xt.shape[1] + ns, nt, d = xs.shape[0], xt.shape[0], xt.shape[1] if bias: - xs1=np.hstack((xs,np.ones((ns,1)))) - xstxs=xs1.T.dot(xs1) - I=np.eye(d+1) - I[-1]=0 - I0=I[:,:-1] - sel=lambda x : x[:-1,:] + xs1 = np.hstack((xs, np.ones((ns, 1)))) + xstxs = xs1.T.dot(xs1) + I = np.eye(d + 1) + I[-1] = 0 + I0 = I[:, :-1] + + def sel(x): + return x[:-1, :] else: - xs1=xs - xstxs=xs1.T.dot(xs1) - I=np.eye(d) - I0=I - sel=lambda x : x + xs1 = xs + xstxs = xs1.T.dot(xs1) + I = np.eye(d) + I0 = I + + def sel(x): + return x if log: - log={'err':[]} + log = {'err': []} - a,b=unif(ns),unif(nt) - M=dist(xs,xt)*ns - G=emd(a,b,M) + a, b = unif(ns), unif(nt) + M = dist(xs, xt) * ns + G = emd(a, b, M) - vloss=[] + vloss = [] - def loss(L,G): + def loss(L, G): """Compute full loss""" - return np.sum((xs1.dot(L)-ns*G.dot(xt))**2)+mu*np.sum(G*M)+eta*np.sum(sel(L-I0)**2) + return np.sum((xs1.dot(L) - ns * G.dot(xt))**2) + mu * np.sum(G * M) + eta * np.sum(sel(L - I0)**2) def solve_L(G): """ solve L problem with fixed G (least square)""" - xst=ns*G.dot(xt) - return np.linalg.solve(xstxs+eta*I,xs1.T.dot(xst)+eta*I0) + xst = ns * G.dot(xt) + return np.linalg.solve(xstxs + eta * I, xs1.T.dot(xst) + eta * I0) - def solve_G(L,G0): + def solve_G(L, G0): """Update G with CG algorithm""" - xsi=xs1.dot(L) + xsi = xs1.dot(L) + def f(G): - return np.sum((xsi-ns*G.dot(xt))**2) + return np.sum((xsi - ns * G.dot(xt))**2) + def df(G): - return -2*ns*(xsi-ns*G.dot(xt)).dot(xt.T) - G=cg(a,b,M,1.0/mu,f,df,G0=G0,numItermax=numInnerItermax,stopThr=stopInnerThr) + return -2 * ns * (xsi - ns * G.dot(xt)).dot(xt.T) + G = cg(a, b, M, 1.0 / mu, f, df, G0=G0, + numItermax=numInnerItermax, stopThr=stopInnerThr) return G + L = solve_L(G) - L=solve_L(G) - - vloss.append(loss(L,G)) + vloss.append(loss(L, G)) if verbose: - print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32) - print('{:5d}|{:8e}|{:8e}'.format(0,vloss[-1],0)) - + print('{:5s}|{:12s}|{:8s}'.format( + 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32) + print('{:5d}|{:8e}|{:8e}'.format(0, vloss[-1], 0)) # init loop - if numItermax>0: - loop=1 + if numItermax > 0: + loop = 1 else: - loop=0 - it=0 + loop = 0 + it = 0 while loop: - it+=1 + it += 1 # update G - G=solve_G(L,G) + G = solve_G(L, G) - #update L - L=solve_L(G) + # update L + L = solve_L(G) - vloss.append(loss(L,G)) + vloss.append(loss(L, G)) - if it>=numItermax: - loop=0 + if it >= numItermax: + loop = 0 - if abs(vloss[-1]-vloss[-2])/abs(vloss[-2])<stopThr: - loop=0 + if abs(vloss[-1] - vloss[-2]) / abs(vloss[-2]) < stopThr: + loop = 0 if verbose: - if it%20==0: - print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32) - print('{:5d}|{:8e}|{:8e}'.format(it,vloss[-1],(vloss[-1]-vloss[-2])/abs(vloss[-2]))) + if it % 20 == 0: + print('{:5s}|{:12s}|{:8s}'.format( + 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32) + print('{:5d}|{:8e}|{:8e}'.format( + it, vloss[-1], (vloss[-1] - vloss[-2]) / abs(vloss[-2]))) if log: - log['loss']=vloss - return G,L,log + log['loss'] = vloss + return G, L, log else: - return G,L + return G, L -def joint_OT_mapping_kernel(xs,xt,mu=1,eta=0.001,kerneltype='gaussian',sigma=1,bias=False,verbose=False,verbose2=False,numItermax = 100,numInnerItermax = 10,stopInnerThr=1e-6,stopThr=1e-5,log=False,**kwargs): +def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian', + sigma=1, bias=False, 
verbose=False, verbose2=False, + numItermax=100, numInnerItermax=10, + stopInnerThr=1e-6, stopThr=1e-5, log=False, + **kwargs): """Joint OT and nonlinear mapping estimation with kernels as proposed in [8] The function solves the following optimization problem: .. math:: - \min_{\gamma,L\in\mathcal{H}}\quad \|L(X_s) -n_s\gamma X_t\|^2_F + \mu<\gamma,M>_F + \eta \|L\|^2_\mathcal{H} + \min_{\gamma,L\in\mathcal{H}}\quad \|L(X_s) - + n_s\gamma X_t\|^2_F + \mu<\gamma,M>_F + \eta \|L\|^2_\mathcal{H} s.t. \gamma 1 = a @@ -386,8 +443,10 @@ def joint_OT_mapping_kernel(xs,xt,mu=1,eta=0.001,kerneltype='gaussian',sigma=1,b \gamma\geq 0 where : - - M is the (ns,nt) squared euclidean cost matrix between samples in Xs and Xt (scaled by ns) - - :math:`L` is a ns x d linear operator on a kernel matrix that approximates the barycentric mapping + - M is the (ns,nt) squared euclidean cost matrix between samples in + Xs and Xt (scaled by ns) + - :math:`L` is a ns x d linear operator on a kernel matrix that + approximates the barycentric mapping - a and b are uniform source and target weights The problem consist in solving jointly an optimal transport matrix @@ -445,7 +504,9 @@ def joint_OT_mapping_kernel(xs,xt,mu=1,eta=0.001,kerneltype='gaussian',sigma=1,b References ---------- - .. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard, "Mapping estimation for discrete optimal transport", Neural Information Processing Systems (NIPS), 2016. + .. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard, + "Mapping estimation for discrete optimal transport", + Neural Information Processing Systems (NIPS), 2016. See Also -------- @@ -454,161 +515,170 @@ def joint_OT_mapping_kernel(xs,xt,mu=1,eta=0.001,kerneltype='gaussian',sigma=1,b """ - ns,nt=xs.shape[0],xt.shape[0] + ns, nt = xs.shape[0], xt.shape[0] - K=kernel(xs,xs,method=kerneltype,sigma=sigma) + K = kernel(xs, xs, method=kerneltype, sigma=sigma) if bias: - K1=np.hstack((K,np.ones((ns,1)))) - I=np.eye(ns+1) - I[-1]=0 - Kp=np.eye(ns+1) - Kp[:ns,:ns]=K + K1 = np.hstack((K, np.ones((ns, 1)))) + I = np.eye(ns + 1) + I[-1] = 0 + Kp = np.eye(ns + 1) + Kp[:ns, :ns] = K # ls regu - #K0 = K1.T.dot(K1)+eta*I - #Kreg=I + # K0 = K1.T.dot(K1)+eta*I + # Kreg=I # RKHS regul - K0 = K1.T.dot(K1)+eta*Kp - Kreg=Kp + K0 = K1.T.dot(K1) + eta * Kp + Kreg = Kp else: - K1=K - I=np.eye(ns) + K1 = K + I = np.eye(ns) # ls regul - #K0 = K1.T.dot(K1)+eta*I - #Kreg=I + # K0 = K1.T.dot(K1)+eta*I + # Kreg=I # proper kernel ridge - K0=K+eta*I - Kreg=K - - - + K0 = K + eta * I + Kreg = K if log: - log={'err':[]} + log = {'err': []} - a,b=unif(ns),unif(nt) - M=dist(xs,xt)*ns - G=emd(a,b,M) + a, b = unif(ns), unif(nt) + M = dist(xs, xt) * ns + G = emd(a, b, M) - vloss=[] + vloss = [] - def loss(L,G): + def loss(L, G): """Compute full loss""" - return np.sum((K1.dot(L)-ns*G.dot(xt))**2)+mu*np.sum(G*M)+eta*np.trace(L.T.dot(Kreg).dot(L)) + return np.sum((K1.dot(L) - ns * G.dot(xt))**2) + mu * np.sum(G * M) + eta * np.trace(L.T.dot(Kreg).dot(L)) def solve_L_nobias(G): """ solve L problem with fixed G (least square)""" - xst=ns*G.dot(xt) - return np.linalg.solve(K0,xst) + xst = ns * G.dot(xt) + return np.linalg.solve(K0, xst) def solve_L_bias(G): """ solve L problem with fixed G (least square)""" - xst=ns*G.dot(xt) - return np.linalg.solve(K0,K1.T.dot(xst)) + xst = ns * G.dot(xt) + return np.linalg.solve(K0, K1.T.dot(xst)) - def solve_G(L,G0): + def solve_G(L, G0): """Update G with CG algorithm""" - xsi=K1.dot(L) + xsi = K1.dot(L) + def f(G): - return np.sum((xsi-ns*G.dot(xt))**2) + return np.sum((xsi - ns * 
G.dot(xt))**2) + def df(G): - return -2*ns*(xsi-ns*G.dot(xt)).dot(xt.T) - G=cg(a,b,M,1.0/mu,f,df,G0=G0,numItermax=numInnerItermax,stopThr=stopInnerThr) + return -2 * ns * (xsi - ns * G.dot(xt)).dot(xt.T) + G = cg(a, b, M, 1.0 / mu, f, df, G0=G0, + numItermax=numInnerItermax, stopThr=stopInnerThr) return G if bias: - solve_L=solve_L_bias + solve_L = solve_L_bias else: - solve_L=solve_L_nobias + solve_L = solve_L_nobias - L=solve_L(G) + L = solve_L(G) - vloss.append(loss(L,G)) + vloss.append(loss(L, G)) if verbose: - print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32) - print('{:5d}|{:8e}|{:8e}'.format(0,vloss[-1],0)) - + print('{:5s}|{:12s}|{:8s}'.format( + 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32) + print('{:5d}|{:8e}|{:8e}'.format(0, vloss[-1], 0)) # init loop - if numItermax>0: - loop=1 + if numItermax > 0: + loop = 1 else: - loop=0 - it=0 + loop = 0 + it = 0 while loop: - it+=1 + it += 1 # update G - G=solve_G(L,G) + G = solve_G(L, G) - #update L - L=solve_L(G) + # update L + L = solve_L(G) - vloss.append(loss(L,G)) + vloss.append(loss(L, G)) - if it>=numItermax: - loop=0 + if it >= numItermax: + loop = 0 - if abs(vloss[-1]-vloss[-2])/abs(vloss[-2])<stopThr: - loop=0 + if abs(vloss[-1] - vloss[-2]) / abs(vloss[-2]) < stopThr: + loop = 0 if verbose: - if it%20==0: - print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32) - print('{:5d}|{:8e}|{:8e}'.format(it,vloss[-1],(vloss[-1]-vloss[-2])/abs(vloss[-2]))) + if it % 20 == 0: + print('{:5s}|{:12s}|{:8s}'.format( + 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32) + print('{:5d}|{:8e}|{:8e}'.format( + it, vloss[-1], (vloss[-1] - vloss[-2]) / abs(vloss[-2]))) if log: - log['loss']=vloss - return G,L,log + log['loss'] = vloss + return G, L, log else: - return G,L + return G, L +@deprecated("The class OTDA is deprecated in 0.3.1 and will be " + "removed in 0.5" + "\n\tfor standard transport use class EMDTransport instead.") class OTDA(object): + """Class for domain adaptation with optimal transport as proposed in [5] References ---------- - .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 + .. [5] N. Courty; R. Flamary; D. Tuia; A. 
Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE Transactions on + Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 """ - def __init__(self,metric='sqeuclidean'): + def __init__(self, metric='sqeuclidean', norm=None): """ Class initialization""" - self.xs=0 - self.xt=0 - self.G=0 - self.metric=metric - self.computed=False - - - def fit(self,xs,xt,ws=None,wt=None,norm=None): - """ Fit domain adaptation between samples is xs and xt (with optional weights)""" - self.xs=xs - self.xt=xt + self.xs = 0 + self.xt = 0 + self.G = 0 + self.metric = metric + self.norm = norm + self.computed = False + + def fit(self, xs, xt, ws=None, wt=None, max_iter=100000): + """Fit domain adaptation between samples is xs and xt + (with optional weights)""" + self.xs = xs + self.xt = xt if wt is None: - wt=unif(xt.shape[0]) + wt = unif(xt.shape[0]) if ws is None: - ws=unif(xs.shape[0]) + ws = unif(xs.shape[0]) - self.ws=ws - self.wt=wt + self.ws = ws + self.wt = wt - self.M=dist(xs,xt,metric=self.metric) - self.normalizeM(norm) - self.G=emd(ws,wt,self.M) - self.computed=True + self.M = dist(xs, xt, metric=self.metric) + self.M = cost_normalization(self.M, self.norm) + self.G = emd(ws, wt, self.M, max_iter) + self.computed = True - def interp(self,direction=1): + def interp(self, direction=1): """Barycentric interpolation for the source (1) or target (-1) samples This Barycentric interpolation solves for each source (resp target) @@ -623,28 +693,28 @@ class OTDA(object): metric could be used in the future. """ - if direction>0: # >0 then source to target - G=self.G - w=self.ws.reshape((self.xs.shape[0],1)) - x=self.xt + if direction > 0: # >0 then source to target + G = self.G + w = self.ws.reshape((self.xs.shape[0], 1)) + x = self.xt else: - G=self.G.T - w=self.wt.reshape((self.xt.shape[0],1)) - x=self.xs + G = self.G.T + w = self.wt.reshape((self.xt.shape[0], 1)) + x = self.xs if self.computed: - if self.metric=='sqeuclidean': - return np.dot(G/w,x) # weighted mean + if self.metric == 'sqeuclidean': + return np.dot(G / w, x) # weighted mean else: - print("Warning, metric not handled yet, using weighted average") - return np.dot(G/w,x) # weighted mean + print( + "Warning, metric not handled yet, using weighted average") + return np.dot(G / w, x) # weighted mean return None else: print("Warning, model not fitted yet, returning None") return None - - def predict(self,x,direction=1): + def predict(self, x, direction=1): """ Out of sample mapping using the formulation from [6] For each sample x to map, it finds the nearest source sample xs and @@ -654,183 +724,1019 @@ class OTDA(object): References ---------- - .. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014). Regularized discrete optimal transport. SIAM Journal on Imaging Sciences, 7(3), 1853-1882. + .. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014). + Regularized discrete optimal transport. SIAM Journal on Imaging + Sciences, 7(3), 1853-1882. 
""" - if direction>0: # >0 then source to target - xf=self.xt - x0=self.xs + if direction > 0: # >0 then source to target + xf = self.xt + x0 = self.xs else: - xf=self.xs - x0=self.xt - - D0=dist(x,x0) # dist netween new samples an source - idx=np.argmin(D0,1) # closest one - xf=self.interp(direction)# interp the source samples - return xf[idx,:]+x-x0[idx,:] # aply the delta to the interpolation - - def normalizeM(self, norm): - """ Apply normalization to the loss matrix - - - Parameters - ---------- - norm : str - type of normalization from 'median','max','log','loglog' - - """ - - if norm == "median": - self.M /= float(np.median(self.M)) - elif norm == "max": - self.M /= float(np.max(self.M)) - elif norm == "log": - self.M = np.log(1 + self.M) - elif norm == "loglog": - self.M = np.log(1 + np.log(1 + self.M)) - + xf = self.xs + x0 = self.xt + D0 = dist(x, x0) # dist netween new samples an source + idx = np.argmin(D0, 1) # closest one + xf = self.interp(direction) # interp the source samples + # aply the delta to the interpolation + return xf[idx, :] + x - x0[idx, :] + +@deprecated("The class OTDA_sinkhorn is deprecated in 0.3.1 and will be" + " removed in 0.5 \nUse class SinkhornTransport instead.") class OTDA_sinkhorn(OTDA): - """Class for domain adaptation with optimal transport with entropic regularization""" - def fit(self,xs,xt,reg=1,ws=None,wt=None,norm=None,**kwargs): - """ Fit regularized domain adaptation between samples is xs and xt (with optional weights)""" - self.xs=xs - self.xt=xt + """Class for domain adaptation with optimal transport with entropic + regularization + + + """ + + def fit(self, xs, xt, reg=1, ws=None, wt=None, **kwargs): + """Fit regularized domain adaptation between samples is xs and xt + (with optional weights)""" + self.xs = xs + self.xt = xt if wt is None: - wt=unif(xt.shape[0]) + wt = unif(xt.shape[0]) if ws is None: - ws=unif(xs.shape[0]) + ws = unif(xs.shape[0]) - self.ws=ws - self.wt=wt + self.ws = ws + self.wt = wt - self.M=dist(xs,xt,metric=self.metric) - self.normalizeM(norm) - self.G=sinkhorn(ws,wt,self.M,reg,**kwargs) - self.computed=True + self.M = dist(xs, xt, metric=self.metric) + self.M = cost_normalization(self.M, self.norm) + self.G = sinkhorn(ws, wt, self.M, reg, **kwargs) + self.computed = True +@deprecated("The class OTDA_lpl1 is deprecated in 0.3.1 and will be" + " removed in 0.5 \nUse class SinkhornLpl1Transport instead.") class OTDA_lpl1(OTDA): - """Class for domain adaptation with optimal transport with entropic and group regularization""" + """Class for domain adaptation with optimal transport with entropic and + group regularization""" - def fit(self,xs,ys,xt,reg=1,eta=1,ws=None,wt=None,norm=None,**kwargs): - """ Fit regularized domain adaptation between samples is xs and xt (with optional weights), See ot.da.sinkhorn_lpl1_mm for fit parameters""" - self.xs=xs - self.xt=xt + def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, **kwargs): + """Fit regularized domain adaptation between samples is xs and xt + (with optional weights), See ot.da.sinkhorn_lpl1_mm for fit + parameters""" + self.xs = xs + self.xt = xt if wt is None: - wt=unif(xt.shape[0]) + wt = unif(xt.shape[0]) if ws is None: - ws=unif(xs.shape[0]) + ws = unif(xs.shape[0]) + + self.ws = ws + self.wt = wt - self.ws=ws - self.wt=wt + self.M = dist(xs, xt, metric=self.metric) + self.M = cost_normalization(self.M, self.norm) + self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M, reg, eta, **kwargs) + self.computed = True - self.M=dist(xs,xt,metric=self.metric) - 
self.normalizeM(norm) - self.G=sinkhorn_lpl1_mm(ws,ys,wt,self.M,reg,eta,**kwargs) - self.computed=True +@deprecated("The class OTDA_l1L2 is deprecated in 0.3.1 and will be" + " removed in 0.5 \nUse class SinkhornL1l2Transport instead.") class OTDA_l1l2(OTDA): - """Class for domain adaptation with optimal transport with entropic and group lasso regularization""" + """Class for domain adaptation with optimal transport with entropic + and group lasso regularization""" - def fit(self,xs,ys,xt,reg=1,eta=1,ws=None,wt=None,norm=None,**kwargs): - """ Fit regularized domain adaptation between samples is xs and xt (with optional weights), See ot.da.sinkhorn_lpl1_gl for fit parameters""" - self.xs=xs - self.xt=xt + def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, **kwargs): + """Fit regularized domain adaptation between samples is xs and xt + (with optional weights), See ot.da.sinkhorn_lpl1_gl for fit + parameters""" + self.xs = xs + self.xt = xt if wt is None: - wt=unif(xt.shape[0]) + wt = unif(xt.shape[0]) if ws is None: - ws=unif(xs.shape[0]) + ws = unif(xs.shape[0]) + + self.ws = ws + self.wt = wt - self.ws=ws - self.wt=wt + self.M = dist(xs, xt, metric=self.metric) + self.M = cost_normalization(self.M, self.norm) + self.G = sinkhorn_l1l2_gl(ws, ys, wt, self.M, reg, eta, **kwargs) + self.computed = True - self.M=dist(xs,xt,metric=self.metric) - self.normalizeM(norm) - self.G=sinkhorn_l1l2_gl(ws,ys,wt,self.M,reg,eta,**kwargs) - self.computed=True +@deprecated("The class OTDA_mapping_linear is deprecated in 0.3.1 and will be" + " removed in 0.5 \nUse class MappingTransport instead.") class OTDA_mapping_linear(OTDA): - """Class for optimal transport with joint linear mapping estimation as in [8]""" + """Class for optimal transport with joint linear mapping estimation as in + [8] + """ def __init__(self): """ Class initialization""" + self.xs = 0 + self.xt = 0 + self.G = 0 + self.L = 0 + self.bias = False + self.computed = False + self.metric = 'sqeuclidean' - self.xs=0 - self.xt=0 - self.G=0 - self.L=0 - self.bias=False - self.computed=False - self.metric='sqeuclidean' - - def fit(self,xs,xt,mu=1,eta=1,bias=False,**kwargs): + def fit(self, xs, xt, mu=1, eta=1, bias=False, **kwargs): """ Fit domain adaptation between samples is xs and xt (with optional weights)""" - self.xs=xs - self.xt=xt - self.bias=bias + self.xs = xs + self.xt = xt + self.bias = bias + self.ws = unif(xs.shape[0]) + self.wt = unif(xt.shape[0]) - self.ws=unif(xs.shape[0]) - self.wt=unif(xt.shape[0]) - - self.G,self.L=joint_OT_mapping_linear(xs,xt,mu=mu,eta=eta,bias=bias,**kwargs) - self.computed=True + self.G, self.L = joint_OT_mapping_linear( + xs, xt, mu=mu, eta=eta, bias=bias, **kwargs) + self.computed = True def mapping(self): return lambda x: self.predict(x) - - def predict(self,x): + def predict(self, x): """ Out of sample mapping estimated during the call to fit""" if self.computed: if self.bias: - x=np.hstack((x,np.ones((x.shape[0],1)))) - return x.dot(self.L) # aply the delta to the interpolation + x = np.hstack((x, np.ones((x.shape[0], 1)))) + return x.dot(self.L) # aply the delta to the interpolation else: print("Warning, model not fitted yet, returning None") return None -class OTDA_mapping_kernel(OTDA_mapping_linear): - """Class for optimal transport with joint nonlinear mapping estimation as in [8]""" +@deprecated("The class OTDA_mapping_kernel is deprecated in 0.3.1 and will be" + " removed in 0.5 \nUse class MappingTransport instead.") +class OTDA_mapping_kernel(OTDA_mapping_linear): + """Class for optimal 
transport with joint nonlinear mapping + estimation as in [8]""" - def fit(self,xs,xt,mu=1,eta=1,bias=False,kerneltype='gaussian',sigma=1,**kwargs): + def fit(self, xs, xt, mu=1, eta=1, bias=False, kerneltype='gaussian', + sigma=1, **kwargs): """ Fit domain adaptation between samples is xs and xt """ - self.xs=xs - self.xt=xt - self.bias=bias - - self.ws=unif(xs.shape[0]) - self.wt=unif(xt.shape[0]) - self.kernel=kerneltype - self.sigma=sigma - self.kwargs=kwargs - + self.xs = xs + self.xt = xt + self.bias = bias - self.G,self.L=joint_OT_mapping_kernel(xs,xt,mu=mu,eta=eta,bias=bias,**kwargs) - self.computed=True + self.ws = unif(xs.shape[0]) + self.wt = unif(xt.shape[0]) + self.kernel = kerneltype + self.sigma = sigma + self.kwargs = kwargs + self.G, self.L = joint_OT_mapping_kernel( + xs, xt, mu=mu, eta=eta, bias=bias, **kwargs) + self.computed = True - def predict(self,x): + def predict(self, x): """ Out of sample mapping estimated during the call to fit""" if self.computed: - K=kernel(x,self.xs,method=self.kernel,sigma=self.sigma,**self.kwargs) + K = kernel( + x, self.xs, method=self.kernel, sigma=self.sigma, + **self.kwargs) if self.bias: - K=np.hstack((K,np.ones((x.shape[0],1)))) + K = np.hstack((K, np.ones((x.shape[0], 1)))) return K.dot(self.L) else: print("Warning, model not fitted yet, returning None") - return None
\ No newline at end of file + return None + + +def distribution_estimation_uniform(X): + """estimates a uniform distribution from an array of samples X + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + The array of samples + + Returns + ------- + mu : array-like, shape (n_samples,) + The uniform distribution estimated from X + """ + + return unif(X.shape[0]) + + +class BaseTransport(BaseEstimator): + """Base class for OTDA objects + + Notes + ----- + All estimators should specify all the parameters that can be set + at the class level in their ``__init__`` as explicit keyword + arguments (no ``*args`` or ``**kwargs``). + + fit method should: + - estimate a cost matrix and store it in a `cost_` attribute + - estimate a coupling matrix and store it in a `coupling_` + attribute + - estimate distributions from source and target data and store them in + mu_s and mu_t attributes + - store Xs and Xt in attributes to be used later on in transform and + inverse_transform methods + + transform method should always get as input a Xs parameter + inverse_transform method should always get as input a Xt parameter + """ + + def fit(self, Xs=None, ys=None, Xt=None, yt=None): + """Build a coupling matrix from source and target sets of samples + (Xs, ys) and (Xt, yt) + + Parameters + ---------- + Xs : array-like, shape (n_source_samples, n_features) + The training input samples. + ys : array-like, shape (n_source_samples,) + The class labels + Xt : array-like, shape (n_target_samples, n_features) + The training input samples. + yt : array-like, shape (n_labeled_target_samples,) + The class labels + + Returns + ------- + self : object + Returns self. + """ + + # check the necessary inputs parameters are here + if check_params(Xs=Xs, Xt=Xt): + + # pairwise distance + self.cost_ = dist(Xs, Xt, metric=self.metric) + self.cost_ = cost_normalization(self.cost_, self.norm) + + if (ys is not None) and (yt is not None): + + if self.limit_max != np.infty: + self.limit_max = self.limit_max * np.max(self.cost_) + + # assumes labeled source samples occupy the first rows + # and labeled target samples occupy the first columns + classes = np.unique(ys) + for c in classes: + idx_s = np.where((ys != c) & (ys != -1)) + idx_t = np.where(yt == c) + + # all the coefficients corresponding to a source sample + # and a target sample : + # with different labels get a infinite + for j in idx_t[0]: + self.cost_[idx_s[0], j] = self.limit_max + + # distribution estimation + self.mu_s = self.distribution_estimation(Xs) + self.mu_t = self.distribution_estimation(Xt) + + # store arrays of samples + self.xs_ = Xs + self.xt_ = Xt + + return self + + def fit_transform(self, Xs=None, ys=None, Xt=None, yt=None): + """Build a coupling matrix from source and target sets of samples + (Xs, ys) and (Xt, yt) and transports source samples Xs onto target + ones Xt + + Parameters + ---------- + Xs : array-like, shape (n_source_samples, n_features) + The training input samples. + ys : array-like, shape (n_source_samples,) + The class labels + Xt : array-like, shape (n_target_samples, n_features) + The training input samples. + yt : array-like, shape (n_labeled_target_samples,) + The class labels + + Returns + ------- + transp_Xs : array-like, shape (n_source_samples, n_features) + The source samples samples. 
+ """ + + return self.fit(Xs, ys, Xt, yt).transform(Xs, ys, Xt, yt) + + def transform(self, Xs=None, ys=None, Xt=None, yt=None, batch_size=128): + """Transports source samples Xs onto target ones Xt + + Parameters + ---------- + Xs : array-like, shape (n_source_samples, n_features) + The training input samples. + ys : array-like, shape (n_source_samples,) + The class labels + Xt : array-like, shape (n_target_samples, n_features) + The training input samples. + yt : array-like, shape (n_labeled_target_samples,) + The class labels + batch_size : int, optional (default=128) + The batch size for out of sample inverse transform + + Returns + ------- + transp_Xs : array-like, shape (n_source_samples, n_features) + The transport source samples. + """ + + # check the necessary inputs parameters are here + if check_params(Xs=Xs): + + if np.array_equal(self.xs_, Xs): + + # perform standard barycentric mapping + transp = self.coupling_ / np.sum(self.coupling_, 1)[:, None] + + # set nans to 0 + transp[~ np.isfinite(transp)] = 0 + + # compute transported samples + transp_Xs = np.dot(transp, self.xt_) + else: + # perform out of sample mapping + indices = np.arange(Xs.shape[0]) + batch_ind = [ + indices[i:i + batch_size] + for i in range(0, len(indices), batch_size)] + + transp_Xs = [] + for bi in batch_ind: + + # get the nearest neighbor in the source domain + D0 = dist(Xs[bi], self.xs_) + idx = np.argmin(D0, axis=1) + + # transport the source samples + transp = self.coupling_ / np.sum( + self.coupling_, 1)[:, None] + transp[~ np.isfinite(transp)] = 0 + transp_Xs_ = np.dot(transp, self.xt_) + + # define the transported points + transp_Xs_ = transp_Xs_[idx, :] + Xs[bi] - self.xs_[idx, :] + + transp_Xs.append(transp_Xs_) + + transp_Xs = np.concatenate(transp_Xs, axis=0) + + return transp_Xs + + def inverse_transform(self, Xs=None, ys=None, Xt=None, yt=None, + batch_size=128): + """Transports target samples Xt onto target samples Xs + + Parameters + ---------- + Xs : array-like, shape (n_source_samples, n_features) + The training input samples. + ys : array-like, shape (n_source_samples,) + The class labels + Xt : array-like, shape (n_target_samples, n_features) + The training input samples. + yt : array-like, shape (n_labeled_target_samples,) + The class labels + batch_size : int, optional (default=128) + The batch size for out of sample inverse transform + + Returns + ------- + transp_Xt : array-like, shape (n_source_samples, n_features) + The transported target samples. 
+ """ + + # check the necessary inputs parameters are here + if check_params(Xt=Xt): + + if np.array_equal(self.xt_, Xt): + + # perform standard barycentric mapping + transp_ = self.coupling_.T / np.sum(self.coupling_, 0)[:, None] + + # set nans to 0 + transp_[~ np.isfinite(transp_)] = 0 + + # compute transported samples + transp_Xt = np.dot(transp_, self.xs_) + else: + # perform out of sample mapping + indices = np.arange(Xt.shape[0]) + batch_ind = [ + indices[i:i + batch_size] + for i in range(0, len(indices), batch_size)] + + transp_Xt = [] + for bi in batch_ind: + + D0 = dist(Xt[bi], self.xt_) + idx = np.argmin(D0, axis=1) + + # transport the target samples + transp_ = self.coupling_.T / np.sum( + self.coupling_, 0)[:, None] + transp_[~ np.isfinite(transp_)] = 0 + transp_Xt_ = np.dot(transp_, self.xs_) + + # define the transported points + transp_Xt_ = transp_Xt_[idx, :] + Xt[bi] - self.xt_[idx, :] + + transp_Xt.append(transp_Xt_) + + transp_Xt = np.concatenate(transp_Xt, axis=0) + + return transp_Xt + + +class SinkhornTransport(BaseTransport): + """Domain Adapatation OT method based on Sinkhorn Algorithm + + Parameters + ---------- + reg_e : float, optional (default=1) + Entropic regularization parameter + max_iter : int, float, optional (default=1000) + The minimum number of iteration before stopping the optimization + algorithm if no it has not converged + tol : float, optional (default=10e-9) + The precision required to stop the optimization algorithm. + mapping : string, optional (default="barycentric") + The kind of mapping to apply to transport samples from a domain into + another one. + if "barycentric" only the samples used to estimate the coupling can + be transported from a domain to another one. + metric : string, optional (default="sqeuclidean") + The ground metric for the Wasserstein problem + norm : string, optional (default=None) + If given, normalize the ground metric to avoid numerical errors that + can occur with large metric values. + distribution : string, optional (default="uniform") + The kind of distribution estimation to employ + verbose : int, optional (default=0) + Controls the verbosity of the optimization algorithm + log : int, optional (default=0) + Controls the logs of the optimization algorithm + limit_max: float, optional (defaul=np.infty) + Controls the semi supervised mode. Transport between labeled source + and target samples of different classes will exhibit an infinite cost + + Attributes + ---------- + coupling_ : array-like, shape (n_source_samples, n_target_samples) + The optimal coupling + log_ : dictionary + The dictionary of log, empty dic if parameter log is not True + + References + ---------- + .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE Transactions + on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 + .. [2] M. 
+
+
+class SinkhornTransport(BaseTransport):
+    """Domain Adaptation OT method based on the Sinkhorn algorithm
+
+    Parameters
+    ----------
+    reg_e : float, optional (default=1)
+        Entropic regularization parameter
+    max_iter : int, float, optional (default=1000)
+        The maximum number of iterations before stopping the optimization
+        algorithm if it has not converged
+    tol : float, optional (default=10e-9)
+        The precision required to stop the optimization algorithm.
+    out_of_sample_map : string, optional (default="ferradans")
+        The kind of mapping used to transport samples from one domain into
+        another one. With "ferradans", out of sample points are mapped
+        through their nearest neighbors among the fitted samples.
+    metric : string, optional (default="sqeuclidean")
+        The ground metric for the Wasserstein problem
+    norm : string, optional (default=None)
+        If given, normalize the ground metric to avoid numerical errors that
+        can occur with large metric values.
+    distribution_estimation : callable, optional (defaults to uniform)
+        The kind of distribution estimation to employ
+    verbose : int, optional (default=0)
+        Controls the verbosity of the optimization algorithm
+    log : int, optional (default=0)
+        Controls the logs of the optimization algorithm
+    limit_max : float, optional (default=np.infty)
+        Controls the semi supervised mode. Transport between labeled source
+        and target samples of different classes will exhibit an infinite cost
+
+    Attributes
+    ----------
+    coupling_ : array-like, shape (n_source_samples, n_target_samples)
+        The optimal coupling
+    log_ : dictionary
+        The dictionary of log, empty dict if parameter log is not True
+
+    References
+    ----------
+    .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+        "Optimal Transport for Domain Adaptation," in IEEE Transactions
+        on Pattern Analysis and Machine Intelligence, vol.PP, no.99, pp.1-1
+    .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal
+        Transport, Advances in Neural Information Processing Systems (NIPS)
+        26, 2013
+    """
+
+    def __init__(self, reg_e=1., max_iter=1000,
+                 tol=10e-9, verbose=False, log=False,
+                 metric="sqeuclidean", norm=None,
+                 distribution_estimation=distribution_estimation_uniform,
+                 out_of_sample_map='ferradans', limit_max=np.infty):
+
+        self.reg_e = reg_e
+        self.max_iter = max_iter
+        self.tol = tol
+        self.verbose = verbose
+        self.log = log
+        self.metric = metric
+        self.norm = norm
+        self.limit_max = limit_max
+        self.distribution_estimation = distribution_estimation
+        self.out_of_sample_map = out_of_sample_map
+
+    def fit(self, Xs=None, ys=None, Xt=None, yt=None):
+        """Build a coupling matrix from source and target sets of samples
+        (Xs, ys) and (Xt, yt)
+
+        Parameters
+        ----------
+        Xs : array-like, shape (n_source_samples, n_features)
+            The training input samples.
+        ys : array-like, shape (n_source_samples,)
+            The class labels
+        Xt : array-like, shape (n_target_samples, n_features)
+            The training input samples.
+        yt : array-like, shape (n_labeled_target_samples,)
+            The class labels
+
+        Returns
+        -------
+        self : object
+            Returns self.
+        """
+
+        super(SinkhornTransport, self).fit(Xs, ys, Xt, yt)
+
+        # coupling estimation
+        returned_ = sinkhorn(
+            a=self.mu_s, b=self.mu_t, M=self.cost_, reg=self.reg_e,
+            numItermax=self.max_iter, stopThr=self.tol,
+            verbose=self.verbose, log=self.log)
+
+        # deal with the value of log
+        if self.log:
+            self.coupling_, self.log_ = returned_
+        else:
+            self.coupling_ = returned_
+            self.log_ = dict()
+
+        return self
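A short usage sketch of the estimator defined above (toy data invented; assumes the class is exposed as ot.da.SinkhornTransport, as in this module):

import numpy as np
import ot

Xs = np.random.randn(50, 2)            # source samples
Xt = np.random.randn(60, 2) + 2.       # shifted target samples
mapper = ot.da.SinkhornTransport(reg_e=1e-1)
Xs_new = mapper.fit_transform(Xs=Xs, Xt=Xt)
print(mapper.coupling_.shape)          # (50, 60)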
+
+
+class EMDTransport(BaseTransport):
+    """Domain Adaptation OT method based on Earth Mover's Distance
+
+    Parameters
+    ----------
+    out_of_sample_map : string, optional (default="ferradans")
+        The kind of mapping used to transport samples from one domain into
+        another one. With "ferradans", out of sample points are mapped
+        through their nearest neighbors among the fitted samples.
+    metric : string, optional (default="sqeuclidean")
+        The ground metric for the Wasserstein problem
+    norm : string, optional (default=None)
+        If given, normalize the ground metric to avoid numerical errors that
+        can occur with large metric values.
+    distribution_estimation : callable, optional (defaults to uniform)
+        The kind of distribution estimation to employ
+    limit_max : float, optional (default=10)
+        Controls the semi supervised mode. Transport between labeled source
+        and target samples of different classes will exhibit a prohibitive
+        cost (limit_max times the maximum value of the cost matrix)
+    max_iter : int, optional (default=100000)
+        The maximum number of iterations before stopping the optimization
+        algorithm if it has not converged.
+
+    Attributes
+    ----------
+    coupling_ : array-like, shape (n_source_samples, n_target_samples)
+        The optimal coupling
+
+    References
+    ----------
+    .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+        "Optimal Transport for Domain Adaptation," in IEEE Transactions
+        on Pattern Analysis and Machine Intelligence, vol.PP, no.99, pp.1-1
+    """
+
+    def __init__(self, metric="sqeuclidean", norm=None,
+                 distribution_estimation=distribution_estimation_uniform,
+                 out_of_sample_map='ferradans', limit_max=10,
+                 max_iter=100000):
+
+        self.metric = metric
+        self.norm = norm
+        self.limit_max = limit_max
+        self.distribution_estimation = distribution_estimation
+        self.out_of_sample_map = out_of_sample_map
+        self.max_iter = max_iter
+
+    def fit(self, Xs, ys=None, Xt=None, yt=None):
+        """Build a coupling matrix from source and target sets of samples
+        (Xs, ys) and (Xt, yt)
+
+        Parameters
+        ----------
+        Xs : array-like, shape (n_source_samples, n_features)
+            The training input samples.
+        ys : array-like, shape (n_source_samples,)
+            The class labels
+        Xt : array-like, shape (n_target_samples, n_features)
+            The training input samples.
+        yt : array-like, shape (n_labeled_target_samples,)
+            The class labels
+
+        Returns
+        -------
+        self : object
+            Returns self.
+        """
+
+        super(EMDTransport, self).fit(Xs, ys, Xt, yt)
+
+        # coupling estimation
+        self.coupling_ = emd(
+            a=self.mu_s, b=self.mu_t, M=self.cost_, numItermax=self.max_iter)
+
+        return self
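The exact solver exposes the same interface; a brief sketch (toy data invented). Unlike the dense entropic coupling, the network-simplex solution is sparse, with at most ns + nt - 1 nonzero entries:

import numpy as np
import ot

Xs = np.random.randn(50, 2)
Xt = np.random.randn(60, 2) + 2.
emd_mapper = ot.da.EMDTransport()
emd_mapper.fit(Xs=Xs, Xt=Xt)
print((emd_mapper.coupling_ > 0).sum())   # <= 50 + 60 - 1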
+
+
+class SinkhornLpl1Transport(BaseTransport):
+    """Domain Adaptation OT method based on the Sinkhorn algorithm with
+    LpL1 class regularization.
+
+    Parameters
+    ----------
+    reg_e : float, optional (default=1)
+        Entropic regularization parameter
+    reg_cl : float, optional (default=0.1)
+        Class regularization parameter
+    out_of_sample_map : string, optional (default="ferradans")
+        The kind of mapping used to transport samples from one domain into
+        another one. With "ferradans", out of sample points are mapped
+        through their nearest neighbors among the fitted samples.
+    metric : string, optional (default="sqeuclidean")
+        The ground metric for the Wasserstein problem
+    norm : string, optional (default=None)
+        If given, normalize the ground metric to avoid numerical errors that
+        can occur with large metric values.
+    distribution_estimation : callable, optional (defaults to uniform)
+        The kind of distribution estimation to employ
+    max_iter : int, float, optional (default=10)
+        The maximum number of iterations before stopping the optimization
+        algorithm if it has not converged
+    max_inner_iter : int, float, optional (default=200)
+        The number of iterations in the inner loop
+    tol : float, optional (default=10e-9)
+        The precision required to stop the optimization algorithm.
+    verbose : int, optional (default=0)
+        Controls the verbosity of the optimization algorithm
+    limit_max : float, optional (default=np.infty)
+        Controls the semi supervised mode. Transport between labeled source
+        and target samples of different classes will exhibit an infinite cost
+
+    Attributes
+    ----------
+    coupling_ : array-like, shape (n_source_samples, n_target_samples)
+        The optimal coupling
+
+    References
+    ----------
+    .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+        "Optimal Transport for Domain Adaptation," in IEEE
+        Transactions on Pattern Analysis and Machine Intelligence,
+        vol.PP, no.99, pp.1-1
+    .. [2] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
+        Generalized conditional gradient: analysis of convergence
+        and applications. arXiv preprint arXiv:1510.06567.
+    """
+
+    def __init__(self, reg_e=1., reg_cl=0.1,
+                 max_iter=10, max_inner_iter=200,
+                 tol=10e-9, verbose=False,
+                 metric="sqeuclidean", norm=None,
+                 distribution_estimation=distribution_estimation_uniform,
+                 out_of_sample_map='ferradans', limit_max=np.infty):
+
+        self.reg_e = reg_e
+        self.reg_cl = reg_cl
+        self.max_iter = max_iter
+        self.max_inner_iter = max_inner_iter
+        self.tol = tol
+        self.verbose = verbose
+        self.metric = metric
+        self.norm = norm
+        self.distribution_estimation = distribution_estimation
+        self.out_of_sample_map = out_of_sample_map
+        self.limit_max = limit_max
+
+    def fit(self, Xs, ys=None, Xt=None, yt=None):
+        """Build a coupling matrix from source and target sets of samples
+        (Xs, ys) and (Xt, yt)
+
+        Parameters
+        ----------
+        Xs : array-like, shape (n_source_samples, n_features)
+            The training input samples.
+        ys : array-like, shape (n_source_samples,)
+            The class labels
+        Xt : array-like, shape (n_target_samples, n_features)
+            The training input samples.
+        yt : array-like, shape (n_labeled_target_samples,)
+            The class labels
+
+        Returns
+        -------
+        self : object
+            Returns self.
+        """
+
+        # check that the necessary input parameters are here
+        if check_params(Xs=Xs, Xt=Xt, ys=ys):
+
+            super(SinkhornLpl1Transport, self).fit(Xs, ys, Xt, yt)
+
+            self.coupling_ = sinkhorn_lpl1_mm(
+                a=self.mu_s, labels_a=ys, b=self.mu_t, M=self.cost_,
+                reg=self.reg_e, eta=self.reg_cl, numItermax=self.max_iter,
+                numInnerItermax=self.max_inner_iter, stopInnerThr=self.tol,
+                verbose=self.verbose)
+
+        return self
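Class regularization requires the source labels, hence the stricter check_params call above. A minimal sketch (toy labels invented):

import numpy as np
import ot

Xs = np.random.randn(40, 2)
ys = np.repeat([1, 2], 20)             # two source classes
Xt = np.random.randn(40, 2) + 1.
lpl1 = ot.da.SinkhornLpl1Transport(reg_e=1e-1, reg_cl=1e-1)
lpl1.fit(Xs=Xs, ys=ys, Xt=Xt)
print(lpl1.coupling_.sum(axis=1)[:3])  # row sums approximate mu_s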
+
+
+class SinkhornL1l2Transport(BaseTransport):
+    """Domain Adaptation OT method based on the Sinkhorn algorithm with
+    l1l2 class regularization.
+
+    Parameters
+    ----------
+    reg_e : float, optional (default=1)
+        Entropic regularization parameter
+    reg_cl : float, optional (default=0.1)
+        Class regularization parameter
+    out_of_sample_map : string, optional (default="ferradans")
+        The kind of mapping used to transport samples from one domain into
+        another one. With "ferradans", out of sample points are mapped
+        through their nearest neighbors among the fitted samples.
+    metric : string, optional (default="sqeuclidean")
+        The ground metric for the Wasserstein problem
+    norm : string, optional (default=None)
+        If given, normalize the ground metric to avoid numerical errors that
+        can occur with large metric values.
+    distribution_estimation : callable, optional (defaults to uniform)
+        The kind of distribution estimation to employ
+    max_iter : int, float, optional (default=10)
+        The maximum number of iterations before stopping the optimization
+        algorithm if it has not converged
+    max_inner_iter : int, float, optional (default=200)
+        The number of iterations in the inner loop
+    tol : float, optional (default=10e-9)
+        The precision required to stop the optimization algorithm.
+    verbose : int, optional (default=0)
+        Controls the verbosity of the optimization algorithm
+    log : int, optional (default=0)
+        Controls the logs of the optimization algorithm
+    limit_max : float, optional (default=10)
+        Controls the semi supervised mode. Transport between labeled source
+        and target samples of different classes will exhibit a prohibitive
+        cost (limit_max times the maximum value of the cost matrix)
+
+    Attributes
+    ----------
+    coupling_ : array-like, shape (n_source_samples, n_target_samples)
+        The optimal coupling
+    log_ : dictionary
+        The dictionary of log, empty dict if parameter log is not True
+
+    References
+    ----------
+    .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+        "Optimal Transport for Domain Adaptation," in IEEE
+        Transactions on Pattern Analysis and Machine Intelligence,
+        vol.PP, no.99, pp.1-1
+    .. [2] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
+        Generalized conditional gradient: analysis of convergence
+        and applications. arXiv preprint arXiv:1510.06567.
+    """
+
+    def __init__(self, reg_e=1., reg_cl=0.1,
+                 max_iter=10, max_inner_iter=200,
+                 tol=10e-9, verbose=False, log=False,
+                 metric="sqeuclidean", norm=None,
+                 distribution_estimation=distribution_estimation_uniform,
+                 out_of_sample_map='ferradans', limit_max=10):
+
+        self.reg_e = reg_e
+        self.reg_cl = reg_cl
+        self.max_iter = max_iter
+        self.max_inner_iter = max_inner_iter
+        self.tol = tol
+        self.verbose = verbose
+        self.log = log
+        self.metric = metric
+        self.norm = norm
+        self.distribution_estimation = distribution_estimation
+        self.out_of_sample_map = out_of_sample_map
+        self.limit_max = limit_max
+
+    def fit(self, Xs, ys=None, Xt=None, yt=None):
+        """Build a coupling matrix from source and target sets of samples
+        (Xs, ys) and (Xt, yt)
+
+        Parameters
+        ----------
+        Xs : array-like, shape (n_source_samples, n_features)
+            The training input samples.
+        ys : array-like, shape (n_source_samples,)
+            The class labels
+        Xt : array-like, shape (n_target_samples, n_features)
+            The training input samples.
+        yt : array-like, shape (n_labeled_target_samples,)
+            The class labels
+
+        Returns
+        -------
+        self : object
+            Returns self.
+        """
+
+        # check that the necessary input parameters are here
+        if check_params(Xs=Xs, Xt=Xt, ys=ys):
+
+            super(SinkhornL1l2Transport, self).fit(Xs, ys, Xt, yt)
+
+            returned_ = sinkhorn_l1l2_gl(
+                a=self.mu_s, labels_a=ys, b=self.mu_t, M=self.cost_,
+                reg=self.reg_e, eta=self.reg_cl, numItermax=self.max_iter,
+                numInnerItermax=self.max_inner_iter, stopInnerThr=self.tol,
+                verbose=self.verbose, log=self.log)
+
+            # deal with the value of log
+            if self.log:
+                self.coupling_, self.log_ = returned_
+            else:
+                self.coupling_ = returned_
+                self.log_ = dict()
+
+        return self
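The l1l2 variant differs from the LpL1 one mainly in the group-lasso regularizer it uses and in returning an optional solver log; a brief sketch (toy data invented):

import numpy as np
import ot

Xs, ys = np.random.randn(40, 2), np.repeat([1, 2], 20)
Xt = np.random.randn(40, 2) + 1.
l1l2 = ot.da.SinkhornL1l2Transport(reg_e=1e-1, reg_cl=1e-1, log=True)
l1l2.fit(Xs=Xs, ys=ys, Xt=Xt)
print(type(l1l2.log_))                 # dict; empty when log=False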
+
+
+class MappingTransport(BaseEstimator):
+    """MappingTransport: DA method that aims at jointly estimating an
+    optimal transport coupling and the associated mapping
+
+    Parameters
+    ----------
+    mu : float, optional (default=1)
+        Weight for the linear OT loss (>0)
+    eta : float, optional (default=0.001)
+        Regularization term for the linear mapping L (>0)
+    bias : bool, optional (default=False)
+        Estimate linear mapping with constant bias
+    metric : string, optional (default="sqeuclidean")
+        The ground metric for the Wasserstein problem
+    norm : string, optional (default=None)
+        If given, normalize the ground metric to avoid numerical errors that
+        can occur with large metric values.
+    kernel : string, optional (default="linear")
+        The kernel to use, either "linear" or "gaussian"
+    sigma : float, optional (default=1)
+        The gaussian kernel parameter
+    max_iter : int, optional (default=100)
+        Max number of BCD iterations
+    tol : float, optional (default=1e-5)
+        Stop threshold on relative loss decrease (>0)
+    max_inner_iter : int, optional (default=10)
+        Max number of iterations (inner CG solver)
+    inner_tol : float, optional (default=1e-6)
+        Stop threshold on error (inner CG solver) (>0)
+    verbose : bool, optional (default=False)
+        Print information along iterations
+    log : bool, optional (default=False)
+        Record log if True
+
+    Attributes
+    ----------
+    coupling_ : array-like, shape (n_source_samples, n_target_samples)
+        The optimal coupling
+    mapping_ : array-like
+        The associated mapping; of shape (n_features (+ 1 if bias),
+        n_features) for kernel == "linear", and of shape
+        (n_source_samples (+ 1 if bias), n_features) for
+        kernel == "gaussian"
+    log_ : dictionary
+        The dictionary of log, empty dict if parameter log is not True
+
+    References
+    ----------
+    .. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard,
+        "Mapping estimation for discrete optimal transport",
+        Neural Information Processing Systems (NIPS), 2016.
+    """
+
+    def __init__(self, mu=1, eta=0.001, bias=False, metric="sqeuclidean",
+                 norm=None, kernel="linear", sigma=1, max_iter=100, tol=1e-5,
+                 max_inner_iter=10, inner_tol=1e-6, log=False, verbose=False,
+                 verbose2=False):
+
+        self.metric = metric
+        self.norm = norm
+        self.mu = mu
+        self.eta = eta
+        self.bias = bias
+        self.kernel = kernel
+        self.sigma = sigma
+        self.max_iter = max_iter
+        self.tol = tol
+        self.max_inner_iter = max_inner_iter
+        self.inner_tol = inner_tol
+        self.log = log
+        self.verbose = verbose
+        self.verbose2 = verbose2
+
+    def fit(self, Xs=None, ys=None, Xt=None, yt=None):
+        """Builds an optimal coupling and estimates the associated mapping
+        from source and target sets of samples (Xs, ys) and (Xt, yt)
+
+        Parameters
+        ----------
+        Xs : array-like, shape (n_source_samples, n_features)
+            The training input samples.
+        ys : array-like, shape (n_source_samples,)
+            The class labels
+        Xt : array-like, shape (n_target_samples, n_features)
+            The training input samples.
+        yt : array-like, shape (n_labeled_target_samples,)
+            The class labels
+
+        Returns
+        -------
+        self : object
+            Returns self
+        """
+
+        # check that the necessary input parameters are here
+        if check_params(Xs=Xs, Xt=Xt):
+
+            self.xs_ = Xs
+            self.xt_ = Xt
+
+            if self.kernel == "linear":
+                returned_ = joint_OT_mapping_linear(
+                    Xs, Xt, mu=self.mu, eta=self.eta, bias=self.bias,
+                    verbose=self.verbose, verbose2=self.verbose2,
+                    numItermax=self.max_iter,
+                    numInnerItermax=self.max_inner_iter, stopThr=self.tol,
+                    stopInnerThr=self.inner_tol, log=self.log)
+
+            elif self.kernel == "gaussian":
+                returned_ = joint_OT_mapping_kernel(
+                    Xs, Xt, mu=self.mu, eta=self.eta, bias=self.bias,
+                    sigma=self.sigma, verbose=self.verbose,
+                    verbose2=self.verbose2, numItermax=self.max_iter,
+                    numInnerItermax=self.max_inner_iter,
+                    stopInnerThr=self.inner_tol, stopThr=self.tol,
+                    log=self.log)
+
+            # deal with the value of log
+            if self.log:
+                self.coupling_, self.mapping_, self.log_ = returned_
+            else:
+                self.coupling_, self.mapping_ = returned_
+                self.log_ = dict()
+
+        return self
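The point of estimating the mapping L jointly with the coupling is out-of-sample transport without refitting, as done by the transform method that follows. A short sketch (toy data invented):

import numpy as np
import ot

Xs = np.random.randn(30, 2)
Xt = np.random.randn(30, 2) + 1.
mt = ot.da.MappingTransport(kernel="linear", mu=1, eta=1e-3, bias=True)
mt.fit(Xs=Xs, Xt=Xt)
Xnew = np.random.randn(5, 2)           # samples unseen at fit time
print(mt.transform(Xnew).shape)        # (5, 2), mapped through L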
+    def transform(self, Xs):
+        """Transports source samples Xs onto target ones Xt
+
+        Parameters
+        ----------
+        Xs : array-like, shape (n_source_samples, n_features)
+            The training input samples.
+
+        Returns
+        -------
+        transp_Xs : array-like, shape (n_source_samples, n_features)
+            The transported source samples.
+        """
+
+        # check that the necessary input parameters are here
+        if check_params(Xs=Xs):
+
+            if np.array_equal(self.xs_, Xs):
+                # perform standard barycentric mapping
+                transp = self.coupling_ / np.sum(self.coupling_, 1)[:, None]
+
+                # set nans to 0
+                transp[~ np.isfinite(transp)] = 0
+
+                # compute transported samples
+                transp_Xs = np.dot(transp, self.xt_)
+            else:
+                if self.kernel == "gaussian":
+                    K = kernel(Xs, self.xs_, method=self.kernel,
+                               sigma=self.sigma)
+                elif self.kernel == "linear":
+                    K = Xs
+                if self.bias:
+                    K = np.hstack((K, np.ones((Xs.shape[0], 1))))
+                transp_Xs = K.dot(self.mapping_)
+
+            return transp_Xs
diff --git a/ot/datasets.py b/ot/datasets.py
index 7816833..e4fe118 100644
--- a/ot/datasets.py
+++ b/ot/datasets.py
@@ -2,12 +2,16 @@
 Simple example datasets for OT
 """
+
+# Author: Remi Flamary <remi.flamary@unice.fr>
+#
+# License: MIT License
+
 import numpy as np
 import scipy as sp

-def get_1D_gauss(n,m,s):
+def get_1D_gauss(n, m, s):
     """return a 1D histogram for a gaussian distribution (n bins, mean m and std s)

     Parameters
@@ -27,12 +31,12 @@
         1D histogram for a gaussian distribution
     """
-    x=np.arange(n,dtype=np.float64)
-    h=np.exp(-(x-m)**2/(2*s**2))
-    return h/h.sum()
+    x = np.arange(n, dtype=np.float64)
+    h = np.exp(-(x - m)**2 / (2 * s**2))
+    return h / h.sum()

-def get_2D_samples_gauss(n,m,sigma):
+def get_2D_samples_gauss(n, m, sigma):
     """return n samples drawn from 2D gaussian N(m,sigma)

     Parameters
@@ -52,17 +56,17 @@
         n samples drawn from N(m,sigma)
     """
-    if np.isscalar(sigma):
-        sigma=np.array([sigma,])
-    if len(sigma)>1:
-        P=sp.linalg.sqrtm(sigma)
-        res= np.random.randn(n,2).dot(P)+m
+    if np.isscalar(sigma):
+        sigma = np.array([sigma, ])
+    if len(sigma) > 1:
+        P = sp.linalg.sqrtm(sigma)
+        res = np.random.randn(n, 2).dot(P) + m
     else:
-        res= np.random.randn(n,2)*np.sqrt(sigma)+m
+        res = np.random.randn(n, 2) * np.sqrt(sigma) + m
     return res

-def get_data_classif(dataset,n,nz=.5,theta=0,**kwargs):
+def get_data_classif(dataset, n, nz=.5, theta=0, **kwargs):
     """ dataset generation for classification problems

     Parameters
@@ -84,48 +88,53 @@
         labels of the samples
     """
-    if dataset.lower()=='3gauss':
-        y=np.floor((np.arange(n)*1.0/n*3))+1
-        x=np.zeros((n,2))
+    if dataset.lower() == '3gauss':
+        y = np.floor((np.arange(n) * 1.0 / n * 3)) + 1
+        x = np.zeros((n, 2))
         # class 1
-        x[y==1,0]=-1.; x[y==1,1]=-1.
-        x[y==2,0]=-1.; x[y==2,1]=1.
-        x[y==3,0]=1. ; x[y==3,1]=0
-
-        x[y!=3,:]+=1.5*nz*np.random.randn(sum(y!=3),2)
-        x[y==3,:]+=2*nz*np.random.randn(sum(y==3),2)
-
-    elif dataset.lower()=='3gauss2':
-        y=np.floor((np.arange(n)*1.0/n*3))+1
-        x=np.zeros((n,2))
-        y[y==4]=3
+        x[y == 1, 0] = -1.
+        x[y == 1, 1] = -1.
+        x[y == 2, 0] = -1.
+        x[y == 2, 1] = 1.
+        x[y == 3, 0] = 1.
+        x[y == 3, 1] = 0
+
+        x[y != 3, :] += 1.5 * nz * np.random.randn(sum(y != 3), 2)
+        x[y == 3, :] += 2 * nz * np.random.randn(sum(y == 3), 2)
+
+    elif dataset.lower() == '3gauss2':
+        y = np.floor((np.arange(n) * 1.0 / n * 3)) + 1
+        x = np.zeros((n, 2))
+        y[y == 4] = 3
         # class 1
-        x[y==1,0]=-2.; x[y==1,1]=-2.
-        x[y==2,0]=-2.; x[y==2,1]=2.
-        x[y==3,0]=2. ; x[y==3,1]=0
-
-        x[y!=3,:]+=nz*np.random.randn(sum(y!=3),2)
-        x[y==3,:]+=2*nz*np.random.randn(sum(y==3),2)
-
-    elif dataset.lower()=='gaussrot' :
-        rot=np.array([[np.cos(theta),np.sin(theta)],[-np.sin(theta),np.cos(theta)]])
-        m1=np.array([-1,1])
-        m2=np.array([1,-1])
-        y=np.floor((np.arange(n)*1.0/n*2))+1
-        n1=np.sum(y==1)
-        n2=np.sum(y==2)
-        x=np.zeros((n,2))
-
-        x[y==1,:]=get_2D_samples_gauss(n1,m1,nz)
-        x[y==2,:]=get_2D_samples_gauss(n2,m2,nz)
-
-        x=x.dot(rot)
-
-
+        x[y == 1, 0] = -2.
+        x[y == 1, 1] = -2.
+        x[y == 2, 0] = -2.
+        x[y == 2, 1] = 2.
+        x[y == 3, 0] = 2.
+        x[y == 3, 1] = 0
+
+        x[y != 3, :] += nz * np.random.randn(sum(y != 3), 2)
+        x[y == 3, :] += 2 * nz * np.random.randn(sum(y == 3), 2)
+
+    elif dataset.lower() == 'gaussrot':
+        rot = np.array(
+            [[np.cos(theta), np.sin(theta)], [-np.sin(theta), np.cos(theta)]])
+        m1 = np.array([-1, 1])
+        m2 = np.array([1, -1])
+        y = np.floor((np.arange(n) * 1.0 / n * 2)) + 1
+        n1 = np.sum(y == 1)
+        n2 = np.sum(y == 2)
+        x = np.zeros((n, 2))
+
+        x[y == 1, :] = get_2D_samples_gauss(n1, m1, nz)
+        x[y == 2, :] = get_2D_samples_gauss(n2, m2, nz)
+
+        x = x.dot(rot)
     else:
-        x=np.array(0)
-        y=np.array(0)
+        x = np.array(0)
+        y = np.array(0)
         print("unknown dataset")

-    return x,y.astype(int)
\ No newline at end of file + return x, y.astype(int) @@ -3,43 +3,50 @@ Dimension reduction with optimal transport """ +# Author: Remi Flamary <remi.flamary@unice.fr> +# +# License: MIT License + +from scipy import linalg import autograd.numpy as np from pymanopt.manifolds import Stiefel from pymanopt import Problem from pymanopt.solvers import SteepestDescent, TrustRegions -import scipy.linalg as la -def dist(x1,x2): + +def dist(x1, x2): """ Compute squared euclidean distance between samples (autograd) """ - x1p2=np.sum(np.square(x1),1) - x2p2=np.sum(np.square(x2),1) - return x1p2.reshape((-1,1))+x2p2.reshape((1,-1))-2*np.dot(x1,x2.T) + x1p2 = np.sum(np.square(x1), 1) + x2p2 = np.sum(np.square(x2), 1) + return x1p2.reshape((-1, 1)) + x2p2.reshape((1, -1)) - 2 * np.dot(x1, x2.T) + -def sinkhorn(w1,w2,M,reg,k): +def sinkhorn(w1, w2, M, reg, k): """Sinkhorn algorithm with fixed number of iteration (autograd) """ - K=np.exp(-M/reg) - ui=np.ones((M.shape[0],)) - vi=np.ones((M.shape[1],)) + K = np.exp(-M / reg) + ui = np.ones((M.shape[0],)) + vi = np.ones((M.shape[1],)) for i in range(k): - vi=w2/(np.dot(K.T,ui)) - ui=w1/(np.dot(K,vi)) - G=ui.reshape((M.shape[0],1))*K*vi.reshape((1,M.shape[1])) + vi = w2 / (np.dot(K.T, ui)) + ui = w1 / (np.dot(K, vi)) + G = ui.reshape((M.shape[0], 1)) * K * vi.reshape((1, M.shape[1])) return G -def split_classes(X,y): + +def split_classes(X, y): """split samples in X by classes in y """ - lstsclass=np.unique(y) - return [X[y==i,:].astype(np.float32) for i in lstsclass] + lstsclass = np.unique(y) + return [X[y == i, :].astype(np.float32) for i in lstsclass] + + +def fda(X, y, p=2, reg=1e-16): + """ + Fisher Discriminant Analysis -def fda(X,y,p=2,reg=1e-16): - """ - Fisher Discriminant Analysis - - Parameters ---------- X : numpy.ndarray (n,d) @@ -59,62 +66,62 @@ def fda(X,y,p=2,reg=1e-16): proj : fun projection function including mean centering - - """ - - mx=np.mean(X) - X-=mx.reshape((1,-1)) - + + """ + + mx = np.mean(X) + X -= mx.reshape((1, -1)) + # data split between classes - d=X.shape[1] - xc=split_classes(X,y) - nc=len(xc) - - p=min(nc-1,p) - - Cw=0 + d = X.shape[1] + xc = split_classes(X, y) + nc = len(xc) + + p = min(nc - 1, p) + + Cw = 0 for x in xc: - Cw+=np.cov(x,rowvar=False) - Cw/=nc - - mxc=np.zeros((d,nc)) - + Cw += np.cov(x, rowvar=False) + Cw /= nc + + mxc = np.zeros((d, nc)) + for i in range(nc): - mxc[:,i]=np.mean(xc[i]) - - mx0=np.mean(mxc,1) - Cb=0 + mxc[:, i] = np.mean(xc[i]) + + mx0 = np.mean(mxc, 1) + Cb = 0 for i in range(nc): - Cb+=(mxc[:,i]-mx0).reshape((-1,1))*(mxc[:,i]-mx0).reshape((1,-1)) - - w,V=la.eig(Cb,Cw+reg*np.eye(d)) - - idx=np.argsort(w.real) - - Popt=V[:,idx[-p:]] - - - + Cb += (mxc[:, i] - mx0).reshape((-1, 1)) * \ + (mxc[:, i] - mx0).reshape((1, -1)) + + w, V = linalg.eig(Cb, Cw + reg * np.eye(d)) + + idx = np.argsort(w.real) + + Popt = V[:, idx[-p:]] + def proj(X): - return (X-mx.reshape((1,-1))).dot(Popt) - + return (X - mx.reshape((1, -1))).dot(Popt) + return Popt, proj -def wda(X,y,p=2,reg=1,k=10,solver = None,maxiter=100,verbose=0,P0=None): - """ + +def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None): + """ Wasserstein Discriminant Analysis [11]_ - + The function solves the following optimization problem: .. 
math:: P = \\text{arg}\min_P \\frac{\\sum_i W(PX^i,PX^i)}{\\sum_{i,j\\neq i} W(PX^i,PX^j)} where : - + - :math:`P` is a linear projection operator in the Stiefel(p,d) manifold - :math:`W` is entropic regularized Wasserstein distances - - :math:`X^i` are samples in the dataset corresponding to class i - + - :math:`X^i` are samples in the dataset corresponding to class i + Parameters ---------- X : numpy.ndarray (n,d) @@ -147,54 +154,50 @@ def wda(X,y,p=2,reg=1,k=10,solver = None,maxiter=100,verbose=0,P0=None): ---------- .. [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016). Wasserstein Discriminant Analysis. arXiv preprint arXiv:1608.08063. - - """ - - mx=np.mean(X) - X-=mx.reshape((1,-1)) - + + """ # noqa + + mx = np.mean(X) + X -= mx.reshape((1, -1)) + # data split between classes - d=X.shape[1] - xc=split_classes(X,y) + d = X.shape[1] + xc = split_classes(X, y) # compute uniform weighs - wc=[np.ones((x.shape[0]),dtype=np.float32)/x.shape[0] for x in xc] - + wc = [np.ones((x.shape[0]), dtype=np.float32) / x.shape[0] for x in xc] + def cost(P): # wda loss - loss_b=0 - loss_w=0 - - for i,xi in enumerate(xc): - xi=np.dot(xi,P) - for j,xj in enumerate(xc[i:]): - xj=np.dot(xj,P) - M=dist(xi,xj) - G=sinkhorn(wc[i],wc[j+i],M,reg,k) - if j==0: - loss_w+=np.sum(G*M) + loss_b = 0 + loss_w = 0 + + for i, xi in enumerate(xc): + xi = np.dot(xi, P) + for j, xj in enumerate(xc[i:]): + xj = np.dot(xj, P) + M = dist(xi, xj) + G = sinkhorn(wc[i], wc[j + i], M, reg, k) + if j == 0: + loss_w += np.sum(G * M) else: - loss_b+=np.sum(G*M) - - # loss inversed because minimization - return loss_w/loss_b - - + loss_b += np.sum(G * M) + + # loss inversed because minimization + return loss_w / loss_b + # declare manifold and problem - manifold = Stiefel(d, p) + manifold = Stiefel(d, p) problem = Problem(manifold=manifold, cost=cost) - + # declare solver and solve if solver is None: - solver= SteepestDescent(maxiter=maxiter,logverbosity=verbose) - elif solver in ['tr','TrustRegions']: - solver= TrustRegions(maxiter=maxiter,logverbosity=verbose) - - Popt = solver.solve(problem,x=P0) - - def proj(X): - return (X-mx.reshape((1,-1))).dot(Popt) - - return Popt, proj + solver = SteepestDescent(maxiter=maxiter, logverbosity=verbose) + elif solver in ['tr', 'TrustRegions']: + solver = TrustRegions(maxiter=maxiter, logverbosity=verbose) + Popt = solver.solve(problem, x=P0) + def proj(X): + return (X - mx.reshape((1, -1))).dot(Popt) + return Popt, proj diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py index 40b11c0..c8f9433 100644 --- a/ot/gpu/__init__.py +++ b/ot/gpu/__init__.py @@ -4,4 +4,9 @@ from . import bregman from . 
import da from .bregman import sinkhorn +# Author: Remi Flamary <remi.flamary@unice.fr> +# Leo Gautheron <https://github.com/aje> +# +# License: MIT License + __all__ = ["bregman", "da", "sinkhorn"] diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py index 2c3e317..47939c4 100644 --- a/ot/gpu/bregman.py +++ b/ot/gpu/bregman.py @@ -3,13 +3,18 @@ Bregman projections for regularized OT with GPU """ +# Author: Remi Flamary <remi.flamary@unice.fr> +# Leo Gautheron <https://github.com/aje> +# +# License: MIT License + import numpy as np import cudamat def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, log=False, returnAsGPU=False): - """ + r""" Solve the entropic regularization optimal transport problem on GPU The function solves the following optimization problem: @@ -82,7 +87,7 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, ot.lp.emd : Unregularized OT ot.optim.cg : General regularized OT - """ + """ # init data Nini = len(a) Nfin = len(b) @@ -92,11 +97,11 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, # we assume that no distances are null except those of the diagonal of # distances - u = (np.ones(Nini)/Nini).reshape((Nini, 1)) + u = (np.ones(Nini) / Nini).reshape((Nini, 1)) u_GPU = cudamat.CUDAMatrix(u) a_GPU = cudamat.CUDAMatrix(a.reshape((Nini, 1))) ones_GPU = cudamat.empty(u_GPU.shape).assign(1) - v = (np.ones(Nfin)/Nfin).reshape((Nfin, 1)) + v = (np.ones(Nfin) / Nfin).reshape((Nfin, 1)) v_GPU = cudamat.CUDAMatrix(v) b_GPU = cudamat.CUDAMatrix(b.reshape((Nfin, 1))) @@ -121,7 +126,7 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, ones_GPU.divide(Kp_GPU.dot(v_GPU), target=u_GPU) if (np.any(KtransposeU_GPU.asarray() == 0) or - not u_GPU.allfinite() or not v_GPU.allfinite()): + not u_GPU.allfinite() or not v_GPU.allfinite()): # we have reached the machine precision # come back to previous solution and quit loop print('Warning: numerical errors at iteration', cpt) @@ -142,7 +147,8 @@ def sinkhorn(a, b, M_GPU, reg, numItermax=1000, stopThr=1e-9, verbose=False, if verbose: if cpt % 200 == 0: - print('{:5s}|{:12s}'.format('It.', 'Err')+'\n'+'-'*19) + print( + '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19) print('{:5d}|{:8e}|'.format(cpt, err)) cpt += 1 if log: diff --git a/ot/gpu/da.py b/ot/gpu/da.py index b05ff70..05c580f 100644 --- a/ot/gpu/da.py +++ b/ot/gpu/da.py @@ -3,6 +3,14 @@ Domain adaptation with optimal transport with GPU implementation """ +# Author: Remi Flamary <remi.flamary@unice.fr> +# Nicolas Courty <ncourty@irisa.fr> +# Michael Perrot <michael.perrot@univ-st-etienne.fr> +# Leo Gautheron <https://github.com/aje> +# +# License: MIT License + + import numpy as np from ..utils import unif from ..da import OTDA @@ -167,7 +175,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M_GPU, reg, eta=0.1, numItermax=10, tmpC_GPU = cudamat.empty((Nfin, nbRow)).assign(0) transp_GPU.transpose().select_columns(indices_labels[i], tmpC_GPU) majs_GPU = tmpC_GPU.sum(axis=1).add(epsilon) - cudamat.pow(majs_GPU, (p-1)) + cudamat.pow(majs_GPU, (p - 1)) majs_GPU.mult(p) tmpC_GPU.assign(0) diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h index fb7feca..15e9115 100644 --- a/ot/lp/EMD.h +++ b/ot/lp/EMD.h @@ -23,8 +23,12 @@ using namespace lemon; typedef unsigned int node_id_type; +enum ProblemType { + INFEASIBLE, + OPTIMAL, + UNBOUNDED +}; -void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, - double* alpha, double* beta, double *cost, int max_iter); +int EMD_wrap(int n1,int 
n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int max_iter); #endif diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp index 0977e75..8ac43c7 100644 --- a/ot/lp/EMD_wrapper.cpp +++ b/ot/lp/EMD_wrapper.cpp @@ -15,10 +15,11 @@ #include "EMD.h" -void EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, +int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, double* alpha, double* beta, double *cost, int max_iter) { // beware M and C anre strored in row major C style!!! - int n, m, i,cur; + int n, m, i, cur; + double max; typedef FullBipartiteDigraph Digraph; DIGRAPH_TYPEDEFS(FullBipartiteDigraph); @@ -44,7 +45,7 @@ void EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, std::vector<int> indI(n), indJ(m); std::vector<double> weights1(n), weights2(m); Digraph di(n, m); - NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m,max_iter); + NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m, max_iter); // Set supply and demand, don't account for 0 values (faster) @@ -108,5 +109,5 @@ void EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, }; - + return ret; } diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 915a18c..a14d4e4 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -3,6 +3,10 @@ Solvers for the original linear program OT problem """ +# Author: Remi Flamary <remi.flamary@unice.fr> +# +# License: MIT License + import numpy as np # import compiled emd from .emd_wrap import emd_c, emd2_c @@ -10,8 +14,7 @@ from ..utils import parmap import multiprocessing - -def emd(a, b, M, dual_variables=False, max_iter=-1): +def emd(a, b, M, numItermax=100000, dual_variables=False): """Solves the Earth Movers distance problem and returns the OT matrix @@ -36,6 +39,9 @@ def emd(a, b, M, dual_variables=False, max_iter=-1): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix + numItermax : int, optional (default=100000) + The maximum number of iterations before stopping the optimization + algorithm if it has not converged. Returns ------- @@ -48,7 +54,7 @@ def emd(a, b, M, dual_variables=False, max_iter=-1): Simple example with obvious solution. The function emd accepts lists and perform automatic conversion to numpy arrays - + >>> import ot >>> a=[.5,.5] >>> b=[.5,.5] @@ -80,13 +86,13 @@ def emd(a, b, M, dual_variables=False, max_iter=-1): if len(b) == 0: b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] - G, alpha, beta = emd_c(a, b, M, max_iter) + G, alpha, beta = emd_c(a, b, M, numItermax) if dual_variables: return G, alpha, beta return G -def emd2(a, b, M,processes=multiprocessing.cpu_count(), max_iter=-1): - """Solves the Earth Movers distance problem and returns the loss +def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000): + """Solves the Earth Movers distance problem and returns the loss .. math:: \gamma = arg\min_\gamma <\gamma,M>_F @@ -109,6 +115,9 @@ def emd2(a, b, M,processes=multiprocessing.cpu_count(), max_iter=-1): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix + numItermax : int, optional (default=100000) + The maximum number of iterations before stopping the optimization + algorithm if it has not converged. Returns ------- @@ -121,15 +130,15 @@ def emd2(a, b, M,processes=multiprocessing.cpu_count(), max_iter=-1): Simple example with obvious solution. 
    The function emd accepts lists and performs automatic conversion to numpy arrays
-
-
+
+
    >>> import ot
    >>> a=[.5,.5]
    >>> b=[.5,.5]
    >>> M=[[0.,1.],[1.,0.]]
    >>> ot.emd2(a,b,M)
    0.0
-
+
    References
    ----------
@@ -152,16 +161,13 @@
        a = np.ones((M.shape[0], ), dtype=np.float64)/M.shape[0]
    if len(b) == 0:
        b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1]
-
+
    if len(b.shape)==1:
-        return emd2_c(a, b, M, max_iter)[0]
-    else:
-        nb=b.shape[1]
-        #res=[emd2_c(a,b[:,i].copy(),M) for i in range(nb)]
-        def f(b):
-            return emd2_c(a,b,M, max_iter)[0]
-        res= parmap(f, [b[:,i] for i in range(nb)],processes)
-        return np.array(res)
-
-
\ No newline at end of file
+        return emd2_c(a, b, M, numItermax)[0]
+    nb = b.shape[1]
+    # res = [emd2_c(a, b[:, i].copy(), M, numItermax) for i in range(nb)]
+
+    def f(b):
+        return emd2_c(a, b, M, numItermax)[0]
+    res = parmap(f, [b[:, i] for i in range(nb)], processes)
+    return np.array(res)
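With the column loop above, emd2 can evaluate several target histograms at once (in parallel through parmap); a small sketch (toy histograms invented):

import numpy as np
import ot

a = np.array([.5, .5])
B = np.stack([[.5, .5], [.9, .1]], axis=1)   # two targets as columns
M = np.array([[0., 1.], [1., 0.]])
print(ot.emd2(a, B, M))                # one EMD value per column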
diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
index 4febb32..7056e0e 100644
--- a/ot/lp/emd_wrap.pyx
+++ b/ot/lp/emd_wrap.pyx
@@ -1,9 +1,12 @@
 # -*- coding: utf-8 -*-
 """
-Created on Thu Sep 11 08:42:08 2014
-
-@author: rflamary
+Cython linker with C solver
 """
+
+# Author: Remi Flamary <remi.flamary@unice.fr>
+#
+# License: MIT License
+
 import numpy as np
 cimport numpy as np

@@ -12,47 +15,50 @@
 cimport cython

 cdef extern from "EMD.h":
-    void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost,
-                  double* alpha, double* beta, int max_iter)
+    int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int max_iter)
+    cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED

 @cython.boundscheck(False)
 @cython.wraparound(False)
-def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int maxiter):
+def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int max_iter):
     """
         Solves the Earth Movers distance problem and returns the optimal transport matrix

         gamm=emd(a,b,M)

     .. math::
         \gamma = arg\min_\gamma <\gamma,M>_F

         s.t. \gamma 1 = a

              \gamma^T 1= b

              \gamma\geq 0
     where :

     - M is the metric cost matrix
     - a and b are the sample weights

     Parameters
     ----------
     a : (ns,) ndarray, float64
         source histogram
     b : (nt,) ndarray, float64
         target histogram
     M : (ns,nt) ndarray, float64
         loss matrix
+    max_iter : int
+        The maximum number of iterations before stopping the optimization
+        algorithm if it has not converged.

     Returns
     -------
     gamma: (ns x nt) ndarray
         Optimal transportation matrix for the given parameters

     """
     cdef int n1= M.shape[0]
     cdef int n2= M.shape[1]
@@ -61,7 +67,8 @@
     cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([n1, n2])
     cdef np.ndarray[double, ndim=1, mode="c"] alpha=np.zeros(n1)
     cdef np.ndarray[double, ndim=1, mode="c"] beta=np.zeros(n2)
-
+
+
     if not len(a):
         a=np.ones((n1,))/n1

@@ -69,47 +76,54 @@
     if not len(b):
         b=np.ones((n2,))/n2

     # calling the function
-    EMD_wrap(n1,n2,<double*> a.data,<double*> b.data,<double*> M.data,<double*> G.data,
-             <double*> alpha.data, <double*> beta.data, <double*> &cost, maxiter)
+    cdef int resultSolver = EMD_wrap(n1,n2,<double*> a.data,<double*> b.data,<double*> M.data,<double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
+    if resultSolver != OPTIMAL:
+        if resultSolver == INFEASIBLE:
+            print("Problem infeasible. 
Try to increase numItermax.") + elif resultSolver == UNBOUNDED: + print("Problem unbounded") return G, alpha, beta @cython.boundscheck(False) @cython.wraparound(False) -def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int maxiter): +def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int max_iter): """ Solves the Earth Movers distance problem and returns the optimal transport loss - + gamm=emd(a,b,M) - + .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F - + \gamma = arg\min_\gamma <\gamma,M>_F + s.t. \gamma 1 = a - - \gamma^T 1= b - + + \gamma^T 1= b + \gamma\geq 0 where : - + - M is the metric cost matrix - a and b are the sample weights - + Parameters ---------- a : (ns,) ndarray, float64 - source histogram + source histogram b : (nt,) ndarray, float64 target histogram M : (ns,nt) ndarray, float64 - loss matrix - - + loss matrix + max_iter : int + The maximum number of iterations before stopping the optimization + algorithm if it has not converged. + + Returns ------- gamma: (ns x nt) ndarray Optimal transportation matrix for the given parameters - + """ cdef int n1= M.shape[0] cdef int n2= M.shape[1] @@ -119,16 +133,19 @@ def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mo cdef np.ndarray[double, ndim = 1, mode = "c"] alpha = np.zeros([n1]) cdef np.ndarray[double, ndim = 1, mode = "c"] beta = np.zeros([n2]) - + if not len(a): a=np.ones((n1,))/n1 if not len(b): b=np.ones((n2,))/n2 # calling the function - EMD_wrap(n1,n2,<double*> a.data,<double*> b.data,<double*> M.data,<double*> G.data, - <double*> alpha.data, <double*> beta.data, <double*> &cost, maxiter) - + cdef int resultSolver = EMD_wrap(n1,n2,<double*> a.data,<double*> b.data,<double*> M.data,<double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter) + if resultSolver != OPTIMAL: + if resultSolver == INFEASIBLE: + print("Problem infeasible. Try to inscrease numItermax.") + elif resultSolver == UNBOUNDED: + print("Problem unbounded") return cost, alpha, beta diff --git a/ot/optim.py b/ot/optim.py index 79f4f66..1d09adc 100644 --- a/ot/optim.py +++ b/ot/optim.py @@ -3,13 +3,19 @@ Optimization algorithms for OT """ +# Author: Remi Flamary <remi.flamary@unice.fr> +# +# License: MIT License + import numpy as np from scipy.optimize.linesearch import scalar_search_armijo from .lp import emd from .bregman import sinkhorn # The corresponding scipy function does not work for matrices -def line_search_armijo(f,xk,pk,gfk,old_fval,args=(),c1=1e-4,alpha0=0.99): + + +def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=0.99): """ Armijo linesearch function that works with matrices @@ -51,20 +57,21 @@ def line_search_armijo(f,xk,pk,gfk,old_fval,args=(),c1=1e-4,alpha0=0.99): def phi(alpha1): fc[0] += 1 - return f(xk + alpha1*pk, *args) + return f(xk + alpha1 * pk, *args) if old_fval is None: phi0 = phi(0.) 
else: phi0 = old_fval - derphi0 = np.sum(pk*gfk) # Quickfix for matrices - alpha,phi1 = scalar_search_armijo(phi,phi0,derphi0,c1=c1,alpha0=alpha0) + derphi0 = np.sum(pk * gfk) # Quickfix for matrices + alpha, phi1 = scalar_search_armijo( + phi, phi0, derphi0, c1=c1, alpha0=alpha0) - return alpha,fc[0],phi1 + return alpha, fc[0], phi1 -def cg(a,b,M,reg,f,df,G0=None,numItermax = 200,stopThr=1e-9,verbose=False,log=False): +def cg(a, b, M, reg, f, df, G0=None, numItermax=200, stopThr=1e-9, verbose=False, log=False): """ Solve the general regularized OT problem with conditional gradient @@ -128,74 +135,74 @@ def cg(a,b,M,reg,f,df,G0=None,numItermax = 200,stopThr=1e-9,verbose=False,log=Fa """ - loop=1 + loop = 1 if log: - log={'loss':[]} + log = {'loss': []} if G0 is None: - G=np.outer(a,b) + G = np.outer(a, b) else: - G=G0 + G = G0 def cost(G): - return np.sum(M*G)+reg*f(G) + return np.sum(M * G) + reg * f(G) - f_val=cost(G) + f_val = cost(G) if log: log['loss'].append(f_val) - it=0 + it = 0 if verbose: - print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32) - print('{:5d}|{:8e}|{:8e}'.format(it,f_val,0)) + print('{:5s}|{:12s}|{:8s}'.format( + 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32) + print('{:5d}|{:8e}|{:8e}'.format(it, f_val, 0)) while loop: - it+=1 - old_fval=f_val - + it += 1 + old_fval = f_val # problem linearization - Mi=M+reg*df(G) + Mi = M + reg * df(G) # set M positive - Mi+=Mi.min() + Mi += Mi.min() # solve linear program - Gc=emd(a,b,Mi) + Gc = emd(a, b, Mi) - deltaG=Gc-G + deltaG = Gc - G # line search - alpha,fc,f_val = line_search_armijo(cost,G,deltaG,Mi,f_val) + alpha, fc, f_val = line_search_armijo(cost, G, deltaG, Mi, f_val) - G=G+alpha*deltaG + G = G + alpha * deltaG # test convergence - if it>=numItermax: - loop=0 - - delta_fval=(f_val-old_fval)/abs(f_val) - if abs(delta_fval)<stopThr: - loop=0 + if it >= numItermax: + loop = 0 + delta_fval = (f_val - old_fval) / abs(f_val) + if abs(delta_fval) < stopThr: + loop = 0 if log: log['loss'].append(f_val) if verbose: - if it%20 ==0: - print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32) - print('{:5d}|{:8e}|{:8e}'.format(it,f_val,delta_fval)) - + if it % 20 == 0: + print('{:5s}|{:12s}|{:8s}'.format( + 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32) + print('{:5d}|{:8e}|{:8e}'.format(it, f_val, delta_fval)) if log: - return G,log + return G, log else: return G -def gcg(a,b,M,reg1,reg2,f,df,G0=None,numItermax = 10,numInnerItermax = 200,stopThr=1e-9,verbose=False,log=False): + +def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10, numInnerItermax=200, stopThr=1e-9, verbose=False, log=False): """ Solve the general regularized OT problem with the generalized conditional gradient @@ -264,70 +271,68 @@ def gcg(a,b,M,reg1,reg2,f,df,G0=None,numItermax = 10,numInnerItermax = 200,stopT """ - loop=1 + loop = 1 if log: - log={'loss':[]} + log = {'loss': []} if G0 is None: - G=np.outer(a,b) + G = np.outer(a, b) else: - G=G0 + G = G0 def cost(G): - return np.sum(M*G)+ reg1*np.sum(G*np.log(G)) + reg2*f(G) + return np.sum(M * G) + reg1 * np.sum(G * np.log(G)) + reg2 * f(G) - f_val=cost(G) + f_val = cost(G) if log: log['loss'].append(f_val) - it=0 + it = 0 if verbose: - print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32) - print('{:5d}|{:8e}|{:8e}'.format(it,f_val,0)) + print('{:5s}|{:12s}|{:8s}'.format( + 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32) + print('{:5d}|{:8e}|{:8e}'.format(it, f_val, 0)) while loop: - it+=1 - old_fval=f_val - + it += 1 + old_fval = f_val # 
problem linearization - Mi=M+reg2*df(G) + Mi = M + reg2 * df(G) # solve linear program with Sinkhorn - #Gc = sinkhorn_stabilized(a,b, Mi, reg1, numItermax = numInnerItermax) - Gc = sinkhorn(a,b, Mi, reg1, numItermax = numInnerItermax) + # Gc = sinkhorn_stabilized(a,b, Mi, reg1, numItermax = numInnerItermax) + Gc = sinkhorn(a, b, Mi, reg1, numItermax=numInnerItermax) - deltaG=Gc-G + deltaG = Gc - G # line search - dcost=Mi+reg1*(1+np.log(G)) #?? - alpha,fc,f_val = line_search_armijo(cost,G,deltaG,dcost,f_val) + dcost = Mi + reg1 * (1 + np.log(G)) # ?? + alpha, fc, f_val = line_search_armijo(cost, G, deltaG, dcost, f_val) - G=G+alpha*deltaG + G = G + alpha * deltaG # test convergence - if it>=numItermax: - loop=0 - - delta_fval=(f_val-old_fval)/abs(f_val) - if abs(delta_fval)<stopThr: - loop=0 + if it >= numItermax: + loop = 0 + delta_fval = (f_val - old_fval) / abs(f_val) + if abs(delta_fval) < stopThr: + loop = 0 if log: log['loss'].append(f_val) if verbose: - if it%20 ==0: - print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32) - print('{:5d}|{:8e}|{:8e}'.format(it,f_val,delta_fval)) - + if it % 20 == 0: + print('{:5s}|{:12s}|{:8s}'.format( + 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32) + print('{:5d}|{:8e}|{:8e}'.format(it, f_val, delta_fval)) if log: - return G,log + return G, log else: return G - @@ -2,91 +2,84 @@ Functions for plotting OT matrices """ +# Author: Remi Flamary <remi.flamary@unice.fr> +# +# License: MIT License import numpy as np import matplotlib.pylab as pl from matplotlib import gridspec -def plot1D_mat(a,b,M,title=''): - """ Plot matrix M with the source and target 1D distribution - - Creates a subplot with the source distribution a on the left and +def plot1D_mat(a, b, M, title=''): + """ Plot matrix M with the source and target 1D distribution + + Creates a subplot with the source distribution a on the left and target distribution b on the tot. The matrix M is shown in between. - - + + Parameters ---------- - - a : np.array (na,) + a : np.array, shape (na,) Source distribution - b : np.array (nb,) - Target distribution - M : np.array (na,nb) + b : np.array, shape (nb,) + Target distribution + M : np.array, shape (na,nb) Matrix to plot - - - """ - - na=M.shape[0] - nb=M.shape[1] - + na, nb = M.shape + gs = gridspec.GridSpec(3, 3) - - - xa=np.arange(na) - xb=np.arange(nb) - - - ax1=pl.subplot(gs[0,1:]) - pl.plot(xb,b,'r',label='Target distribution') + + xa = np.arange(na) + xb = np.arange(nb) + + ax1 = pl.subplot(gs[0, 1:]) + pl.plot(xb, b, 'r', label='Target distribution') pl.yticks(()) pl.title(title) - - #pl.axis('off') - - ax2=pl.subplot(gs[1:,0]) - pl.plot(a,xa,'b',label='Source distribution') + + ax2 = pl.subplot(gs[1:, 0]) + pl.plot(a, xa, 'b', label='Source distribution') pl.gca().invert_xaxis() pl.gca().invert_yaxis() pl.xticks(()) - #pl.ylim((0,n)) - #pl.axis('off') - - pl.subplot(gs[1:,1:],sharex=ax1,sharey=ax2) - pl.imshow(M,interpolation='nearest') - - pl.xlim((0,nb)) + pl.subplot(gs[1:, 1:], sharex=ax1, sharey=ax2) + pl.imshow(M, interpolation='nearest') + pl.axis('off') + + pl.xlim((0, nb)) + pl.tight_layout() + pl.subplots_adjust(wspace=0., hspace=0.2) -def plot2D_samples_mat(xs,xt,G,thr=1e-8,**kwargs): + +def plot2D_samples_mat(xs, xt, G, thr=1e-8, **kwargs): """ Plot matrix M in 2D with lines using alpha values - - Plot lines between source and target 2D samples with a color + + Plot lines between source and target 2D samples with a color proportional to the value of the matrix G between samples. 
-
-
+
+
     Parameters
     ----------
-
-    xs : np.array (ns,2)
+    xs : ndarray, shape (ns,2)
         Source samples positions
-    b : np.array (nt,2)
+    b : ndarray, shape (nt,2)
         Target samples positions
-    G : np.array (na,nb)
+    G : ndarray, shape (na,nb)
         OT matrix
     thr : float, optional
         threshold above which the line is drawn
     **kwargs : dict
-        paameters given to the plot functions (default color is black if nothing given)
-
+        parameters given to the plot functions (default color is black if
+        nothing given)
     """
-    if ('color' not in kwargs) and ('c' not in kwargs):
-        kwargs['color']='k'
-    mx=G.max()
+    if ('color' not in kwargs) and ('c' not in kwargs):
+        kwargs['color'] = 'k'
+    mx = G.max()
     for i in range(xs.shape[0]):
         for j in range(xt.shape[0]):
-            if G[i,j]/mx>thr:
-                pl.plot([xs[i,0],xt[j,0]],[xs[i,1],xt[j,1]],alpha=G[i,j]/mx,**kwargs)
-
\ No newline at end of file + if G[i, j] / mx > thr: + pl.plot([xs[i, 0], xt[j, 0]], [xs[i, 1], xt[j, 1]], + alpha=G[i, j] / mx, **kwargs) diff --git a/ot/utils.py b/ot/utils.py index 7ad7637..31a002b 100644 --- a/ot/utils.py +++ b/ot/utils.py @@ -2,36 +2,50 @@ """ Various function that can be usefull """ + +# Author: Remi Flamary <remi.flamary@unice.fr> +# +# License: MIT License + +import multiprocessing +from functools import reduce +import time + import numpy as np from scipy.spatial.distance import cdist -import multiprocessing +import sys +import warnings + + +__time_tic_toc = time.time() -import time -__time_tic_toc=time.time() def tic(): """ Python implementation of Matlab tic() function """ global __time_tic_toc - __time_tic_toc=time.time() + __time_tic_toc = time.time() + def toc(message='Elapsed time : {} s'): """ Python implementation of Matlab toc() function """ - t=time.time() - print(message.format(t-__time_tic_toc)) - return t-__time_tic_toc + t = time.time() + print(message.format(t - __time_tic_toc)) + return t - __time_tic_toc + def toq(): """ Python implementation of Julia toc() function """ - t=time.time() - return t-__time_tic_toc + t = time.time() + return t - __time_tic_toc -def kernel(x1,x2,method='gaussian',sigma=1,**kwargs): +def kernel(x1, x2, method='gaussian', sigma=1, **kwargs): """Compute kernel matrix""" - if method.lower() in ['gaussian','gauss','rbf']: - K=np.exp(-dist(x1,x2)/(2*sigma**2)) + if method.lower() in ['gaussian', 'gauss', 'rbf']: + K = np.exp(-dist(x1, x2) / (2 * sigma**2)) return K + def unif(n): """ return a uniform histogram of length n (simplex) @@ -48,17 +62,19 @@ def unif(n): """ - return np.ones((n,))/n + return np.ones((n,)) / n + -def clean_zeros(a,b,M): - """ Remove all components with zeros weights in a and b +def clean_zeros(a, b, M): + """ Remove all components with zeros weights in a and b """ - M2=M[a>0,:][:,b>0].copy() # copy force c style matrix (froemd) - a2=a[a>0] - b2=b[b>0] - return a2,b2,M2 + M2 = M[a > 0, :][:, b > 0].copy() # copy force c style matrix (froemd) + a2 = a[a > 0] + b2 = b[b > 0] + return a2, b2, M2 -def dist(x1,x2=None,metric='sqeuclidean'): + +def dist(x1, x2=None, metric='sqeuclidean'): """Compute distance between samples in x1 and x2 using function scipy.spatial.distance.cdist Parameters @@ -84,12 +100,12 @@ def dist(x1,x2=None,metric='sqeuclidean'): """ if x2 is None: - x2=x1 + x2 = x1 - return cdist(x1,x2,metric=metric) + return cdist(x1, x2, metric=metric) -def dist0(n,method='lin_square'): +def dist0(n, method='lin_square'): """Compute standard cost matrices of size (n,n) for OT problems Parameters @@ -111,16 +127,50 @@ def dist0(n,method='lin_square'): """ - res=0 - if method=='lin_square': - x=np.arange(n,dtype=np.float64).reshape((n,1)) - res=dist(x,x) + res = 0 + if method == 'lin_square': + x = np.arange(n, dtype=np.float64).reshape((n, 1)) + res = dist(x, x) return res +def cost_normalization(C, norm=None): + """ Apply normalization to the loss matrix + + + Parameters + ---------- + C : np.array (n1, n2) + The cost matrix to normalize. + norm : str + type of normalization from 'median','max','log','loglog'. Any other + value do not normalize. + + + Returns + ------- + + C : np.array (n1, n2) + The input cost matrix normalized according to given norm. 
+ + """ + + if norm == "median": + C /= float(np.median(C)) + elif norm == "max": + C /= float(np.max(C)) + elif norm == "log": + C = np.log(1 + C) + elif norm == "loglog": + C = np.log(1 + np.log(1 + C)) + + return C + + def dots(*args): """ dots function for multiple matrix multiply """ - return reduce(np.dot,args) + return reduce(np.dot, args) + def fun(f, q_in, q_out): """ Utility function for parmap with no serializing problems """ @@ -130,6 +180,7 @@ def fun(f, q_in, q_out): break q_out.put((i, f(x))) + def parmap(f, X, nprocs=multiprocessing.cpu_count()): """ paralell map for multiprocessing """ q_in = multiprocessing.Queue(1) @@ -147,4 +198,241 @@ def parmap(f, X, nprocs=multiprocessing.cpu_count()): [p.join() for p in proc] - return [x for i, x in sorted(res)]
+
+
+def check_params(**kwargs):
+    """check_params: check whether some parameters are missing
+    """
+
+    missing_params = []
+    check = True
+
+    for param in kwargs:
+        if kwargs[param] is None:
+            missing_params.append(param)
+
+    if len(missing_params) > 0:
+        print("POT - Warning: the following necessary parameters are missing")
+        for p in missing_params:
+            print("\n", p)
+
+        check = False
+
+    return check
+
+
+class deprecated(object):
+    """Decorator to mark a function or class as deprecated.
+
+    deprecated class from the scikit-learn package
+    https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/deprecation.py
+    Issues a warning when the function is called or the class is instantiated,
+    and adds a warning to the docstring.
+    The optional extra argument will be appended to the deprecation message
+    and the docstring. Note: to use this with the default value for extra, put
+    in an empty set of parentheses:
+    >>> from ot.utils import deprecated
+    >>> @deprecated()
+    ... def some_function(): pass
+
+    Parameters
+    ----------
+    extra : string
+        to be added to the deprecation messages
+    """
+
+    # Adapted from http://wiki.python.org/moin/PythonDecoratorLibrary,
+    # but with many changes.
+
+    def __init__(self, extra=''):
+        self.extra = extra
+
+    def __call__(self, obj):
+        """Call method
+
+        Parameters
+        ----------
+        obj : object
+        """
+        if isinstance(obj, type):
+            return self._decorate_class(obj)
+        else:
+            return self._decorate_fun(obj)
+
+    def _decorate_class(self, cls):
+        msg = "Class %s is deprecated" % cls.__name__
+        if self.extra:
+            msg += "; %s" % self.extra
+
+        # FIXME: we should probably reset __new__ for full generality
+        init = cls.__init__
+
+        def wrapped(*args, **kwargs):
+            warnings.warn(msg, category=DeprecationWarning)
+            return init(*args, **kwargs)
+
+        cls.__init__ = wrapped
+
+        wrapped.__name__ = '__init__'
+        wrapped.__doc__ = self._update_doc(init.__doc__)
+        wrapped.deprecated_original = init
+
+        return cls
+
+    def _decorate_fun(self, fun):
+        """Decorate function fun"""
+
+        msg = "Function %s is deprecated" % fun.__name__
+        if self.extra:
+            msg += "; %s" % self.extra
+
+        def wrapped(*args, **kwargs):
+            warnings.warn(msg, category=DeprecationWarning)
+            return fun(*args, **kwargs)
+
+        wrapped.__name__ = fun.__name__
+        wrapped.__dict__ = fun.__dict__
+        wrapped.__doc__ = self._update_doc(fun.__doc__)
+
+        return wrapped
+
+    def _update_doc(self, olddoc):
+        newdoc = "DEPRECATED"
+        if self.extra:
+            newdoc = "%s: %s" % (newdoc, self.extra)
+        if olddoc:
+            newdoc = "%s\n\n%s" % (newdoc, olddoc)
+        return newdoc
+
+
+def _is_deprecated(func):
+    """Helper to check if func is wrapped by our deprecated decorator"""
+    if sys.version_info < (3, 5):
+        raise NotImplementedError("This is only available for Python 3.5 "
+                                  "or above")
+    closures = getattr(func, '__closure__', [])
+    if closures is None:
+        closures = []
+    is_deprecated = ('deprecated' in ''.join([c.cell_contents
+                                              for c in closures
+                                              if isinstance(c.cell_contents, str)]))
+    return is_deprecated
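A short sketch of the decorator in action (old_thing is a hypothetical function; extra ends up in both the warning message and the rewritten docstring):

    import warnings
    from ot.utils import deprecated

    @deprecated('use new_thing instead')
    def old_thing():
        return 42

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        old_thing()            # warns "Function old_thing is deprecated; use new_thing instead"
    print(caught[0].category)  # <class 'DeprecationWarning'>
    print(old_thing.__doc__)   # DEPRECATED: use new_thing instead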
+ """ + + @classmethod + def _get_param_names(cls): + """Get parameter names for the estimator""" + try: + from inspect import signature + except ImportError: + from .externals.funcsigs import signature + # fetch the constructor or the original constructor before + # deprecation wrapping if any + init = getattr(cls.__init__, 'deprecated_original', cls.__init__) + if init is object.__init__: + # No explicit constructor to introspect + return [] + + # introspect the constructor arguments to find the model parameters + # to represent + init_signature = signature(init) + # Consider the constructor parameters excluding 'self' + parameters = [p for p in init_signature.parameters.values() + if p.name != 'self' and p.kind != p.VAR_KEYWORD] + for p in parameters: + if p.kind == p.VAR_POSITIONAL: + raise RuntimeError("POT estimators should always " + "specify their parameters in the signature" + " of their __init__ (no varargs)." + " %s with constructor %s doesn't " + " follow this convention." + % (cls, init_signature)) + # Extract and sort argument names excluding 'self' + return sorted([p.name for p in parameters]) + + def get_params(self, deep=True): + """Get parameters for this estimator. + + Parameters + ---------- + deep : boolean, optional + If True, will return the parameters for this estimator and + contained subobjects that are estimators. + + Returns + ------- + params : mapping of string to any + Parameter names mapped to their values. + """ + out = dict() + for key in self._get_param_names(): + # We need deprecation warnings to always be on in order to + # catch deprecated param values. + # This is set in utils/__init__.py but it gets overwritten + # when running under python3 somehow. + warnings.simplefilter("always", DeprecationWarning) + try: + with warnings.catch_warnings(record=True) as w: + value = getattr(self, key, None) + if len(w) and w[0].category == DeprecationWarning: + # if the parameter is deprecated, don't show it + continue + finally: + warnings.filters.pop(0) + + # XXX: should we rather test if instance of estimator? + if deep and hasattr(value, 'get_params'): + deep_items = value.get_params().items() + out.update((key + '__' + k, val) for k, val in deep_items) + out[key] = value + return out + + def set_params(self, **params): + """Set the parameters of this estimator. + + The method works on simple estimators as well as on nested objects + (such as pipelines). The latter have parameters of the form + ``<component>__<parameter>`` so that it's possible to update each + component of a nested object. + + Returns + ------- + self + """ + if not params: + # Simple optimisation to gain speed (inspect is slow) + return self + valid_params = self.get_params(deep=True) + # for key, value in iteritems(params): + for key, value in params.items(): + split = key.split('__', 1) + if len(split) > 1: + # nested objects case + name, sub_name = split + if name not in valid_params: + raise ValueError('Invalid parameter %s for estimator %s. ' + 'Check the list of available parameters ' + 'with `estimator.get_params().keys()`.' % + (name, self)) + sub_object = valid_params[name] + sub_object.set_params(**{sub_name: value}) + else: + # simple objects case + if key not in valid_params: + raise ValueError('Invalid parameter %s for estimator %s. ' + 'Check the list of available parameters ' + 'with `estimator.get_params().keys()`.' % + (key, self.__class__.__name__)) + setattr(self, key, value) + return self |