author     Alexandre Gramfort <alexandre.gramfort@m4x.org>  2017-07-12 23:52:57 +0200
committer  Alexandre Gramfort <alexandre.gramfort@m4x.org>  2017-07-20 14:08:02 +0200
commit     37ca3142a4c8c808382f5eb1c23bf198c3e4610e (patch)
tree       55c2b56581b8f0f607d8e0a50778267baa4960e3
parent     da21b9888e77f7512727a4f50c60bd475e2c9606 (diff)
pep8
-rw-r--r--   ot/bregman.py    495
-rw-r--r--   ot/da.py         504
-rw-r--r--   ot/datasets.py   107
-rw-r--r--   ot/dr.py         197
-rw-r--r--   ot/optim.py      133
5 files changed, 735 insertions, 701 deletions
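
Since this is a pure pep8 formatting pass, behaviour is expected to be unchanged. A minimal smoke test along these lines (the inputs and tolerance are illustrative, not part of the commit) exercises the refactored ot.sinkhorn / ot.sinkhorn2 entry points on the small problem used in the docstrings below:

    import numpy as np
    import ot

    a = [.5, .5]
    b = [.5, .5]
    M = [[0., 1.], [1., 0.]]

    G = ot.sinkhorn(a, b, M, 1)        # regularized OT matrix
    loss = ot.sinkhorn2(a, b, M, 1)    # transport cost, ~= 0.26894142 (see docstring below)

    # the plan must still (approximately) satisfy both marginal constraints
    assert np.allclose(G.sum(axis=1), a, atol=1e-4)
    assert np.allclose(G.sum(axis=0), b, atol=1e-4)
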
diff --git a/ot/bregman.py b/ot/bregman.py
index 0d68602..fe10880 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -5,7 +5,8 @@ Bregman projections for regularized OT
import numpy as np
-def sinkhorn(a,b, M, reg,method='sinkhorn', numItermax = 1000, stopThr=1e-9, verbose=False, log=False,**kwargs):
+
+def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs):
u"""
Solve the entropic regularization optimal transport problem and return the OT matrix
@@ -92,22 +93,28 @@ def sinkhorn(a,b, M, reg,method='sinkhorn', numItermax = 1000, stopThr=1e-9, ver
"""
- if method.lower()=='sinkhorn':
- sink= lambda: sinkhorn_knopp(a,b, M, reg,numItermax=numItermax,
- stopThr=stopThr, verbose=verbose, log=log,**kwargs)
- elif method.lower()=='sinkhorn_stabilized':
- sink= lambda: sinkhorn_stabilized(a,b, M, reg,numItermax=numItermax,
- stopThr=stopThr, verbose=verbose, log=log, **kwargs)
- elif method.lower()=='sinkhorn_epsilon_scaling':
- sink= lambda: sinkhorn_epsilon_scaling(a,b, M, reg,numItermax=numItermax,
- stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ if method.lower() == 'sinkhorn':
+ def sink():
+ return sinkhorn_knopp(a, b, M, reg, numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ elif method.lower() == 'sinkhorn_stabilized':
+ def sink():
+ return sinkhorn_stabilized(a, b, M, reg, numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ elif method.lower() == 'sinkhorn_epsilon_scaling':
+ def sink():
+ return sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose, log=log, **kwargs)
else:
print('Warning : unknown method using classic Sinkhorn Knopp')
- sink= lambda: sinkhorn_knopp(a,b, M, reg, **kwargs)
+
+ def sink():
+ return sinkhorn_knopp(a, b, M, reg, **kwargs)
return sink()
-def sinkhorn2(a,b, M, reg,method='sinkhorn', numItermax = 1000, stopThr=1e-9, verbose=False, log=False,**kwargs):
+
+def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs):
u"""
Solve the entropic regularization optimal transport problem and return the loss
@@ -170,7 +177,7 @@ def sinkhorn2(a,b, M, reg,method='sinkhorn', numItermax = 1000, stopThr=1e-9, ve
>>> M=[[0.,1.],[1.,0.]]
>>> ot.sinkhorn2(a,b,M,1)
array([ 0.26894142])
-
+
References
@@ -194,27 +201,32 @@ def sinkhorn2(a,b, M, reg,method='sinkhorn', numItermax = 1000, stopThr=1e-9, ve
"""
- if method.lower()=='sinkhorn':
- sink= lambda: sinkhorn_knopp(a,b, M, reg,numItermax=numItermax,
- stopThr=stopThr, verbose=verbose, log=log,**kwargs)
- elif method.lower()=='sinkhorn_stabilized':
- sink= lambda: sinkhorn_stabilized(a,b, M, reg,numItermax=numItermax,
- stopThr=stopThr, verbose=verbose, log=log, **kwargs)
- elif method.lower()=='sinkhorn_epsilon_scaling':
- sink= lambda: sinkhorn_epsilon_scaling(a,b, M, reg,numItermax=numItermax,
- stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ if method.lower() == 'sinkhorn':
+ def sink():
+ return sinkhorn_knopp(a, b, M, reg, numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ elif method.lower() == 'sinkhorn_stabilized':
+ def sink():
+ return sinkhorn_stabilized(a, b, M, reg, numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ elif method.lower() == 'sinkhorn_epsilon_scaling':
+ def sink():
+ return sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose, log=log, **kwargs)
else:
print('Warning : unknown method using classic Sinkhorn Knopp')
- sink= lambda: sinkhorn_knopp(a,b, M, reg, **kwargs)
-
- b=np.asarray(b,dtype=np.float64)
- if len(b.shape)<2:
- b=b.reshape((-1,1))
+
+ def sink():
+ return sinkhorn_knopp(a, b, M, reg, **kwargs)
+
+ b = np.asarray(b, dtype=np.float64)
+ if len(b.shape) < 2:
+ b = b.reshape((-1, 1))
return sink()
-def sinkhorn_knopp(a,b, M, reg, numItermax = 1000, stopThr=1e-9, verbose=False, log=False,**kwargs):
+def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs):
"""
Solve the entropic regularization optimal transport problem and return the OT matrix
@@ -290,100 +302,101 @@ def sinkhorn_knopp(a,b, M, reg, numItermax = 1000, stopThr=1e-9, verbose=False,
"""
- a=np.asarray(a,dtype=np.float64)
- b=np.asarray(b,dtype=np.float64)
- M=np.asarray(M,dtype=np.float64)
-
-
- if len(a)==0:
- a=np.ones((M.shape[0],),dtype=np.float64)/M.shape[0]
- if len(b)==0:
- b=np.ones((M.shape[1],),dtype=np.float64)/M.shape[1]
+ a = np.asarray(a, dtype=np.float64)
+ b = np.asarray(b, dtype=np.float64)
+ M = np.asarray(M, dtype=np.float64)
+ if len(a) == 0:
+ a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
+ if len(b) == 0:
+ b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
# init data
Nini = len(a)
Nfin = len(b)
- if len(b.shape)>1:
- nbb=b.shape[1]
+ if len(b.shape) > 1:
+ nbb = b.shape[1]
else:
- nbb=0
-
+ nbb = 0
if log:
- log={'err':[]}
+ log = {'err': []}
- # we assume that no distances are null except those of the diagonal of distances
+ # we assume that no distances are null except those of the diagonal of
+ # distances
if nbb:
- u = np.ones((Nini,nbb))/Nini
- v = np.ones((Nfin,nbb))/Nfin
+ u = np.ones((Nini, nbb)) / Nini
+ v = np.ones((Nfin, nbb)) / Nfin
else:
- u = np.ones(Nini)/Nini
- v = np.ones(Nfin)/Nfin
+ u = np.ones(Nini) / Nini
+ v = np.ones(Nfin) / Nfin
+ # print(reg)
- #print(reg)
+ K = np.exp(-M / reg)
+ # print(np.min(K))
- K = np.exp(-M/reg)
- #print(np.min(K))
-
- Kp = (1/a).reshape(-1, 1) * K
+ Kp = (1 / a).reshape(-1, 1) * K
cpt = 0
- err=1
- while (err>stopThr and cpt<numItermax):
+ err = 1
+ while (err > stopThr and cpt < numItermax):
uprev = u
vprev = v
KtransposeU = np.dot(K.T, u)
v = np.divide(b, KtransposeU)
- u = 1./np.dot(Kp,v)
+ u = 1. / np.dot(Kp, v)
- if (np.any(KtransposeU==0) or
- np.any(np.isnan(u)) or np.any(np.isnan(v)) or
- np.any(np.isinf(u)) or np.any(np.isinf(v))):
+ if (np.any(KtransposeU == 0) or
+ np.any(np.isnan(u)) or np.any(np.isnan(v)) or
+ np.any(np.isinf(u)) or np.any(np.isinf(v))):
# we have reached the machine precision
# come back to previous solution and quit loop
print('Warning: numerical errors at iteration', cpt)
u = uprev
v = vprev
break
- if cpt%10==0:
- # we can speed up the process by checking for the error only all the 10th iterations
+ if cpt % 10 == 0:
+            # we can speed up the process by checking for the error only
+            # every 10th iteration
if nbb:
- err = np.sum((u-uprev)**2)/np.sum((u)**2)+np.sum((v-vprev)**2)/np.sum((v)**2)
+ err = np.sum((u - uprev)**2) / np.sum((u)**2) + \
+ np.sum((v - vprev)**2) / np.sum((v)**2)
else:
transp = u.reshape(-1, 1) * (K * v)
- err = np.linalg.norm((np.sum(transp,axis=0)-b))**2
+ err = np.linalg.norm((np.sum(transp, axis=0) - b))**2
if log:
log['err'].append(err)
if verbose:
- if cpt%200 ==0:
- print('{:5s}|{:12s}'.format('It.','Err')+'\n'+'-'*19)
- print('{:5d}|{:8e}|'.format(cpt,err))
- cpt = cpt +1
+ if cpt % 200 == 0:
+ print(
+ '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err))
+ cpt = cpt + 1
if log:
- log['u']=u
- log['v']=v
+ log['u'] = u
+ log['v'] = v
- if nbb: #return only loss
- res=np.zeros((nbb))
+ if nbb: # return only loss
+ res = np.zeros((nbb))
for i in range(nbb):
- res[i]=np.sum(u[:,i].reshape((-1,1))*K*v[:,i].reshape((1,-1))*M)
+ res[i] = np.sum(
+ u[:, i].reshape((-1, 1)) * K * v[:, i].reshape((1, -1)) * M)
if log:
- return res,log
+ return res, log
else:
return res
- else: # return OT matrix
+ else: # return OT matrix
if log:
- return u.reshape((-1,1))*K*v.reshape((1,-1)),log
+ return u.reshape((-1, 1)) * K * v.reshape((1, -1)), log
else:
- return u.reshape((-1,1))*K*v.reshape((1,-1))
+ return u.reshape((-1, 1)) * K * v.reshape((1, -1))
-def sinkhorn_stabilized(a,b, M, reg, numItermax = 1000,tau=1e3, stopThr=1e-9,warmstart=None, verbose=False,print_period=20, log=False,**kwargs):
+def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, warmstart=None, verbose=False, print_period=20, log=False, **kwargs):
"""
Solve the entropic regularization OT problem with log stabilization
@@ -468,21 +481,21 @@ def sinkhorn_stabilized(a,b, M, reg, numItermax = 1000,tau=1e3, stopThr=1e-9,war
"""
- a=np.asarray(a,dtype=np.float64)
- b=np.asarray(b,dtype=np.float64)
- M=np.asarray(M,dtype=np.float64)
+ a = np.asarray(a, dtype=np.float64)
+ b = np.asarray(b, dtype=np.float64)
+ M = np.asarray(M, dtype=np.float64)
- if len(a)==0:
- a=np.ones((M.shape[0],),dtype=np.float64)/M.shape[0]
- if len(b)==0:
- b=np.ones((M.shape[1],),dtype=np.float64)/M.shape[1]
+ if len(a) == 0:
+ a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
+ if len(b) == 0:
+ b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
# test if multiple target
- if len(b.shape)>1:
- nbb=b.shape[1]
- a=a[:,np.newaxis]
+ if len(b.shape) > 1:
+ nbb = b.shape[1]
+ a = a[:, np.newaxis]
else:
- nbb=0
+ nbb = 0
# init data
na = len(a)
@@ -490,81 +503,80 @@ def sinkhorn_stabilized(a,b, M, reg, numItermax = 1000,tau=1e3, stopThr=1e-9,war
cpt = 0
if log:
- log={'err':[]}
+ log = {'err': []}
- # we assume that no distances are null except those of the diagonal of distances
+ # we assume that no distances are null except those of the diagonal of
+ # distances
if warmstart is None:
- alpha,beta=np.zeros(na),np.zeros(nb)
+ alpha, beta = np.zeros(na), np.zeros(nb)
else:
- alpha,beta=warmstart
+ alpha, beta = warmstart
if nbb:
- u,v = np.ones((na,nbb))/na,np.ones((nb,nbb))/nb
+ u, v = np.ones((na, nbb)) / na, np.ones((nb, nbb)) / nb
else:
- u,v = np.ones(na)/na,np.ones(nb)/nb
+ u, v = np.ones(na) / na, np.ones(nb) / nb
- def get_K(alpha,beta):
+ def get_K(alpha, beta):
"""log space computation"""
- return np.exp(-(M-alpha.reshape((na,1))-beta.reshape((1,nb)))/reg)
+ return np.exp(-(M - alpha.reshape((na, 1)) - beta.reshape((1, nb))) / reg)
- def get_Gamma(alpha,beta,u,v):
+ def get_Gamma(alpha, beta, u, v):
"""log space gamma computation"""
- return np.exp(-(M-alpha.reshape((na,1))-beta.reshape((1,nb)))/reg+np.log(u.reshape((na,1)))+np.log(v.reshape((1,nb))))
+ return np.exp(-(M - alpha.reshape((na, 1)) - beta.reshape((1, nb))) / reg + np.log(u.reshape((na, 1))) + np.log(v.reshape((1, nb))))
- #print(np.min(K))
+ # print(np.min(K))
- K=get_K(alpha,beta)
+ K = get_K(alpha, beta)
transp = K
- loop=1
+ loop = 1
cpt = 0
- err=1
+ err = 1
while loop:
-
-
uprev = u
vprev = v
# sinkhorn update
- v = b/(np.dot(K.T,u)+1e-16)
- u = a/(np.dot(K,v)+1e-16)
-
+ v = b / (np.dot(K.T, u) + 1e-16)
+ u = a / (np.dot(K, v) + 1e-16)
# remove numerical problems and store them in K
- if np.abs(u).max()>tau or np.abs(v).max()>tau:
+ if np.abs(u).max() > tau or np.abs(v).max() > tau:
if nbb:
- alpha,beta=alpha+reg*np.max(np.log(u),1),beta+reg*np.max(np.log(v))
+ alpha, beta = alpha + reg * \
+ np.max(np.log(u), 1), beta + reg * np.max(np.log(v))
else:
- alpha,beta=alpha+reg*np.log(u),beta+reg*np.log(v)
+ alpha, beta = alpha + reg * np.log(u), beta + reg * np.log(v)
if nbb:
- u,v = np.ones((na,nbb))/na,np.ones((nb,nbb))/nb
+ u, v = np.ones((na, nbb)) / na, np.ones((nb, nbb)) / nb
else:
- u,v = np.ones(na)/na,np.ones(nb)/nb
- K=get_K(alpha,beta)
-
+ u, v = np.ones(na) / na, np.ones(nb) / nb
+ K = get_K(alpha, beta)
- if cpt%print_period==0:
- # we can speed up the process by checking for the error only all the 10th iterations
+ if cpt % print_period == 0:
+            # we can speed up the process by checking for the error only
+            # every 10th iteration
if nbb:
- err = np.sum((u-uprev)**2)/np.sum((u)**2)+np.sum((v-vprev)**2)/np.sum((v)**2)
+ err = np.sum((u - uprev)**2) / np.sum((u)**2) + \
+ np.sum((v - vprev)**2) / np.sum((v)**2)
else:
- transp = get_Gamma(alpha,beta,u,v)
- err = np.linalg.norm((np.sum(transp,axis=0)-b))**2
+ transp = get_Gamma(alpha, beta, u, v)
+ err = np.linalg.norm((np.sum(transp, axis=0) - b))**2
if log:
log['err'].append(err)
if verbose:
- if cpt%(print_period*20) ==0:
- print('{:5s}|{:12s}'.format('It.','Err')+'\n'+'-'*19)
- print('{:5d}|{:8e}|'.format(cpt,err))
+ if cpt % (print_period * 20) == 0:
+ print(
+ '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err))
+ if err <= stopThr:
+ loop = False
- if err<=stopThr:
- loop=False
-
- if cpt>=numItermax:
- loop=False
-
+ if cpt >= numItermax:
+ loop = False
if np.any(np.isnan(u)) or np.any(np.isnan(v)):
# we have reached the machine precision
@@ -574,34 +586,34 @@ def sinkhorn_stabilized(a,b, M, reg, numItermax = 1000,tau=1e3, stopThr=1e-9,war
v = vprev
break
- cpt = cpt +1
-
+ cpt = cpt + 1
#print('err=',err,' cpt=',cpt)
if log:
- log['logu']=alpha/reg+np.log(u)
- log['logv']=beta/reg+np.log(v)
- log['alpha']=alpha+reg*np.log(u)
- log['beta']=beta+reg*np.log(v)
- log['warmstart']=(log['alpha'],log['beta'])
+ log['logu'] = alpha / reg + np.log(u)
+ log['logv'] = beta / reg + np.log(v)
+ log['alpha'] = alpha + reg * np.log(u)
+ log['beta'] = beta + reg * np.log(v)
+ log['warmstart'] = (log['alpha'], log['beta'])
if nbb:
- res=np.zeros((nbb))
+ res = np.zeros((nbb))
for i in range(nbb):
- res[i]=np.sum(get_Gamma(alpha,beta,u[:,i],v[:,i])*M)
- return res,log
+ res[i] = np.sum(get_Gamma(alpha, beta, u[:, i], v[:, i]) * M)
+ return res, log
else:
- return get_Gamma(alpha,beta,u,v),log
+ return get_Gamma(alpha, beta, u, v), log
else:
if nbb:
- res=np.zeros((nbb))
+ res = np.zeros((nbb))
for i in range(nbb):
- res[i]=np.sum(get_Gamma(alpha,beta,u[:,i],v[:,i])*M)
+ res[i] = np.sum(get_Gamma(alpha, beta, u[:, i], v[:, i]) * M)
return res
else:
- return get_Gamma(alpha,beta,u,v)
+ return get_Gamma(alpha, beta, u, v)
+
-def sinkhorn_epsilon_scaling(a,b, M, reg, numItermax = 100, epsilon0=1e4, numInnerItermax = 100,tau=1e3, stopThr=1e-9,warmstart=None, verbose=False,print_period=10, log=False,**kwargs):
+def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInnerItermax=100, tau=1e3, stopThr=1e-9, warmstart=None, verbose=False, print_period=10, log=False, **kwargs):
"""
Solve the entropic regularization optimal transport problem with log
stabilization and epsilon scaling.
@@ -690,14 +702,14 @@ def sinkhorn_epsilon_scaling(a,b, M, reg, numItermax = 100, epsilon0=1e4, numInn
"""
- a=np.asarray(a,dtype=np.float64)
- b=np.asarray(b,dtype=np.float64)
- M=np.asarray(M,dtype=np.float64)
+ a = np.asarray(a, dtype=np.float64)
+ b = np.asarray(b, dtype=np.float64)
+ M = np.asarray(M, dtype=np.float64)
- if len(a)==0:
- a=np.ones((M.shape[0],),dtype=np.float64)/M.shape[0]
- if len(b)==0:
- b=np.ones((M.shape[1],),dtype=np.float64)/M.shape[1]
+ if len(a) == 0:
+ a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
+ if len(b) == 0:
+ b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
# init data
na = len(a)
@@ -705,88 +717,94 @@ def sinkhorn_epsilon_scaling(a,b, M, reg, numItermax = 100, epsilon0=1e4, numInn
    # relative numerical precision with 64 bits
numItermin = 35
- numItermax=max(numItermin,numItermax) # ensure that last velue is exact
-
+    numItermax = max(numItermin, numItermax)  # ensure that last value is exact
cpt = 0
if log:
- log={'err':[]}
+ log = {'err': []}
- # we assume that no distances are null except those of the diagonal of distances
+ # we assume that no distances are null except those of the diagonal of
+ # distances
if warmstart is None:
- alpha,beta=np.zeros(na),np.zeros(nb)
+ alpha, beta = np.zeros(na), np.zeros(nb)
else:
- alpha,beta=warmstart
-
+ alpha, beta = warmstart
- def get_K(alpha,beta):
+ def get_K(alpha, beta):
"""log space computation"""
- return np.exp(-(M-alpha.reshape((na,1))-beta.reshape((1,nb)))/reg)
+ return np.exp(-(M - alpha.reshape((na, 1)) - beta.reshape((1, nb))) / reg)
- #print(np.min(K))
- def get_reg(n): # exponential decreasing
- return (epsilon0-reg)*np.exp(-n)+reg
+ # print(np.min(K))
+ def get_reg(n): # exponential decreasing
+ return (epsilon0 - reg) * np.exp(-n) + reg
- loop=1
+ loop = 1
cpt = 0
- err=1
+ err = 1
while loop:
- regi=get_reg(cpt)
+ regi = get_reg(cpt)
- G,logi=sinkhorn_stabilized(a,b, M, regi, numItermax = numInnerItermax, stopThr=1e-9,warmstart=(alpha,beta), verbose=False,print_period=20,tau=tau, log=True)
+ G, logi = sinkhorn_stabilized(a, b, M, regi, numItermax=numInnerItermax, stopThr=1e-9, warmstart=(
+ alpha, beta), verbose=False, print_period=20, tau=tau, log=True)
- alpha=logi['alpha']
- beta=logi['beta']
+ alpha = logi['alpha']
+ beta = logi['beta']
- if cpt>=numItermax:
- loop=False
+ if cpt >= numItermax:
+ loop = False
- if cpt%(print_period)==0: # spsion nearly converged
- # we can speed up the process by checking for the error only all the 10th iterations
+        if cpt % (print_period) == 0:  # epsilon nearly converged
+            # we can speed up the process by checking for the error only
+            # every 10th iteration
transp = G
- err = np.linalg.norm((np.sum(transp,axis=0)-b))**2+np.linalg.norm((np.sum(transp,axis=1)-a))**2
+ err = np.linalg.norm(
+ (np.sum(transp, axis=0) - b))**2 + np.linalg.norm((np.sum(transp, axis=1) - a))**2
if log:
log['err'].append(err)
if verbose:
- if cpt%(print_period*10) ==0:
- print('{:5s}|{:12s}'.format('It.','Err')+'\n'+'-'*19)
- print('{:5d}|{:8e}|'.format(cpt,err))
+ if cpt % (print_period * 10) == 0:
+ print(
+ '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err))
- if err<=stopThr and cpt>numItermin:
- loop=False
+ if err <= stopThr and cpt > numItermin:
+ loop = False
- cpt = cpt +1
+ cpt = cpt + 1
#print('err=',err,' cpt=',cpt)
if log:
- log['alpha']=alpha
- log['beta']=beta
- log['warmstart']=(log['alpha'],log['beta'])
- return G,log
+ log['alpha'] = alpha
+ log['beta'] = beta
+ log['warmstart'] = (log['alpha'], log['beta'])
+ return G, log
else:
return G
-def geometricBar(weights,alldistribT):
+def geometricBar(weights, alldistribT):
"""return the weighted geometric mean of distributions"""
- assert(len(weights)==alldistribT.shape[1])
- return np.exp(np.dot(np.log(alldistribT),weights.T))
+ assert(len(weights) == alldistribT.shape[1])
+ return np.exp(np.dot(np.log(alldistribT), weights.T))
+
def geometricMean(alldistribT):
"""return the geometric mean of distributions"""
- return np.exp(np.mean(np.log(alldistribT),axis=1))
+ return np.exp(np.mean(np.log(alldistribT), axis=1))
-def projR(gamma,p):
+
+def projR(gamma, p):
"""return the KL projection on the row constrints """
- return np.multiply(gamma.T,p/np.maximum(np.sum(gamma,axis=1),1e-10)).T
+ return np.multiply(gamma.T, p / np.maximum(np.sum(gamma, axis=1), 1e-10)).T
+
-def projC(gamma,q):
+def projC(gamma, q):
"""return the KL projection on the column constrints """
- return np.multiply(gamma,q/np.maximum(np.sum(gamma,axis=0),1e-10))
+ return np.multiply(gamma, q / np.maximum(np.sum(gamma, axis=0), 1e-10))
-def barycenter(A,M,reg, weights=None, numItermax = 1000, stopThr=1e-4,verbose=False,log=False):
+def barycenter(A, M, reg, weights=None, numItermax=1000, stopThr=1e-4, verbose=False, log=False):
"""Compute the entropic regularized wasserstein barycenter of distributions A
The function solves the following optimization problem:
@@ -837,49 +855,49 @@ def barycenter(A,M,reg, weights=None, numItermax = 1000, stopThr=1e-4,verbose=Fa
"""
-
if weights is None:
- weights=np.ones(A.shape[1])/A.shape[1]
+ weights = np.ones(A.shape[1]) / A.shape[1]
else:
- assert(len(weights)==A.shape[1])
+ assert(len(weights) == A.shape[1])
if log:
- log={'err':[]}
+ log = {'err': []}
- #M = M/np.median(M) # suggested by G. Peyre
- K = np.exp(-M/reg)
+ # M = M/np.median(M) # suggested by G. Peyre
+ K = np.exp(-M / reg)
cpt = 0
- err=1
+ err = 1
- UKv=np.dot(K,np.divide(A.T,np.sum(K,axis=0)).T)
- u = (geometricMean(UKv)/UKv.T).T
+ UKv = np.dot(K, np.divide(A.T, np.sum(K, axis=0)).T)
+ u = (geometricMean(UKv) / UKv.T).T
- while (err>stopThr and cpt<numItermax):
- cpt = cpt +1
- UKv=u*np.dot(K,np.divide(A,np.dot(K,u)))
- u = (u.T*geometricBar(weights,UKv)).T/UKv
+ while (err > stopThr and cpt < numItermax):
+ cpt = cpt + 1
+ UKv = u * np.dot(K, np.divide(A, np.dot(K, u)))
+ u = (u.T * geometricBar(weights, UKv)).T / UKv
- if cpt%10==1:
- err=np.sum(np.std(UKv,axis=1))
+ if cpt % 10 == 1:
+ err = np.sum(np.std(UKv, axis=1))
# log and verbose print
if log:
log['err'].append(err)
if verbose:
- if cpt%200 ==0:
- print('{:5s}|{:12s}'.format('It.','Err')+'\n'+'-'*19)
- print('{:5d}|{:8e}|'.format(cpt,err))
+ if cpt % 200 == 0:
+ print(
+ '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err))
if log:
- log['niter']=cpt
- return geometricBar(weights,UKv),log
+ log['niter'] = cpt
+ return geometricBar(weights, UKv), log
else:
- return geometricBar(weights,UKv)
+ return geometricBar(weights, UKv)
-def unmix(a,D,M,M0,h0,reg,reg0,alpha,numItermax = 1000, stopThr=1e-3,verbose=False,log=False):
+def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000, stopThr=1e-3, verbose=False, log=False):
"""
Compute the unmixing of an observation with a given dictionary using Wasserstein distance
@@ -943,43 +961,44 @@ def unmix(a,D,M,M0,h0,reg,reg0,alpha,numItermax = 1000, stopThr=1e-3,verbose=Fal
"""
#M = M/np.median(M)
- K = np.exp(-M/reg)
+ K = np.exp(-M / reg)
#M0 = M0/np.median(M0)
- K0 = np.exp(-M0/reg0)
+ K0 = np.exp(-M0 / reg0)
old = h0
- err=1
- cpt=0
+ err = 1
+ cpt = 0
#log = {'niter':0, 'all_err':[]}
if log:
- log={'err':[]}
-
-
- while (err>stopThr and cpt<numItermax):
- K = projC(K,a)
- K0 = projC(K0,h0)
- new = np.sum(K0,axis=1)
- inv_new = np.dot(D,new) # we recombine the current selection from dictionnary
- other = np.sum(K,axis=1)
- delta = np.exp(alpha*np.log(other)+(1-alpha)*np.log(inv_new)) # geometric interpolation
- K = projR(K,delta)
- K0 = np.dot(np.diag(np.dot(D.T,delta/inv_new)),K0)
-
- err=np.linalg.norm(np.sum(K0,axis=1)-old)
+ log = {'err': []}
+
+ while (err > stopThr and cpt < numItermax):
+ K = projC(K, a)
+ K0 = projC(K0, h0)
+ new = np.sum(K0, axis=1)
+        # we recombine the current selection from the dictionary
+ inv_new = np.dot(D, new)
+ other = np.sum(K, axis=1)
+ # geometric interpolation
+ delta = np.exp(alpha * np.log(other) + (1 - alpha) * np.log(inv_new))
+ K = projR(K, delta)
+ K0 = np.dot(np.diag(np.dot(D.T, delta / inv_new)), K0)
+
+ err = np.linalg.norm(np.sum(K0, axis=1) - old)
old = new
if log:
log['err'].append(err)
if verbose:
- if cpt%200 ==0:
- print('{:5s}|{:12s}'.format('It.','Err')+'\n'+'-'*19)
- print('{:5d}|{:8e}|'.format(cpt,err))
+ if cpt % 200 == 0:
+ print('{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err))
- cpt = cpt+1
+ cpt = cpt + 1
if log:
- log['niter']=cpt
- return np.sum(K0,axis=1),log
+ log['niter'] = cpt
+ return np.sum(K0, axis=1), log
else:
- return np.sum(K0,axis=1)
+ return np.sum(K0, axis=1)
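
For reference, a short usage sketch of the ot.bregman entry points reformatted above (the method strings and signatures come from the diff; the histograms, cost matrix and regularization values are purely illustrative):

    import numpy as np
    import ot

    n = 100
    x = np.arange(n, dtype=np.float64)
    # two 1D histograms (the helper is defined in ot/datasets.py further down)
    a = ot.datasets.get_1D_gauss(n, m=20, s=5)
    b = ot.datasets.get_1D_gauss(n, m=60, s=10)
    # squared-distance cost matrix, rescaled so reg is on a sensible scale
    M = (x[:, None] - x[None, :]) ** 2
    M /= M.max()

    # the three solvers dispatched on the `method` argument of ot.sinkhorn
    G_kn = ot.sinkhorn(a, b, M, reg=1e-2, method='sinkhorn')
    G_st = ot.sinkhorn(a, b, M, reg=1e-2, method='sinkhorn_stabilized')
    G_es = ot.sinkhorn(a, b, M, reg=1e-2, method='sinkhorn_epsilon_scaling')

    # entropic Wasserstein barycenter of the two histograms (one per column)
    A = np.vstack((a, b)).T
    bary = ot.bregman.barycenter(A, M, reg=1e-2, weights=np.array([0.5, 0.5]))
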
diff --git a/ot/da.py b/ot/da.py
index ddf1c60..5039fbd 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -6,12 +6,12 @@ Domain adaptation with optimal transport
import numpy as np
from .bregman import sinkhorn
from .lp import emd
-from .utils import unif,dist,kernel
+from .utils import unif, dist, kernel
from .optim import cg
from .optim import gcg
-def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerItermax = 200,stopInnerThr=1e-9,verbose=False,log=False):
+def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, numInnerItermax=200, stopInnerThr=1e-9, verbose=False, log=False):
"""
Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization
@@ -94,7 +94,7 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter
W = np.zeros(M.shape)
for cpt in range(numItermax):
- Mreg = M + eta*W
+ Mreg = M + eta * W
transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax,
stopThr=stopInnerThr)
# the transport has been computed. Check if classes are really
@@ -102,12 +102,13 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter
W = np.ones(M.shape)
for (i, c) in enumerate(classes):
majs = np.sum(transp[indices_labels[i]], axis=0)
- majs = p*((majs+epsilon)**(p-1))
+ majs = p * ((majs + epsilon)**(p - 1))
W[indices_labels[i]] = majs
return transp
-def sinkhorn_l1l2_gl(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerItermax = 200,stopInnerThr=1e-9,verbose=False,log=False):
+
+def sinkhorn_l1l2_gl(a, labels_a, b, M, reg, eta=0.1, numItermax=10, numInnerItermax=200, stopInnerThr=1e-9, verbose=False, log=False):
"""
Solve the entropic regularization optimal transport problem with group lasso regularization
@@ -176,32 +177,30 @@ def sinkhorn_l1l2_gl(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter
ot.optim.gcg : Generalized conditional gradient for OT problems
"""
- lstlab=np.unique(labels_a)
+ lstlab = np.unique(labels_a)
def f(G):
- res=0
+ res = 0
for i in range(G.shape[1]):
for lab in lstlab:
- temp=G[labels_a==lab,i]
- res+=np.linalg.norm(temp)
+ temp = G[labels_a == lab, i]
+ res += np.linalg.norm(temp)
return res
def df(G):
- W=np.zeros(G.shape)
+ W = np.zeros(G.shape)
for i in range(G.shape[1]):
for lab in lstlab:
- temp=G[labels_a==lab,i]
- n=np.linalg.norm(temp)
+ temp = G[labels_a == lab, i]
+ n = np.linalg.norm(temp)
if n:
- W[labels_a==lab,i]=temp/n
+ W[labels_a == lab, i] = temp / n
return W
-
- return gcg(a,b,M,reg,eta,f,df,G0=None,numItermax = numItermax,numInnerItermax=numInnerItermax, stopThr=stopInnerThr,verbose=verbose,log=log)
-
+ return gcg(a, b, M, reg, eta, f, df, G0=None, numItermax=numItermax, numInnerItermax=numInnerItermax, stopThr=stopInnerThr, verbose=verbose, log=log)
-def joint_OT_mapping_linear(xs,xt,mu=1,eta=0.001,bias=False,verbose=False,verbose2=False,numItermax = 100,numInnerItermax = 10,stopInnerThr=1e-6,stopThr=1e-5,log=False,**kwargs):
+def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False, verbose2=False, numItermax=100, numInnerItermax=10, stopInnerThr=1e-6, stopThr=1e-5, log=False, **kwargs):
"""Joint OT and linear mapping estimation as proposed in [8]
The function solves the following optimization problem:
@@ -281,97 +280,105 @@ def joint_OT_mapping_linear(xs,xt,mu=1,eta=0.001,bias=False,verbose=False,verbos
"""
- ns,nt,d=xs.shape[0],xt.shape[0],xt.shape[1]
+ ns, nt, d = xs.shape[0], xt.shape[0], xt.shape[1]
if bias:
- xs1=np.hstack((xs,np.ones((ns,1))))
- xstxs=xs1.T.dot(xs1)
- I=np.eye(d+1)
- I[-1]=0
- I0=I[:,:-1]
- sel=lambda x : x[:-1,:]
+ xs1 = np.hstack((xs, np.ones((ns, 1))))
+ xstxs = xs1.T.dot(xs1)
+ I = np.eye(d + 1)
+ I[-1] = 0
+ I0 = I[:, :-1]
+
+ def sel(x):
+ return x[:-1, :]
else:
- xs1=xs
- xstxs=xs1.T.dot(xs1)
- I=np.eye(d)
- I0=I
- sel=lambda x : x
+ xs1 = xs
+ xstxs = xs1.T.dot(xs1)
+ I = np.eye(d)
+ I0 = I
+
+ def sel(x):
+ return x
if log:
- log={'err':[]}
+ log = {'err': []}
- a,b=unif(ns),unif(nt)
- M=dist(xs,xt)*ns
- G=emd(a,b,M)
+ a, b = unif(ns), unif(nt)
+ M = dist(xs, xt) * ns
+ G = emd(a, b, M)
- vloss=[]
+ vloss = []
- def loss(L,G):
+ def loss(L, G):
"""Compute full loss"""
- return np.sum((xs1.dot(L)-ns*G.dot(xt))**2)+mu*np.sum(G*M)+eta*np.sum(sel(L-I0)**2)
+ return np.sum((xs1.dot(L) - ns * G.dot(xt))**2) + mu * np.sum(G * M) + eta * np.sum(sel(L - I0)**2)
def solve_L(G):
""" solve L problem with fixed G (least square)"""
- xst=ns*G.dot(xt)
- return np.linalg.solve(xstxs+eta*I,xs1.T.dot(xst)+eta*I0)
+ xst = ns * G.dot(xt)
+ return np.linalg.solve(xstxs + eta * I, xs1.T.dot(xst) + eta * I0)
- def solve_G(L,G0):
+ def solve_G(L, G0):
"""Update G with CG algorithm"""
- xsi=xs1.dot(L)
+ xsi = xs1.dot(L)
+
def f(G):
- return np.sum((xsi-ns*G.dot(xt))**2)
+ return np.sum((xsi - ns * G.dot(xt))**2)
+
def df(G):
- return -2*ns*(xsi-ns*G.dot(xt)).dot(xt.T)
- G=cg(a,b,M,1.0/mu,f,df,G0=G0,numItermax=numInnerItermax,stopThr=stopInnerThr)
+ return -2 * ns * (xsi - ns * G.dot(xt)).dot(xt.T)
+ G = cg(a, b, M, 1.0 / mu, f, df, G0=G0,
+ numItermax=numInnerItermax, stopThr=stopInnerThr)
return G
+ L = solve_L(G)
- L=solve_L(G)
-
- vloss.append(loss(L,G))
+ vloss.append(loss(L, G))
if verbose:
- print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32)
- print('{:5d}|{:8e}|{:8e}'.format(0,vloss[-1],0))
-
+ print('{:5s}|{:12s}|{:8s}'.format(
+ 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32)
+ print('{:5d}|{:8e}|{:8e}'.format(0, vloss[-1], 0))
# init loop
- if numItermax>0:
- loop=1
+ if numItermax > 0:
+ loop = 1
else:
- loop=0
- it=0
+ loop = 0
+ it = 0
while loop:
- it+=1
+ it += 1
# update G
- G=solve_G(L,G)
+ G = solve_G(L, G)
- #update L
- L=solve_L(G)
+ # update L
+ L = solve_L(G)
- vloss.append(loss(L,G))
+ vloss.append(loss(L, G))
- if it>=numItermax:
- loop=0
+ if it >= numItermax:
+ loop = 0
- if abs(vloss[-1]-vloss[-2])/abs(vloss[-2])<stopThr:
- loop=0
+ if abs(vloss[-1] - vloss[-2]) / abs(vloss[-2]) < stopThr:
+ loop = 0
if verbose:
- if it%20==0:
- print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32)
- print('{:5d}|{:8e}|{:8e}'.format(it,vloss[-1],(vloss[-1]-vloss[-2])/abs(vloss[-2])))
+ if it % 20 == 0:
+ print('{:5s}|{:12s}|{:8s}'.format(
+ 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32)
+ print('{:5d}|{:8e}|{:8e}'.format(
+ it, vloss[-1], (vloss[-1] - vloss[-2]) / abs(vloss[-2])))
if log:
- log['loss']=vloss
- return G,L,log
+ log['loss'] = vloss
+ return G, L, log
else:
- return G,L
+ return G, L
-def joint_OT_mapping_kernel(xs,xt,mu=1,eta=0.001,kerneltype='gaussian',sigma=1,bias=False,verbose=False,verbose2=False,numItermax = 100,numInnerItermax = 10,stopInnerThr=1e-6,stopThr=1e-5,log=False,**kwargs):
+def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian', sigma=1, bias=False, verbose=False, verbose2=False, numItermax=100, numInnerItermax=10, stopInnerThr=1e-6, stopThr=1e-5, log=False, **kwargs):
"""Joint OT and nonlinear mapping estimation with kernels as proposed in [8]
The function solves the following optimization problem:
@@ -454,123 +461,126 @@ def joint_OT_mapping_kernel(xs,xt,mu=1,eta=0.001,kerneltype='gaussian',sigma=1,b
"""
- ns,nt=xs.shape[0],xt.shape[0]
+ ns, nt = xs.shape[0], xt.shape[0]
- K=kernel(xs,xs,method=kerneltype,sigma=sigma)
+ K = kernel(xs, xs, method=kerneltype, sigma=sigma)
if bias:
- K1=np.hstack((K,np.ones((ns,1))))
- I=np.eye(ns+1)
- I[-1]=0
- Kp=np.eye(ns+1)
- Kp[:ns,:ns]=K
+ K1 = np.hstack((K, np.ones((ns, 1))))
+ I = np.eye(ns + 1)
+ I[-1] = 0
+ Kp = np.eye(ns + 1)
+ Kp[:ns, :ns] = K
# ls regu
#K0 = K1.T.dot(K1)+eta*I
- #Kreg=I
+ # Kreg=I
# RKHS regul
- K0 = K1.T.dot(K1)+eta*Kp
- Kreg=Kp
+ K0 = K1.T.dot(K1) + eta * Kp
+ Kreg = Kp
else:
- K1=K
- I=np.eye(ns)
+ K1 = K
+ I = np.eye(ns)
# ls regul
#K0 = K1.T.dot(K1)+eta*I
- #Kreg=I
+ # Kreg=I
# proper kernel ridge
- K0=K+eta*I
- Kreg=K
-
-
-
+ K0 = K + eta * I
+ Kreg = K
if log:
- log={'err':[]}
+ log = {'err': []}
- a,b=unif(ns),unif(nt)
- M=dist(xs,xt)*ns
- G=emd(a,b,M)
+ a, b = unif(ns), unif(nt)
+ M = dist(xs, xt) * ns
+ G = emd(a, b, M)
- vloss=[]
+ vloss = []
- def loss(L,G):
+ def loss(L, G):
"""Compute full loss"""
- return np.sum((K1.dot(L)-ns*G.dot(xt))**2)+mu*np.sum(G*M)+eta*np.trace(L.T.dot(Kreg).dot(L))
+ return np.sum((K1.dot(L) - ns * G.dot(xt))**2) + mu * np.sum(G * M) + eta * np.trace(L.T.dot(Kreg).dot(L))
def solve_L_nobias(G):
""" solve L problem with fixed G (least square)"""
- xst=ns*G.dot(xt)
- return np.linalg.solve(K0,xst)
+ xst = ns * G.dot(xt)
+ return np.linalg.solve(K0, xst)
def solve_L_bias(G):
""" solve L problem with fixed G (least square)"""
- xst=ns*G.dot(xt)
- return np.linalg.solve(K0,K1.T.dot(xst))
+ xst = ns * G.dot(xt)
+ return np.linalg.solve(K0, K1.T.dot(xst))
- def solve_G(L,G0):
+ def solve_G(L, G0):
"""Update G with CG algorithm"""
- xsi=K1.dot(L)
+ xsi = K1.dot(L)
+
def f(G):
- return np.sum((xsi-ns*G.dot(xt))**2)
+ return np.sum((xsi - ns * G.dot(xt))**2)
+
def df(G):
- return -2*ns*(xsi-ns*G.dot(xt)).dot(xt.T)
- G=cg(a,b,M,1.0/mu,f,df,G0=G0,numItermax=numInnerItermax,stopThr=stopInnerThr)
+ return -2 * ns * (xsi - ns * G.dot(xt)).dot(xt.T)
+ G = cg(a, b, M, 1.0 / mu, f, df, G0=G0,
+ numItermax=numInnerItermax, stopThr=stopInnerThr)
return G
if bias:
- solve_L=solve_L_bias
+ solve_L = solve_L_bias
else:
- solve_L=solve_L_nobias
+ solve_L = solve_L_nobias
- L=solve_L(G)
+ L = solve_L(G)
- vloss.append(loss(L,G))
+ vloss.append(loss(L, G))
if verbose:
- print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32)
- print('{:5d}|{:8e}|{:8e}'.format(0,vloss[-1],0))
-
+ print('{:5s}|{:12s}|{:8s}'.format(
+ 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32)
+ print('{:5d}|{:8e}|{:8e}'.format(0, vloss[-1], 0))
# init loop
- if numItermax>0:
- loop=1
+ if numItermax > 0:
+ loop = 1
else:
- loop=0
- it=0
+ loop = 0
+ it = 0
while loop:
- it+=1
+ it += 1
# update G
- G=solve_G(L,G)
+ G = solve_G(L, G)
- #update L
- L=solve_L(G)
+ # update L
+ L = solve_L(G)
- vloss.append(loss(L,G))
+ vloss.append(loss(L, G))
- if it>=numItermax:
- loop=0
+ if it >= numItermax:
+ loop = 0
- if abs(vloss[-1]-vloss[-2])/abs(vloss[-2])<stopThr:
- loop=0
+ if abs(vloss[-1] - vloss[-2]) / abs(vloss[-2]) < stopThr:
+ loop = 0
if verbose:
- if it%20==0:
- print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32)
- print('{:5d}|{:8e}|{:8e}'.format(it,vloss[-1],(vloss[-1]-vloss[-2])/abs(vloss[-2])))
+ if it % 20 == 0:
+ print('{:5s}|{:12s}|{:8s}'.format(
+ 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32)
+ print('{:5d}|{:8e}|{:8e}'.format(
+ it, vloss[-1], (vloss[-1] - vloss[-2]) / abs(vloss[-2])))
if log:
- log['loss']=vloss
- return G,L,log
+ log['loss'] = vloss
+ return G, L, log
else:
- return G,L
+ return G, L
class OTDA(object):
+
"""Class for domain adaptation with optimal transport as proposed in [5]
@@ -581,34 +591,33 @@ class OTDA(object):
"""
- def __init__(self,metric='sqeuclidean'):
+ def __init__(self, metric='sqeuclidean'):
""" Class initialization"""
- self.xs=0
- self.xt=0
- self.G=0
- self.metric=metric
- self.computed=False
+ self.xs = 0
+ self.xt = 0
+ self.G = 0
+ self.metric = metric
+ self.computed = False
-
- def fit(self,xs,xt,ws=None,wt=None,norm=None):
+ def fit(self, xs, xt, ws=None, wt=None, norm=None):
""" Fit domain adaptation between samples is xs and xt (with optional weights)"""
- self.xs=xs
- self.xt=xt
+ self.xs = xs
+ self.xt = xt
if wt is None:
- wt=unif(xt.shape[0])
+ wt = unif(xt.shape[0])
if ws is None:
- ws=unif(xs.shape[0])
+ ws = unif(xs.shape[0])
- self.ws=ws
- self.wt=wt
+ self.ws = ws
+ self.wt = wt
- self.M=dist(xs,xt,metric=self.metric)
+ self.M = dist(xs, xt, metric=self.metric)
self.normalizeM(norm)
- self.G=emd(ws,wt,self.M)
- self.computed=True
+ self.G = emd(ws, wt, self.M)
+ self.computed = True
- def interp(self,direction=1):
+ def interp(self, direction=1):
"""Barycentric interpolation for the source (1) or target (-1) samples
This Barycentric interpolation solves for each source (resp target)
@@ -623,28 +632,28 @@ class OTDA(object):
metric could be used in the future.
"""
- if direction>0: # >0 then source to target
- G=self.G
- w=self.ws.reshape((self.xs.shape[0],1))
- x=self.xt
+ if direction > 0: # >0 then source to target
+ G = self.G
+ w = self.ws.reshape((self.xs.shape[0], 1))
+ x = self.xt
else:
- G=self.G.T
- w=self.wt.reshape((self.xt.shape[0],1))
- x=self.xs
+ G = self.G.T
+ w = self.wt.reshape((self.xt.shape[0], 1))
+ x = self.xs
if self.computed:
- if self.metric=='sqeuclidean':
- return np.dot(G/w,x) # weighted mean
+ if self.metric == 'sqeuclidean':
+ return np.dot(G / w, x) # weighted mean
else:
- print("Warning, metric not handled yet, using weighted average")
- return np.dot(G/w,x) # weighted mean
+ print(
+ "Warning, metric not handled yet, using weighted average")
+ return np.dot(G / w, x) # weighted mean
return None
else:
print("Warning, model not fitted yet, returning None")
return None
-
- def predict(self,x,direction=1):
+ def predict(self, x, direction=1):
""" Out of sample mapping using the formulation from [6]
For each sample x to map, it finds the nearest source sample xs and
@@ -657,29 +666,30 @@ class OTDA(object):
.. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014). Regularized discrete optimal transport. SIAM Journal on Imaging Sciences, 7(3), 1853-1882.
"""
- if direction>0: # >0 then source to target
- xf=self.xt
- x0=self.xs
+ if direction > 0: # >0 then source to target
+ xf = self.xt
+ x0 = self.xs
else:
- xf=self.xs
- x0=self.xt
+ xf = self.xs
+ x0 = self.xt
- D0=dist(x,x0) # dist netween new samples an source
- idx=np.argmin(D0,1) # closest one
- xf=self.interp(direction)# interp the source samples
- return xf[idx,:]+x-x0[idx,:] # aply the delta to the interpolation
+        D0 = dist(x, x0)  # dist between new samples and source
+ idx = np.argmin(D0, 1) # closest one
+ xf = self.interp(direction) # interp the source samples
+        # apply the delta to the interpolation
+ return xf[idx, :] + x - x0[idx, :]
def normalizeM(self, norm):
""" Apply normalization to the loss matrix
-
-
+
+
Parameters
----------
norm : str
type of normalization from 'median','max','log','loglog'
-
+
"""
-
+
if norm == "median":
self.M /= float(np.median(self.M))
elif norm == "max":
@@ -688,149 +698,149 @@ class OTDA(object):
self.M = np.log(1 + self.M)
elif norm == "loglog":
self.M = np.log(1 + np.log(1 + self.M))
-
class OTDA_sinkhorn(OTDA):
+
"""Class for domain adaptation with optimal transport with entropic regularization"""
- def fit(self,xs,xt,reg=1,ws=None,wt=None,norm=None,**kwargs):
+ def fit(self, xs, xt, reg=1, ws=None, wt=None, norm=None, **kwargs):
""" Fit regularized domain adaptation between samples is xs and xt (with optional weights)"""
- self.xs=xs
- self.xt=xt
+ self.xs = xs
+ self.xt = xt
if wt is None:
- wt=unif(xt.shape[0])
+ wt = unif(xt.shape[0])
if ws is None:
- ws=unif(xs.shape[0])
+ ws = unif(xs.shape[0])
- self.ws=ws
- self.wt=wt
+ self.ws = ws
+ self.wt = wt
- self.M=dist(xs,xt,metric=self.metric)
+ self.M = dist(xs, xt, metric=self.metric)
self.normalizeM(norm)
- self.G=sinkhorn(ws,wt,self.M,reg,**kwargs)
- self.computed=True
+ self.G = sinkhorn(ws, wt, self.M, reg, **kwargs)
+ self.computed = True
class OTDA_lpl1(OTDA):
- """Class for domain adaptation with optimal transport with entropic and group regularization"""
+ """Class for domain adaptation with optimal transport with entropic and group regularization"""
- def fit(self,xs,ys,xt,reg=1,eta=1,ws=None,wt=None,norm=None,**kwargs):
+ def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None, **kwargs):
""" Fit regularized domain adaptation between samples is xs and xt (with optional weights), See ot.da.sinkhorn_lpl1_mm for fit parameters"""
- self.xs=xs
- self.xt=xt
+ self.xs = xs
+ self.xt = xt
if wt is None:
- wt=unif(xt.shape[0])
+ wt = unif(xt.shape[0])
if ws is None:
- ws=unif(xs.shape[0])
+ ws = unif(xs.shape[0])
- self.ws=ws
- self.wt=wt
+ self.ws = ws
+ self.wt = wt
- self.M=dist(xs,xt,metric=self.metric)
+ self.M = dist(xs, xt, metric=self.metric)
self.normalizeM(norm)
- self.G=sinkhorn_lpl1_mm(ws,ys,wt,self.M,reg,eta,**kwargs)
- self.computed=True
+ self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M, reg, eta, **kwargs)
+ self.computed = True
+
class OTDA_l1l2(OTDA):
- """Class for domain adaptation with optimal transport with entropic and group lasso regularization"""
+ """Class for domain adaptation with optimal transport with entropic and group lasso regularization"""
- def fit(self,xs,ys,xt,reg=1,eta=1,ws=None,wt=None,norm=None,**kwargs):
+ def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None, **kwargs):
""" Fit regularized domain adaptation between samples is xs and xt (with optional weights), See ot.da.sinkhorn_lpl1_gl for fit parameters"""
- self.xs=xs
- self.xt=xt
+ self.xs = xs
+ self.xt = xt
if wt is None:
- wt=unif(xt.shape[0])
+ wt = unif(xt.shape[0])
if ws is None:
- ws=unif(xs.shape[0])
+ ws = unif(xs.shape[0])
- self.ws=ws
- self.wt=wt
+ self.ws = ws
+ self.wt = wt
- self.M=dist(xs,xt,metric=self.metric)
+ self.M = dist(xs, xt, metric=self.metric)
self.normalizeM(norm)
- self.G=sinkhorn_l1l2_gl(ws,ys,wt,self.M,reg,eta,**kwargs)
- self.computed=True
+ self.G = sinkhorn_l1l2_gl(ws, ys, wt, self.M, reg, eta, **kwargs)
+ self.computed = True
+
class OTDA_mapping_linear(OTDA):
- """Class for optimal transport with joint linear mapping estimation as in [8]"""
+ """Class for optimal transport with joint linear mapping estimation as in [8]"""
def __init__(self):
""" Class initialization"""
+ self.xs = 0
+ self.xt = 0
+ self.G = 0
+ self.L = 0
+ self.bias = False
+ self.computed = False
+ self.metric = 'sqeuclidean'
- self.xs=0
- self.xt=0
- self.G=0
- self.L=0
- self.bias=False
- self.computed=False
- self.metric='sqeuclidean'
-
- def fit(self,xs,xt,mu=1,eta=1,bias=False,**kwargs):
+ def fit(self, xs, xt, mu=1, eta=1, bias=False, **kwargs):
""" Fit domain adaptation between samples is xs and xt (with optional
weights)"""
- self.xs=xs
- self.xt=xt
- self.bias=bias
-
+ self.xs = xs
+ self.xt = xt
+ self.bias = bias
- self.ws=unif(xs.shape[0])
- self.wt=unif(xt.shape[0])
+ self.ws = unif(xs.shape[0])
+ self.wt = unif(xt.shape[0])
- self.G,self.L=joint_OT_mapping_linear(xs,xt,mu=mu,eta=eta,bias=bias,**kwargs)
- self.computed=True
+ self.G, self.L = joint_OT_mapping_linear(
+ xs, xt, mu=mu, eta=eta, bias=bias, **kwargs)
+ self.computed = True
def mapping(self):
return lambda x: self.predict(x)
-
- def predict(self,x):
+ def predict(self, x):
""" Out of sample mapping estimated during the call to fit"""
if self.computed:
if self.bias:
- x=np.hstack((x,np.ones((x.shape[0],1))))
- return x.dot(self.L) # aply the delta to the interpolation
+ x = np.hstack((x, np.ones((x.shape[0], 1))))
+            return x.dot(self.L)  # apply the delta to the interpolation
else:
print("Warning, model not fitted yet, returning None")
return None
-class OTDA_mapping_kernel(OTDA_mapping_linear):
- """Class for optimal transport with joint nonlinear mapping estimation as in [8]"""
+class OTDA_mapping_kernel(OTDA_mapping_linear):
+ """Class for optimal transport with joint nonlinear mapping estimation as in [8]"""
- def fit(self,xs,xt,mu=1,eta=1,bias=False,kerneltype='gaussian',sigma=1,**kwargs):
+ def fit(self, xs, xt, mu=1, eta=1, bias=False, kerneltype='gaussian', sigma=1, **kwargs):
""" Fit domain adaptation between samples is xs and xt """
- self.xs=xs
- self.xt=xt
- self.bias=bias
-
- self.ws=unif(xs.shape[0])
- self.wt=unif(xt.shape[0])
- self.kernel=kerneltype
- self.sigma=sigma
- self.kwargs=kwargs
+ self.xs = xs
+ self.xt = xt
+ self.bias = bias
+ self.ws = unif(xs.shape[0])
+ self.wt = unif(xt.shape[0])
+ self.kernel = kerneltype
+ self.sigma = sigma
+ self.kwargs = kwargs
- self.G,self.L=joint_OT_mapping_kernel(xs,xt,mu=mu,eta=eta,bias=bias,**kwargs)
- self.computed=True
+ self.G, self.L = joint_OT_mapping_kernel(
+ xs, xt, mu=mu, eta=eta, bias=bias, **kwargs)
+ self.computed = True
-
- def predict(self,x):
+ def predict(self, x):
""" Out of sample mapping estimated during the call to fit"""
if self.computed:
- K=kernel(x,self.xs,method=self.kernel,sigma=self.sigma,**self.kwargs)
+ K = kernel(
+ x, self.xs, method=self.kernel, sigma=self.sigma, **self.kwargs)
if self.bias:
- K=np.hstack((K,np.ones((x.shape[0],1))))
+ K = np.hstack((K, np.ones((x.shape[0], 1))))
return K.dot(self.L)
else:
print("Warning, model not fitted yet, returning None")
- return None \ No newline at end of file
+ return None
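
A short usage sketch for the OTDA classes reworked above (class names and the fit/interp/predict signatures are taken from the diff; the toy data, reg and eta values are illustrative):

    import ot

    # toy 2D classification datasets (helpers shown in ot/datasets.py below)
    xs, ys = ot.datasets.get_data_classif('3gauss', 200, nz=0.5)
    xt, _ = ot.datasets.get_data_classif('3gauss2', 200, nz=0.5)

    # entropic OT domain adaptation
    da_ent = ot.da.OTDA_sinkhorn()
    da_ent.fit(xs, xt, reg=1e-1, norm='max')
    xst = da_ent.interp()          # barycentric mapping of the source samples
    xnew = da_ent.predict(xs[:5])  # out-of-sample mapping as in [6]

    # group-sparse variant that also uses the source labels
    da_lpl1 = ot.da.OTDA_lpl1()
    da_lpl1.fit(xs, ys, xt, reg=1e-1, eta=1e-1, norm='max')
    xst_lpl1 = da_lpl1.interp()
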
diff --git a/ot/datasets.py b/ot/datasets.py
index 7816833..4371a23 100644
--- a/ot/datasets.py
+++ b/ot/datasets.py
@@ -7,7 +7,7 @@ import numpy as np
import scipy as sp
-def get_1D_gauss(n,m,s):
+def get_1D_gauss(n, m, s):
"""return a 1D histogram for a gaussian distribution (n bins, mean m and std s)
Parameters
@@ -27,12 +27,12 @@ def get_1D_gauss(n,m,s):
1D histogram for a gaussian distribution
"""
- x=np.arange(n,dtype=np.float64)
- h=np.exp(-(x-m)**2/(2*s**2))
- return h/h.sum()
+ x = np.arange(n, dtype=np.float64)
+ h = np.exp(-(x - m)**2 / (2 * s**2))
+ return h / h.sum()
-def get_2D_samples_gauss(n,m,sigma):
+def get_2D_samples_gauss(n, m, sigma):
"""return n samples drawn from 2D gaussian N(m,sigma)
Parameters
@@ -52,17 +52,17 @@ def get_2D_samples_gauss(n,m,sigma):
n samples drawn from N(m,sigma)
"""
- if np.isscalar(sigma):
- sigma=np.array([sigma,])
- if len(sigma)>1:
- P=sp.linalg.sqrtm(sigma)
- res= np.random.randn(n,2).dot(P)+m
+ if np.isscalar(sigma):
+ sigma = np.array([sigma, ])
+ if len(sigma) > 1:
+ P = sp.linalg.sqrtm(sigma)
+ res = np.random.randn(n, 2).dot(P) + m
else:
- res= np.random.randn(n,2)*np.sqrt(sigma)+m
+ res = np.random.randn(n, 2) * np.sqrt(sigma) + m
return res
-def get_data_classif(dataset,n,nz=.5,theta=0,**kwargs):
+def get_data_classif(dataset, n, nz=.5, theta=0, **kwargs):
""" dataset generation for classification problems
Parameters
@@ -84,48 +84,53 @@ def get_data_classif(dataset,n,nz=.5,theta=0,**kwargs):
labels of the samples
"""
- if dataset.lower()=='3gauss':
- y=np.floor((np.arange(n)*1.0/n*3))+1
- x=np.zeros((n,2))
+ if dataset.lower() == '3gauss':
+ y = np.floor((np.arange(n) * 1.0 / n * 3)) + 1
+ x = np.zeros((n, 2))
# class 1
- x[y==1,0]=-1.; x[y==1,1]=-1.
- x[y==2,0]=-1.; x[y==2,1]=1.
- x[y==3,0]=1. ; x[y==3,1]=0
-
- x[y!=3,:]+=1.5*nz*np.random.randn(sum(y!=3),2)
- x[y==3,:]+=2*nz*np.random.randn(sum(y==3),2)
-
- elif dataset.lower()=='3gauss2':
- y=np.floor((np.arange(n)*1.0/n*3))+1
- x=np.zeros((n,2))
- y[y==4]=3
+ x[y == 1, 0] = -1.
+ x[y == 1, 1] = -1.
+ x[y == 2, 0] = -1.
+ x[y == 2, 1] = 1.
+ x[y == 3, 0] = 1.
+ x[y == 3, 1] = 0
+
+ x[y != 3, :] += 1.5 * nz * np.random.randn(sum(y != 3), 2)
+ x[y == 3, :] += 2 * nz * np.random.randn(sum(y == 3), 2)
+
+ elif dataset.lower() == '3gauss2':
+ y = np.floor((np.arange(n) * 1.0 / n * 3)) + 1
+ x = np.zeros((n, 2))
+ y[y == 4] = 3
# class 1
- x[y==1,0]=-2.; x[y==1,1]=-2.
- x[y==2,0]=-2.; x[y==2,1]=2.
- x[y==3,0]=2. ; x[y==3,1]=0
-
- x[y!=3,:]+=nz*np.random.randn(sum(y!=3),2)
- x[y==3,:]+=2*nz*np.random.randn(sum(y==3),2)
-
- elif dataset.lower()=='gaussrot' :
- rot=np.array([[np.cos(theta),np.sin(theta)],[-np.sin(theta),np.cos(theta)]])
- m1=np.array([-1,1])
- m2=np.array([1,-1])
- y=np.floor((np.arange(n)*1.0/n*2))+1
- n1=np.sum(y==1)
- n2=np.sum(y==2)
- x=np.zeros((n,2))
-
- x[y==1,:]=get_2D_samples_gauss(n1,m1,nz)
- x[y==2,:]=get_2D_samples_gauss(n2,m2,nz)
-
- x=x.dot(rot)
-
-
+ x[y == 1, 0] = -2.
+ x[y == 1, 1] = -2.
+ x[y == 2, 0] = -2.
+ x[y == 2, 1] = 2.
+ x[y == 3, 0] = 2.
+ x[y == 3, 1] = 0
+
+ x[y != 3, :] += nz * np.random.randn(sum(y != 3), 2)
+ x[y == 3, :] += 2 * nz * np.random.randn(sum(y == 3), 2)
+
+ elif dataset.lower() == 'gaussrot':
+ rot = np.array(
+ [[np.cos(theta), np.sin(theta)], [-np.sin(theta), np.cos(theta)]])
+ m1 = np.array([-1, 1])
+ m2 = np.array([1, -1])
+ y = np.floor((np.arange(n) * 1.0 / n * 2)) + 1
+ n1 = np.sum(y == 1)
+ n2 = np.sum(y == 2)
+ x = np.zeros((n, 2))
+
+ x[y == 1, :] = get_2D_samples_gauss(n1, m1, nz)
+ x[y == 2, :] = get_2D_samples_gauss(n2, m2, nz)
+
+ x = x.dot(rot)
else:
- x=np.array(0)
- y=np.array(0)
+ x = np.array(0)
+ y = np.array(0)
print("unknown dataset")
- return x,y.astype(int) \ No newline at end of file
+ return x, y.astype(int)
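
The reindented dataset helpers above can be exercised as follows (parameter values are illustrative; keyword names follow the signatures in the diff):

    import numpy as np
    import ot

    # 1D gaussian histogram: 100 bins, mean 30, std 5; it sums to one
    h = ot.datasets.get_1D_gauss(100, m=30, s=5)
    assert np.isclose(h.sum(), 1.0)

    # 500 samples from a 2D gaussian with scalar variance
    X = ot.datasets.get_2D_samples_gauss(500, m=np.array([0., 0.]), sigma=1.)

    # rotated two-gaussian classification problem (labels are 1 and 2)
    x, y = ot.datasets.get_data_classif('gaussrot', 300, nz=0.3, theta=np.pi / 4)
    # x.shape == (300, 2), y.shape == (300,)
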
diff --git a/ot/dr.py b/ot/dr.py
index 763ce35..77cbae2 100644
--- a/ot/dr.py
+++ b/ot/dr.py
@@ -3,43 +3,46 @@
Dimension reduction with optimal transport
"""
+from scipy import linalg
import autograd.numpy as np
from pymanopt.manifolds import Stiefel
from pymanopt import Problem
from pymanopt.solvers import SteepestDescent, TrustRegions
-import scipy.linalg as la
-def dist(x1,x2):
+
+def dist(x1, x2):
""" Compute squared euclidean distance between samples (autograd)
"""
- x1p2=np.sum(np.square(x1),1)
- x2p2=np.sum(np.square(x2),1)
- return x1p2.reshape((-1,1))+x2p2.reshape((1,-1))-2*np.dot(x1,x2.T)
+ x1p2 = np.sum(np.square(x1), 1)
+ x2p2 = np.sum(np.square(x2), 1)
+ return x1p2.reshape((-1, 1)) + x2p2.reshape((1, -1)) - 2 * np.dot(x1, x2.T)
+
-def sinkhorn(w1,w2,M,reg,k):
+def sinkhorn(w1, w2, M, reg, k):
"""Sinkhorn algorithm with fixed number of iteration (autograd)
"""
- K=np.exp(-M/reg)
- ui=np.ones((M.shape[0],))
- vi=np.ones((M.shape[1],))
+ K = np.exp(-M / reg)
+ ui = np.ones((M.shape[0],))
+ vi = np.ones((M.shape[1],))
for i in range(k):
- vi=w2/(np.dot(K.T,ui))
- ui=w1/(np.dot(K,vi))
- G=ui.reshape((M.shape[0],1))*K*vi.reshape((1,M.shape[1]))
+ vi = w2 / (np.dot(K.T, ui))
+ ui = w1 / (np.dot(K, vi))
+ G = ui.reshape((M.shape[0], 1)) * K * vi.reshape((1, M.shape[1]))
return G
-def split_classes(X,y):
+
+def split_classes(X, y):
"""split samples in X by classes in y
"""
- lstsclass=np.unique(y)
- return [X[y==i,:].astype(np.float32) for i in lstsclass]
+ lstsclass = np.unique(y)
+ return [X[y == i, :].astype(np.float32) for i in lstsclass]
+
+
+def fda(X, y, p=2, reg=1e-16):
+ """
+ Fisher Discriminant Analysis
-def fda(X,y,p=2,reg=1e-16):
- """
- Fisher Discriminant Analysis
-
-
Parameters
----------
X : numpy.ndarray (n,d)
@@ -59,62 +62,62 @@ def fda(X,y,p=2,reg=1e-16):
proj : fun
projection function including mean centering
-
- """
-
- mx=np.mean(X)
- X-=mx.reshape((1,-1))
-
+
+ """
+
+ mx = np.mean(X)
+ X -= mx.reshape((1, -1))
+
# data split between classes
- d=X.shape[1]
- xc=split_classes(X,y)
- nc=len(xc)
-
- p=min(nc-1,p)
-
- Cw=0
+ d = X.shape[1]
+ xc = split_classes(X, y)
+ nc = len(xc)
+
+ p = min(nc - 1, p)
+
+ Cw = 0
for x in xc:
- Cw+=np.cov(x,rowvar=False)
- Cw/=nc
-
- mxc=np.zeros((d,nc))
-
+ Cw += np.cov(x, rowvar=False)
+ Cw /= nc
+
+ mxc = np.zeros((d, nc))
+
for i in range(nc):
- mxc[:,i]=np.mean(xc[i])
-
- mx0=np.mean(mxc,1)
- Cb=0
+ mxc[:, i] = np.mean(xc[i])
+
+ mx0 = np.mean(mxc, 1)
+ Cb = 0
for i in range(nc):
- Cb+=(mxc[:,i]-mx0).reshape((-1,1))*(mxc[:,i]-mx0).reshape((1,-1))
-
- w,V=la.eig(Cb,Cw+reg*np.eye(d))
-
- idx=np.argsort(w.real)
-
- Popt=V[:,idx[-p:]]
-
-
-
+ Cb += (mxc[:, i] - mx0).reshape((-1, 1)) * \
+ (mxc[:, i] - mx0).reshape((1, -1))
+
+ w, V = linalg.eig(Cb, Cw + reg * np.eye(d))
+
+ idx = np.argsort(w.real)
+
+ Popt = V[:, idx[-p:]]
+
def proj(X):
- return (X-mx.reshape((1,-1))).dot(Popt)
-
+ return (X - mx.reshape((1, -1))).dot(Popt)
+
return Popt, proj
-def wda(X,y,p=2,reg=1,k=10,solver = None,maxiter=100,verbose=0,P0=None):
- """
+
+def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None):
+ """
Wasserstein Discriminant Analysis [11]_
-
+
The function solves the following optimization problem:
.. math::
P = \\text{arg}\min_P \\frac{\\sum_i W(PX^i,PX^i)}{\\sum_{i,j\\neq i} W(PX^i,PX^j)}
where :
-
+
- :math:`P` is a linear projection operator in the Stiefel(p,d) manifold
- :math:`W` is entropic regularized Wasserstein distances
- - :math:`X^i` are samples in the dataset corresponding to class i
-
+ - :math:`X^i` are samples in the dataset corresponding to class i
+
Parameters
----------
X : numpy.ndarray (n,d)
@@ -147,54 +150,50 @@ def wda(X,y,p=2,reg=1,k=10,solver = None,maxiter=100,verbose=0,P0=None):
----------
.. [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016). Wasserstein Discriminant Analysis. arXiv preprint arXiv:1608.08063.
-
- """
-
- mx=np.mean(X)
- X-=mx.reshape((1,-1))
-
+
+ """ # noqa
+
+ mx = np.mean(X)
+ X -= mx.reshape((1, -1))
+
# data split between classes
- d=X.shape[1]
- xc=split_classes(X,y)
+ d = X.shape[1]
+ xc = split_classes(X, y)
    # compute uniform weights
- wc=[np.ones((x.shape[0]),dtype=np.float32)/x.shape[0] for x in xc]
-
+ wc = [np.ones((x.shape[0]), dtype=np.float32) / x.shape[0] for x in xc]
+
def cost(P):
# wda loss
- loss_b=0
- loss_w=0
-
- for i,xi in enumerate(xc):
- xi=np.dot(xi,P)
- for j,xj in enumerate(xc[i:]):
- xj=np.dot(xj,P)
- M=dist(xi,xj)
- G=sinkhorn(wc[i],wc[j+i],M,reg,k)
- if j==0:
- loss_w+=np.sum(G*M)
+ loss_b = 0
+ loss_w = 0
+
+ for i, xi in enumerate(xc):
+ xi = np.dot(xi, P)
+ for j, xj in enumerate(xc[i:]):
+ xj = np.dot(xj, P)
+ M = dist(xi, xj)
+ G = sinkhorn(wc[i], wc[j + i], M, reg, k)
+ if j == 0:
+ loss_w += np.sum(G * M)
else:
- loss_b+=np.sum(G*M)
-
- # loss inversed because minimization
- return loss_w/loss_b
-
-
+ loss_b += np.sum(G * M)
+
+        # loss is inverted because we minimize
+ return loss_w / loss_b
+
# declare manifold and problem
- manifold = Stiefel(d, p)
+ manifold = Stiefel(d, p)
problem = Problem(manifold=manifold, cost=cost)
-
+
# declare solver and solve
if solver is None:
- solver= SteepestDescent(maxiter=maxiter,logverbosity=verbose)
- elif solver in ['tr','TrustRegions']:
- solver= TrustRegions(maxiter=maxiter,logverbosity=verbose)
-
- Popt = solver.solve(problem,x=P0)
-
- def proj(X):
- return (X-mx.reshape((1,-1))).dot(Popt)
-
- return Popt, proj
+ solver = SteepestDescent(maxiter=maxiter, logverbosity=verbose)
+ elif solver in ['tr', 'TrustRegions']:
+ solver = TrustRegions(maxiter=maxiter, logverbosity=verbose)
+ Popt = solver.solve(problem, x=P0)
+ def proj(X):
+ return (X - mx.reshape((1, -1))).dot(Popt)
+ return Popt, proj
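
A minimal sketch of the dimension-reduction routines cleaned up above (signatures from the diff; ot/dr.py needs the optional autograd and pymanopt dependencies it imports, and the toy data and parameter values here are illustrative):

    import numpy as np
    import ot
    import ot.dr   # imported explicitly: it pulls in autograd and pymanopt

    xs, ys = ot.datasets.get_data_classif('3gauss', 300, nz=0.3)

    # Fisher discriminant analysis: projection matrix and centering projector
    Pfda, proj_fda = ot.dr.fda(xs, ys, p=1)

    # Wasserstein discriminant analysis on the Stiefel(p, d) manifold
    Pwda, proj_wda = ot.dr.wda(xs, ys, p=1, reg=1, k=10, maxiter=50)

    xs_fda = proj_fda(xs)   # projected samples, shape (300, 1)
    xs_wda = proj_wda(xs)
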
diff --git a/ot/optim.py b/ot/optim.py
index 79f4f66..adad95e 100644
--- a/ot/optim.py
+++ b/ot/optim.py
@@ -9,7 +9,9 @@ from .lp import emd
from .bregman import sinkhorn
# The corresponding scipy function does not work for matrices
-def line_search_armijo(f,xk,pk,gfk,old_fval,args=(),c1=1e-4,alpha0=0.99):
+
+
+def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=0.99):
"""
Armijo linesearch function that works with matrices
@@ -51,20 +53,21 @@ def line_search_armijo(f,xk,pk,gfk,old_fval,args=(),c1=1e-4,alpha0=0.99):
def phi(alpha1):
fc[0] += 1
- return f(xk + alpha1*pk, *args)
+ return f(xk + alpha1 * pk, *args)
if old_fval is None:
phi0 = phi(0.)
else:
phi0 = old_fval
- derphi0 = np.sum(pk*gfk) # Quickfix for matrices
- alpha,phi1 = scalar_search_armijo(phi,phi0,derphi0,c1=c1,alpha0=alpha0)
+ derphi0 = np.sum(pk * gfk) # Quickfix for matrices
+ alpha, phi1 = scalar_search_armijo(
+ phi, phi0, derphi0, c1=c1, alpha0=alpha0)
- return alpha,fc[0],phi1
+ return alpha, fc[0], phi1
-def cg(a,b,M,reg,f,df,G0=None,numItermax = 200,stopThr=1e-9,verbose=False,log=False):
+def cg(a, b, M, reg, f, df, G0=None, numItermax=200, stopThr=1e-9, verbose=False, log=False):
"""
Solve the general regularized OT problem with conditional gradient
@@ -128,74 +131,74 @@ def cg(a,b,M,reg,f,df,G0=None,numItermax = 200,stopThr=1e-9,verbose=False,log=Fa
"""
- loop=1
+ loop = 1
if log:
- log={'loss':[]}
+ log = {'loss': []}
if G0 is None:
- G=np.outer(a,b)
+ G = np.outer(a, b)
else:
- G=G0
+ G = G0
def cost(G):
- return np.sum(M*G)+reg*f(G)
+ return np.sum(M * G) + reg * f(G)
- f_val=cost(G)
+ f_val = cost(G)
if log:
log['loss'].append(f_val)
- it=0
+ it = 0
if verbose:
- print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32)
- print('{:5d}|{:8e}|{:8e}'.format(it,f_val,0))
+ print('{:5s}|{:12s}|{:8s}'.format(
+ 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32)
+ print('{:5d}|{:8e}|{:8e}'.format(it, f_val, 0))
while loop:
- it+=1
- old_fval=f_val
-
+ it += 1
+ old_fval = f_val
# problem linearization
- Mi=M+reg*df(G)
+ Mi = M + reg * df(G)
# set M positive
- Mi+=Mi.min()
+ Mi += Mi.min()
# solve linear program
- Gc=emd(a,b,Mi)
+ Gc = emd(a, b, Mi)
- deltaG=Gc-G
+ deltaG = Gc - G
# line search
- alpha,fc,f_val = line_search_armijo(cost,G,deltaG,Mi,f_val)
+ alpha, fc, f_val = line_search_armijo(cost, G, deltaG, Mi, f_val)
- G=G+alpha*deltaG
+ G = G + alpha * deltaG
# test convergence
- if it>=numItermax:
- loop=0
-
- delta_fval=(f_val-old_fval)/abs(f_val)
- if abs(delta_fval)<stopThr:
- loop=0
+ if it >= numItermax:
+ loop = 0
+ delta_fval = (f_val - old_fval) / abs(f_val)
+ if abs(delta_fval) < stopThr:
+ loop = 0
if log:
log['loss'].append(f_val)
if verbose:
- if it%20 ==0:
- print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32)
- print('{:5d}|{:8e}|{:8e}'.format(it,f_val,delta_fval))
-
+ if it % 20 == 0:
+ print('{:5s}|{:12s}|{:8s}'.format(
+ 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32)
+ print('{:5d}|{:8e}|{:8e}'.format(it, f_val, delta_fval))
if log:
- return G,log
+ return G, log
else:
return G
-def gcg(a,b,M,reg1,reg2,f,df,G0=None,numItermax = 10,numInnerItermax = 200,stopThr=1e-9,verbose=False,log=False):
+
+def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10, numInnerItermax=200, stopThr=1e-9, verbose=False, log=False):
"""
Solve the general regularized OT problem with the generalized conditional gradient
@@ -264,70 +267,68 @@ def gcg(a,b,M,reg1,reg2,f,df,G0=None,numItermax = 10,numInnerItermax = 200,stopT
"""
- loop=1
+ loop = 1
if log:
- log={'loss':[]}
+ log = {'loss': []}
if G0 is None:
- G=np.outer(a,b)
+ G = np.outer(a, b)
else:
- G=G0
+ G = G0
def cost(G):
- return np.sum(M*G)+ reg1*np.sum(G*np.log(G)) + reg2*f(G)
+ return np.sum(M * G) + reg1 * np.sum(G * np.log(G)) + reg2 * f(G)
- f_val=cost(G)
+ f_val = cost(G)
if log:
log['loss'].append(f_val)
- it=0
+ it = 0
if verbose:
- print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32)
- print('{:5d}|{:8e}|{:8e}'.format(it,f_val,0))
+ print('{:5s}|{:12s}|{:8s}'.format(
+ 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32)
+ print('{:5d}|{:8e}|{:8e}'.format(it, f_val, 0))
while loop:
- it+=1
- old_fval=f_val
-
+ it += 1
+ old_fval = f_val
# problem linearization
- Mi=M+reg2*df(G)
+ Mi = M + reg2 * df(G)
# solve linear program with Sinkhorn
#Gc = sinkhorn_stabilized(a,b, Mi, reg1, numItermax = numInnerItermax)
- Gc = sinkhorn(a,b, Mi, reg1, numItermax = numInnerItermax)
+ Gc = sinkhorn(a, b, Mi, reg1, numItermax=numInnerItermax)
- deltaG=Gc-G
+ deltaG = Gc - G
# line search
- dcost=Mi+reg1*(1+np.log(G)) #??
- alpha,fc,f_val = line_search_armijo(cost,G,deltaG,dcost,f_val)
+ dcost = Mi + reg1 * (1 + np.log(G)) # ??
+ alpha, fc, f_val = line_search_armijo(cost, G, deltaG, dcost, f_val)
- G=G+alpha*deltaG
+ G = G + alpha * deltaG
# test convergence
- if it>=numItermax:
- loop=0
-
- delta_fval=(f_val-old_fval)/abs(f_val)
- if abs(delta_fval)<stopThr:
- loop=0
+ if it >= numItermax:
+ loop = 0
+ delta_fval = (f_val - old_fval) / abs(f_val)
+ if abs(delta_fval) < stopThr:
+ loop = 0
if log:
log['loss'].append(f_val)
if verbose:
- if it%20 ==0:
- print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32)
- print('{:5d}|{:8e}|{:8e}'.format(it,f_val,delta_fval))
-
+ if it % 20 == 0:
+ print('{:5s}|{:12s}|{:8s}'.format(
+ 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32)
+ print('{:5d}|{:8e}|{:8e}'.format(it, f_val, delta_fval))
if log:
- return G,log
+ return G, log
else:
return G
-
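
Finally, a usage sketch for the generic solvers in ot.optim (the cg/gcg signatures come from the diff; the quadratic regularizer and all numeric values are illustrative choices, not part of the commit):

    import numpy as np
    import ot

    n = 50
    x = np.arange(n, dtype=np.float64)
    a = ot.datasets.get_1D_gauss(n, m=15, s=5)
    b = ot.datasets.get_1D_gauss(n, m=35, s=8)
    M = (x[:, None] - x[None, :]) ** 2
    M /= M.max()

    # squared-Frobenius regularizer and its gradient
    def f(G):
        return 0.5 * np.sum(G ** 2)

    def df(G):
        return G

    # conditional gradient: exact emd on the linearized problem at each step
    G_cg = ot.optim.cg(a, b, M, 1e-1, f, df, verbose=True)

    # generalized conditional gradient: entropic (reg1) + quadratic (reg2) terms
    G_gcg = ot.optim.gcg(a, b, M, 1e-2, 1e-1, f, df, numInnerItermax=200)
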