-rw-r--r--  README.md                2
-rw-r--r--  docs/source/readme.rst   3
-rw-r--r--  ot/bregman.py          232
-rw-r--r--  ot/da.py               123
-rw-r--r--  ot/optim.py              6
5 files changed, 226 insertions, 140 deletions
diff --git a/README.md b/README.md
index cd9e281..f73cbe5 100644
--- a/README.md
+++ b/README.md
@@ -62,6 +62,8 @@ The examples folder contain several examples and use case for the library. The f
* [Color transfer in images](https://github.com/rflamary/POT/blob/master/examples/Demo_Image_ColorAdaptation.ipynb)
* [OT mapping estimation for domain adaptation](https://github.com/rflamary/POT/blob/master/examples/Demo_2D_OTmapping_DomainAdaptation.ipynb)
+You can also see the notebooks with [Jupyter nbviewer](https://nbviewer.jupyter.org/github/rflamary/POT/tree/master/examples/).
+
## Acknowledgements
The contributors to this library are:
diff --git a/docs/source/readme.rst b/docs/source/readme.rst
index 7b88cad..653adaf 100644
--- a/docs/source/readme.rst
+++ b/docs/source/readme.rst
@@ -86,6 +86,9 @@ Here is a list of the Python notebooks if you want a quick look:
- `OT mapping estimation for domain
adaptation <https://github.com/rflamary/POT/blob/master/examples/Demo_2D_OTmapping_DomainAdaptation.ipynb>`__
+You can also see the notebooks with `Jupyter
+nbviewer <https://nbviewer.jupyter.org/github/rflamary/POT/tree/master/examples/>`__.
+
Acknowledgements
----------------
diff --git a/ot/bregman.py b/ot/bregman.py
index 2d82ae4..a770c5a 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -14,21 +14,21 @@ def sinkhorn(a,b, M, reg, numItermax = 1000, stopThr=1e-9, verbose=False, log=Fa
.. math::
\gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
-
+
s.t. \gamma 1 = a
-
- \gamma^T 1= b
-
+
+ \gamma^T 1= b
+
\gamma\geq 0
where :
-
+
- M is the (ns,nt) metric cost matrix
- :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- a and b are source and target weights (sum to 1)
-
+
The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [2]_
-
-
+
+
Parameters
----------
a : np.ndarray (ns,)
@@ -36,79 +36,79 @@ def sinkhorn(a,b, M, reg, numItermax = 1000, stopThr=1e-9, verbose=False, log=Fa
b : np.ndarray (nt,)
samples in the target domain
M : np.ndarray (ns,nt)
- loss matrix
- reg: float
+ loss matrix
+ reg : float
Regularization term >0
- numItermax: int, optional
+ numItermax : int, optional
Max number of iterations
- stopThr: float, optional
+ stopThr : float, optional
Stop threshold on error (>0)
verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
-
-
+ record log if True
+
+
Returns
-------
- gamma: (ns x nt) ndarray
+ gamma : (ns x nt) ndarray
Optimal transportation matrix for the given parameters
- log: dict
- log dictionary return only if log==True in parameters
+ log : dict
+ log dictionary returned only if log==True in parameters
Examples
--------
-
+
>>> a=[.5,.5]
>>> b=[.5,.5]
>>> M=[[0.,1.],[1.,0.]]
>>> ot.sinkhorn(a,b,M,1)
array([[ 0.36552929, 0.13447071],
[ 0.13447071, 0.36552929]])
-
-
+
+
References
----------
-
+
.. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
-
-
+
+
See Also
--------
ot.lp.emd : Unregularized OT
ot.optim.cg : General regularized OT
-
- """
-
+
+ """
+
a=np.asarray(a,dtype=np.float64)
b=np.asarray(b,dtype=np.float64)
M=np.asarray(M,dtype=np.float64)
-
+
if len(a)==0:
a=np.ones((M.shape[0],),dtype=np.float64)/M.shape[0]
if len(b)==0:
- b=np.ones((M.shape[1],),dtype=np.float64)/M.shape[1]
-
+ b=np.ones((M.shape[1],),dtype=np.float64)/M.shape[1]
+
# init data
Nini = len(a)
Nfin = len(b)
-
-
+
+
cpt = 0
if log:
log={'err':[]}
-
+
# we assume that no distances are null except those of the diagonal of distances
u = np.ones(Nini)/Nini
- v = np.ones(Nfin)/Nfin
+ v = np.ones(Nfin)/Nfin
uprev=np.zeros(Nini)
vprev=np.zeros(Nini)
#print reg
-
+
K = np.exp(-M/reg)
#print np.min(K)
-
+
Kp = np.dot(np.diag(1/a),K)
transp = K
cpt = 0
@@ -120,10 +120,10 @@ def sinkhorn(a,b, M, reg, numItermax = 1000, stopThr=1e-9, verbose=False, log=Fa
print('Warning: numerical errors')
if cpt!=0:
u = uprev
- v = vprev
+ v = vprev
break
uprev = u
- vprev = v
+ vprev = v
v = np.divide(b,np.dot(K.T,u))
u = 1./np.dot(Kp,v)
if cpt%10==0:
@@ -131,14 +131,14 @@ def sinkhorn(a,b, M, reg, numItermax = 1000, stopThr=1e-9, verbose=False, log=Fa
transp = np.dot(np.diag(u),np.dot(K,np.diag(v)))
err = np.linalg.norm((np.sum(transp,axis=0)-b))**2
if log:
- log['err'].append(err)
-
+ log['err'].append(err)
+
if verbose:
if cpt%200 ==0:
print('{:5s}|{:12s}'.format('It.','Err')+'\n'+'-'*19)
print('{:5d}|{:8e}|'.format(cpt,err))
cpt = cpt +1
- #print 'err=',err,' cpt=',cpt
+ #print 'err=',err,' cpt=',cpt
if log:
return np.dot(np.diag(u),np.dot(K,np.diag(v))),log
else:
@@ -147,12 +147,12 @@ def sinkhorn(a,b, M, reg, numItermax = 1000, stopThr=1e-9, verbose=False, log=Fa
def geometricBar(weights,alldistribT):
"""return the weighted geometric mean of distributions"""
- assert(len(weights)==alldistribT.shape[1])
- return np.exp(np.dot(np.log(alldistribT),weights.T))
+ assert(len(weights)==alldistribT.shape[1])
+ return np.exp(np.dot(np.log(alldistribT),weights.T))
-def geometricMean(alldistribT):
+def geometricMean(alldistribT):
"""return the geometric mean of distributions"""
- return np.exp(np.mean(np.log(alldistribT),axis=1))
+ return np.exp(np.mean(np.log(alldistribT),axis=1))
def projR(gamma,p):
#return np.dot(np.diag(p/np.maximum(np.sum(gamma,axis=1),1e-10)),gamma)
@@ -161,65 +161,65 @@ def projR(gamma,p):
def projC(gamma,q):
#return (np.dot(np.diag(q/np.maximum(np.sum(gamma,axis=0),1e-10)),gamma.T)).T
return np.multiply(gamma,q/np.maximum(np.sum(gamma,axis=0),1e-10))
-
+
def barycenter(A,M,reg, weights=None, numItermax = 1000, stopThr=1e-4,verbose=False,log=False):
"""Compute the entropic regularized wasserstein barycenter of distributions A
The function solves the following optimization problem:
-
+
.. math::
\mathbf{a} = arg\min_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
-
+
where :
-
+
- :math:`W_{reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance (see ot.bregman.sinkhorn)
- :math:`\mathbf{a}_i` are training distributions in the columns of matrix :math:`\mathbf{A}`
- reg and :math:`\mathbf{M}` are respectively the regularization term and the cost matrix for OT
-
+
The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [3]_
-
+
Parameters
----------
A : np.ndarray (d,n)
- n training distributions of size d
+ n training distributions of size d
M : np.ndarray (d,d)
- loss matrix for OT
- reg: float
+ loss matrix for OT
+ reg : float
Regularization term >0
- numItermax: int, optional
+ numItermax : int, optional
Max number of iterations
- stopThr: float, optional
+ stopThr : float, optional
Stop threshold on error (>0)
verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
-
-
+ record log if True
+
+
Returns
-------
- a: (d,) ndarray
+ a : (d,) ndarray
Wasserstein barycenter
- log: dict
- log dictionary return only if log==True in parameters
+ log : dict
+ log dictionary returned only if log==True in parameters
+
-
References
----------
-
+
.. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G. (2015). Iterative Bregman projections for regularized transportation problems. SIAM Journal on Scientific Computing, 37(2), A1111-A1138.
-
-
-
+
+
+
"""
-
-
+
+
if weights is None:
weights=np.ones(A.shape[1])/A.shape[1]
else:
assert(len(weights)==A.shape[1])
-
+
if log:
log={'err':[]}
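
Before the iteration hunks below, a quick usage sketch of barycenter as documented above. This is a hedged example: it assumes POT imports as ot and that ot.dist is available as in the README examples; the toy histograms are illustrative.

    import numpy as np
    import ot

    d = 10
    x = np.arange(d, dtype=np.float64)
    a1 = np.exp(-(x - 2) ** 2); a1 /= a1.sum()    # smoothed histogram centered at bin 2
    a2 = np.exp(-(x - 7) ** 2); a2 /= a2.sum()    # smoothed histogram centered at bin 7
    A = np.vstack((a1, a2)).T                     # distributions as columns, shape (d, n)
    M = ot.dist(x.reshape(-1, 1), x.reshape(-1, 1))
    M /= M.max()                                  # normalized squared Euclidean ground cost
    bary = ot.bregman.barycenter(A, M, reg=1e-2, weights=np.array([0.5, 0.5]))
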
@@ -231,130 +231,130 @@ def barycenter(A,M,reg, weights=None, numItermax = 1000, stopThr=1e-4,verbose=Fa
UKv=np.dot(K,np.divide(A.T,np.sum(K,axis=0)).T)
u = (geometricMean(UKv)/UKv.T).T
-
+
while (err>stopThr and cpt<numItermax):
cpt = cpt +1
UKv=u*np.dot(K,np.divide(A,np.dot(K,u)))
u = (u.T*geometricBar(weights,UKv)).T/UKv
-
+
if cpt%10==1:
err=np.sum(np.std(UKv,axis=1))
-
+
# log and verbose print
if log:
log['err'].append(err)
-
+
if verbose:
if cpt%200 ==0:
print('{:5s}|{:12s}'.format('It.','Err')+'\n'+'-'*19)
- print('{:5d}|{:8e}|'.format(cpt,err))
+ print('{:5d}|{:8e}|'.format(cpt,err))
if log:
log['niter']=cpt
return geometricBar(weights,UKv),log
else:
return geometricBar(weights,UKv)
-
+
def unmix(a,D,M,M0,h0,reg,reg0,alpha,numItermax = 1000, stopThr=1e-3,verbose=False,log=False):
"""
Compute the unmixing of an observation with a given dictionary using Wasserstein distance
-
+
The function solves the following optimization problem:
-
+
.. math::
\mathbf{h} = arg\min_\mathbf{h} (1- \\alpha) W_{M,reg}(\mathbf{a},\mathbf{Dh})+\\alpha W_{M0,reg0}(\mathbf{h}_0,\mathbf{h})
where :
-
+
- :math:`W_{M,reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance with M loss matrix (see ot.bregman.sinkhorn)
- - :math:`\mathbf{a}` is an observed distribution, :math:`\mathbf{h}_0` is aprior on unmixing
+ - :math:`\mathbf{a}` is an observed distribution, :math:`\mathbf{h}_0` is a prior on the unmixing
- reg and :math:`\mathbf{M}` are respectively the regularization term and the cost matrix for OT data fitting
- reg0 and :math:`\mathbf{M0}` are respectively the regularization term and the cost matrix for regularization
- - :math:`\\alpha`weight data fitting and regularization
-
+ - :math:`\\alpha` weights data fitting and regularization
+
The optimization problem is solved using the algorithm described in [4]
-
-
+
+
Parameters
----------
a : np.ndarray (d)
observed distribution
D : np.ndarray (d,n)
- dictionary matrix
+ dictionary matrix
M : np.ndarray (d,d)
- loss matrix
+ loss matrix
M0 : np.ndarray (n,n)
- loss matrix
+ loss matrix
h0 : np.ndarray (n,)
- prior on h
- reg: float
+ prior on h
+ reg : float
Regularization term >0 (Wasserstein data fitting)
- reg0: float
- Regularization term >0 (Wasserstein reg with h0)
- alpha: float
+ reg0 : float
+ Regularization term >0 (Wasserstein reg with h0)
+ alpha : float
How much should we trust the prior ([0,1])
- numItermax: int, optional
+ numItermax : int, optional
Max number of iterations
- stopThr: float, optional
+ stopThr : float, optional
Stop threshold on error (>0)
verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
-
-
+ record log if True
+
+
Returns
-------
- a: (d,) ndarray
+ h : (n,) ndarray
Estimated unmixing of the observation a
- log: dict
- log dictionary return only if log==True in parameters
-
+ log : dict
+ log dictionary returned only if log==True in parameters
+
References
----------
-
+
.. [4] S. Nakhostin, N. Courty, R. Flamary, D. Tuia, T. Corpetti, Supervised planetary unmixing with optimal transport, Workshop on Hyperspectral Image and Signal Processing : Evolution in Remote Sensing (WHISPERS), 2016.
"""
-
- #M = M/np.median(M)
+
+ #M = M/np.median(M)
K = np.exp(-M/reg)
-
- #M0 = M0/np.median(M0)
+
+ #M0 = M0/np.median(M0)
K0 = np.exp(-M0/reg0)
old = h0
err=1
- cpt=0
+ cpt=0
#log = {'niter':0, 'all_err':[]}
if log:
log={'err':[]}
-
-
+
+
while (err>stopThr and cpt<numItermax):
- K = projC(K,a)
- K0 = projC(K0,h0)
+ K = projC(K,a)
+ K0 = projC(K0,h0)
new = np.sum(K0,axis=1)
inv_new = np.dot(D,new) # we recombine the current selection from the dictionary
other = np.sum(K,axis=1)
delta = np.exp(alpha*np.log(other)+(1-alpha)*np.log(inv_new)) # geometric interpolation
K = projR(K,delta)
K0 = np.dot(np.diag(np.dot(D.T,delta/inv_new)),K0)
-
+
err=np.linalg.norm(np.sum(K0,axis=1)-old)
old = new
if log:
log['err'].append(err)
-
+
if verbose:
if cpt%200 ==0:
print('{:5s}|{:12s}'.format('It.','Err')+'\n'+'-'*19)
- print('{:5d}|{:8e}|'.format(cpt,err))
-
+ print('{:5d}|{:8e}|'.format(cpt,err))
+
cpt = cpt+1
-
+
if log:
log['niter']=cpt
return np.sum(K0,axis=1),log
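
To round off the bregman.py changes, a hedged usage sketch for unmix. The toy dictionary, the 0/1 cost M0 between atoms and the uniform prior h0 are illustrative choices, not values prescribed by the library.

    import numpy as np
    import ot

    d, n = 20, 3
    x = np.arange(d, dtype=np.float64).reshape(-1, 1)
    M = ot.dist(x, x); M /= M.max()               # data-fitting ground cost
    centers = np.array([3.0, 10.0, 16.0])
    D = np.exp(-(x - centers) ** 2 / 4.0)         # n smoothed atoms as columns
    D /= D.sum(axis=0)
    a = 0.6 * D[:, 0] + 0.4 * D[:, 2]             # observation: mixture of atoms 0 and 2
    M0 = np.ones((n, n)) - np.eye(n)              # 0/1 cost between atoms
    h0 = np.ones(n) / n                           # uniform prior on the unmixing
    h = ot.bregman.unmix(a, D, M, M0, h0, reg=1e-2, reg0=1e-1, alpha=0.5)
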
diff --git a/ot/da.py b/ot/da.py
index 76bc6a3..72ca3ac 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -14,7 +14,6 @@ def indices(a, func):
return [i for (i, val) in enumerate(a) if func(val)]
-
def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerItermax = 200,stopInnerThr=1e-9,verbose=False,log=False):
"""
Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization
@@ -49,15 +48,15 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter
samples in the target domain
M : np.ndarray (ns,nt)
loss matrix
- reg: float
+ reg : float
Regularization term for entropic regularization >0
- eta: float, optional
+ eta : float, optional
Regularization term for group lasso regularization >0
- numItermax: int, optional
+ numItermax : int, optional
Max number of iterations
- numInnerItermax: int, optional
+ numInnerItermax : int, optional
Max number of iterations (inner sinkhorn solver)
- stopInnerThr: float, optional
+ stopInnerThr : float, optional
Stop threshold on error (inner sinkhorn solver) (>0)
verbose : bool, optional
Print information along iterations
@@ -67,9 +66,9 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter
Returns
-------
- gamma: (ns x nt) ndarray
+ gamma : (ns x nt) ndarray
Optimal transportation matrix for the given parameters
- log: dict
+ log : dict
log dictionary returned only if log==True in parameters
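
A hedged usage sketch for sinkhorn_lpl1_mm as documented above; the random samples and class labels are illustrative, and ot.unif and ot.dist are assumed available as in the README examples.

    import numpy as np
    import ot

    rng = np.random.RandomState(0)
    xs = rng.randn(50, 2)                         # source samples
    labels_a = rng.randint(0, 2, 50)              # source class labels
    xt = rng.randn(50, 2) + 1.0                   # shifted target samples
    a, b = ot.unif(50), ot.unif(50)               # uniform marginal weights
    M = ot.dist(xs, xt); M /= M.max()             # normalized squared Euclidean cost
    G = ot.da.sinkhorn_lpl1_mm(a, labels_a, b, M, reg=1e-1, eta=1e-1)
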
@@ -145,7 +144,10 @@ def joint_OT_mapping_linear(xs,xt,mu=1,eta=0.001,bias=False,verbose=False,verbos
The problem consists in solving jointly an optimal transport matrix
:math:`\gamma` and a linear mapping that fits the barycentric mapping
- :math:`n_s\gamma X_t`
+ :math:`n_s\gamma X_t`.
+
+ One can also estimate a mapping with constant bias (see supplementary
+ material of [8]) using the bias optional argument.
The algorithm used for solving the problem is the block coordinate
descent that alternates between updates of G (using conditional gradient)
@@ -158,19 +160,19 @@ def joint_OT_mapping_linear(xs,xt,mu=1,eta=0.001,bias=False,verbose=False,verbos
samples in the source domain
xt : np.ndarray (nt,d)
samples in the target domain
- mu: float,optional
+ mu : float,optional
Weight for the linear OT loss (>0)
- eta: float, optional
+ eta : float, optional
Regularization term for the linear mapping L (>0)
- bias: bool,optional
+ bias : bool,optional
Estimate linear mapping with constant bias
- numItermax: int, optional
+ numItermax : int, optional
Max number of BCD iterations
- stopThr: float, optional
+ stopThr : float, optional
Stop threshold on relative loss decrease (>0)
- numInnerItermax: int, optional
+ numInnerItermax : int, optional
Max number of iterations (inner CG solver)
- stopInnerThr: float, optional
+ stopInnerThr : float, optional
Stop threshold on error (inner CG solver) (>0)
verbose : bool, optional
Print information along iterations
@@ -180,11 +182,11 @@ def joint_OT_mapping_linear(xs,xt,mu=1,eta=0.001,bias=False,verbose=False,verbos
Returns
-------
- gamma: (ns x nt) ndarray
+ gamma : (ns x nt) ndarray
Optimal transportation matrix for the given parameters
- L: (d x d) ndarray
+ L : (d x d) ndarray
Linear mapping matrix (d+1 x d if bias)
- log: dict
+ log : dict
log dictionary returned only if log==True in parameters
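
A hedged usage sketch for joint_OT_mapping_linear. The affine toy deformation is illustrative; with bias=True the returned L has shape (d+1, d) and is applied to samples augmented with a constant column, as the docstring above indicates.

    import numpy as np
    import ot

    rng = np.random.RandomState(0)
    xs = rng.randn(30, 2)                         # source samples
    A = np.array([[1.0, 0.3], [0.0, 1.0]])
    xt = xs.dot(A) + 2.0                          # affinely deformed target
    G, L = ot.da.joint_OT_mapping_linear(xs, xt, mu=1, eta=1e-3, bias=True)
    xs1 = np.hstack((xs, np.ones((30, 1))))       # append constant column for the bias
    xs_mapped = xs1.dot(L)                        # estimated barycentric mapping of xs
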
@@ -291,10 +293,89 @@ def joint_OT_mapping_linear(xs,xt,mu=1,eta=0.001,bias=False,verbose=False,verbos
def joint_OT_mapping_kernel(xs,xt,mu=1,eta=0.001,kerneltype='gaussian',sigma=1,bias=False,verbose=False,verbose2=False,numItermax = 100,numInnerItermax = 10,stopInnerThr=1e-6,stopThr=1e-5,log=False,**kwargs):
- """Joint Ot and mapping estimation (uniform weights and )
+ """Joint OT and nonlinear mapping estimation with kernels as proposed in [8]
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \min_{\gamma,L\in\mathcal{H}}\quad \|L(X_s) -n_s\gamma X_t\|^2_F + \mu<\gamma,M>_F + \eta \|L\|^2_\mathcal{H}
+
+ s.t. \gamma 1 = a
+
+ \gamma^T 1= b
+
+ \gamma\geq 0
+ where :
+
+ - M is the (ns,nt) squared Euclidean cost matrix between samples in Xs and Xt (scaled by ns)
+ - :math:`L` is an ns x d linear operator on a kernel matrix that approximates the barycentric mapping
+ - a and b are uniform source and target weights
+
+ The problem consists in solving jointly an optimal transport matrix
+ :math:`\gamma` and the nonlinear mapping that fits the barycentric mapping
+ :math:`n_s\gamma X_t`.
+
+ One can also estimate a mapping with constant bias (see supplementary
+ material of [8]) using the bias optional argument.
+
+ The algorithm used for solving the problem is the block coordinate
+ descent that alternates between updates of G (using conditional gradient)
+ and the update of L using a classical kernel least squares solver.
+
+
+ Parameters
+ ----------
+ xs : np.ndarray (ns,d)
+ samples in the source domain
+ xt : np.ndarray (nt,d)
+ samples in the target domain
+ mu : float,optional
+ Weight for the linear OT loss (>0)
+ eta : float, optional
+ Regularization term for the linear mapping L (>0)
+ bias : bool,optional
+ Estimate linear mapping with constant bias
+ kerneltype : str,optional
+ kernel used by calling function ot.utils.kernel (gaussian by default)
+ sigma : float, optional
+ Gaussian kernel bandwidth.
+ numItermax : int, optional
+ Max number of BCD iterations
+ stopThr : float, optional
+ Stop threshold on relative loss decrease (>0)
+ numInnerItermax : int, optional
+ Max number of iterations (inner CG solver)
+ stopInnerThr : float, optional
+ Stop threshold on error (inner CG solver) (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ gamma : (ns x nt) ndarray
+ Optimal transportation matrix for the given parameters
+ L : (ns x d) ndarray
+ Nonlinear mapping matrix (ns+1 x d if bias)
+ log : dict
+ log dictionary returned only if log==True in parameters
+
+
+ References
+ ----------
+
+ .. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard, "Mapping estimation for discrete optimal transport", Neural Information Processing Systems (NIPS), 2016.
+
+ See Also
+ --------
+ ot.lp.emd : Unregularized OT
+ ot.optim.cg : General regularized OT
+
"""
- ns,nt,d=xs.shape[0],xt.shape[0],xt.shape[1]
+ ns,nt=xs.shape[0],xt.shape[0]
K=kernel(xs,xs,method=kerneltype,sigma=sigma)
if bias:
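
Before the optim.py hunks, a hedged usage sketch for joint_OT_mapping_kernel. The nonlinear toy deformation is illustrative, and mapping new points through ot.utils.kernel is a convention inferred from the function body (with bias=True a constant column is appended), not a documented API.

    import numpy as np
    import ot
    from ot.utils import kernel

    rng = np.random.RandomState(0)
    xs = rng.randn(30, 2)                         # source samples
    xt = np.sign(xs) * np.abs(xs) ** 1.5          # mildly nonlinear deformation
    G, L = ot.da.joint_OT_mapping_kernel(xs, xt, mu=1, eta=1e-3,
                                         kerneltype='gaussian', sigma=1, bias=True)
    xnew = rng.randn(5, 2)
    K = kernel(xnew, xs, method='gaussian', sigma=1)
    K1 = np.hstack((K, np.ones((5, 1))))          # constant column for the bias term
    xnew_mapped = K1.dot(L)                       # estimated mapping of new samples
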
diff --git a/ot/optim.py b/ot/optim.py
index 2b8f565..7ed658c 100644
--- a/ot/optim.py
+++ b/ot/optim.py
@@ -27,7 +27,7 @@ def line_search_armijo(f,xk,pk,gfk,old_fval,args=(),c1=1e-4,alpha0=0.99):
descent direction
gfk : np.ndarray
gradient of f at xk
- old_fval: float
+ old_fval : float
loss value at xk
args : tuple, optional
arguments given to f
@@ -110,9 +110,9 @@ def cg(a,b,M,reg,f,df,G0=None,numItermax = 200,stopThr=1e-9,verbose=False,log=Fa
Returns
-------
- gamma: (ns x nt) ndarray
+ gamma : (ns x nt) ndarray
Optimal transportation matrix for the given parameters
- log: dict
+ log : dict
log dictionary returned only if log==True in parameters
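
Finally, a hedged usage sketch for the cg solver whose docstring is touched above, with a simple squared-Frobenius regularizer; the choice of f and df is illustrative.

    import numpy as np
    import ot

    def f(G):
        return 0.5 * np.sum(G ** 2)               # squared-Frobenius regularization

    def df(G):
        return G                                  # its gradient

    rng = np.random.RandomState(0)
    a, b = ot.unif(20), ot.unif(20)               # uniform marginal weights
    M = ot.dist(rng.randn(20, 2), rng.randn(20, 2))
    M /= M.max()                                  # normalize the cost for stability
    G = ot.optim.cg(a, b, M, reg=1e-1, f=f, df=df)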