-rw-r--r--  README.md                2
-rw-r--r--  docs/source/readme.rst   3
-rw-r--r--  ot/bregman.py          232
-rw-r--r--  ot/da.py               123
-rw-r--r--  ot/optim.py              6
5 files changed, 226 insertions, 140 deletions
diff --git a/README.md b/README.md
index cd9e281..f73cbe5 100644
--- a/README.md
+++ b/README.md
@@ -62,6 +62,8 @@ The examples folder contain several examples and use case for the library. The f
* [Color transfer in images](https://github.com/rflamary/POT/blob/master/examples/Demo_Image_ColorAdaptation.ipynb)
* [OT mapping estimation for domain adaptation](https://github.com/rflamary/POT/blob/master/examples/Demo_2D_OTmapping_DomainAdaptation.ipynb)
+You can also see the notebooks with [Jupyter nbviewer](https://nbviewer.jupyter.org/github/rflamary/POT/tree/master/examples/).
+
## Acknowledgements
The contributors to this library are:
diff --git a/docs/source/readme.rst b/docs/source/readme.rst
index 7b88cad..653adaf 100644
--- a/docs/source/readme.rst
+++ b/docs/source/readme.rst
@@ -86,6 +86,9 @@ Here is a list of the Python notebooks if you want a quick look:
- `OT mapping estimation for domain
adaptation <https://github.com/rflamary/POT/blob/master/examples/Demo_2D_OTmapping_DomainAdaptation.ipynb>`__
+You can also see the notebooks with `Jupyter
+nbviewer <https://nbviewer.jupyter.org/github/rflamary/POT/tree/master/examples/>`__.
+
Acknowledgements
----------------
diff --git a/ot/bregman.py b/ot/bregman.py
index 2d82ae4..a770c5a 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -14,21 +14,21 @@ def sinkhorn(a,b, M, reg, numItermax = 1000, stopThr=1e-9, verbose=False, log=Fa
.. math::
\gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
-
+
s.t. \gamma 1 = a
-
- \gamma^T 1= b
-
+
+ \gamma^T 1= b
+
\gamma\geq 0
where :
-
+
- M is the (ns,nt) metric cost matrix
- :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- a and b are source and target weights (sum to 1)
-
+
The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [2]_
-
-
+
+
Parameters
----------
a : np.ndarray (ns,)
@@ -36,79 +36,79 @@ def sinkhorn(a,b, M, reg, numItermax = 1000, stopThr=1e-9, verbose=False, log=Fa
b : np.ndarray (nt,)
samples in the target domain
M : np.ndarray (ns,nt)
- loss matrix
- reg: float
+ loss matrix
+ reg : float
Regularization term >0
- numItermax: int, optional
+ numItermax : int, optional
Max number of iterations
- stopThr: float, optional
+ stopThr : float, optional
Stop threshold on error (>0)
verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
-
-
+ record log if True
+
+
Returns
-------
- gamma: (ns x nt) ndarray
+ gamma : (ns x nt) ndarray
Optimal transportation matrix for the given parameters
- log: dict
- log dictionary return only if log==True in parameters
+ log : dict
+ log dictionary returned only if log==True in parameters
Examples
--------
-
+
>>> a=[.5,.5]
>>> b=[.5,.5]
>>> M=[[0.,1.],[1.,0.]]
>>> ot.sinkhorn(a,b,M,1)
array([[ 0.36552929, 0.13447071],
[ 0.13447071, 0.36552929]])
-
-
+
+
References
----------
-
+
.. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
-
-
+
+
See Also
--------
ot.lp.emd : Unregularized OT
ot.optim.cg : General regularized OT
-
- """
-
+
+ """
+
a=np.asarray(a,dtype=np.float64)
b=np.asarray(b,dtype=np.float64)
M=np.asarray(M,dtype=np.float64)
-
+
if len(a)==0:
a=np.ones((M.shape[0],),dtype=np.float64)/M.shape[0]
if len(b)==0:
- b=np.ones((M.shape[1],),dtype=np.float64)/M.shape[1]
-
+ b=np.ones((M.shape[1],),dtype=np.float64)/M.shape[1]
+
# init data
Nini = len(a)
Nfin = len(b)
-
-
+
+
cpt = 0
if log:
log={'err':[]}
-
+
# we assume that no distances are null except those of the diagonal of distances
u = np.ones(Nini)/Nini
- v = np.ones(Nfin)/Nfin
+ v = np.ones(Nfin)/Nfin
uprev=np.zeros(Nini)
vprev=np.zeros(Nini)
#print reg
-
+
K = np.exp(-M/reg)
#print np.min(K)
-
+
Kp = np.dot(np.diag(1/a),K)
transp = K
cpt = 0
@@ -120,10 +120,10 @@ def sinkhorn(a,b, M, reg, numItermax = 1000, stopThr=1e-9, verbose=False, log=Fa
print('Warning: numerical errors')
if cpt!=0:
u = uprev
- v = vprev
+ v = vprev
break
uprev = u
- vprev = v
+ vprev = v
v = np.divide(b,np.dot(K.T,u))
u = 1./np.dot(Kp,v)
if cpt%10==0:
@@ -131,14 +131,14 @@ def sinkhorn(a,b, M, reg, numItermax = 1000, stopThr=1e-9, verbose=False, log=Fa
transp = np.dot(np.diag(u),np.dot(K,np.diag(v)))
err = np.linalg.norm((np.sum(transp,axis=0)-b))**2
if log:
- log['err'].append(err)
-
+ log['err'].append(err)
+
if verbose:
if cpt%200 ==0:
print('{:5s}|{:12s}'.format('It.','Err')+'\n'+'-'*19)
print('{:5d}|{:8e}|'.format(cpt,err))
cpt = cpt +1
- #print 'err=',err,' cpt=',cpt
+ #print 'err=',err,' cpt=',cpt
if log:
return np.dot(np.diag(u),np.dot(K,np.diag(v))),log
else:
@@ -147,12 +147,12 @@ def sinkhorn(a,b, M, reg, numItermax = 1000, stopThr=1e-9, verbose=False, log=Fa
def geometricBar(weights,alldistribT):
"""return the weighted geometric mean of distributions"""
- assert(len(weights)==alldistribT.shape[1])
- return np.exp(np.dot(np.log(alldistribT),weights.T))
+ assert(len(weights)==alldistribT.shape[1])
+ return np.exp(np.dot(np.log(alldistribT),weights.T))
-def geometricMean(alldistribT):
+def geometricMean(alldistribT):
"""return the geometric mean of distributions"""
- return np.exp(np.mean(np.log(alldistribT),axis=1))
+ return np.exp(np.mean(np.log(alldistribT),axis=1))
def projR(gamma,p):
#return np.dot(np.diag(p/np.maximum(np.sum(gamma,axis=1),1e-10)),gamma)
@@ -161,65 +161,65 @@ def projR(gamma,p):
def projC(gamma,q):
#return (np.dot(np.diag(q/np.maximum(np.sum(gamma,axis=0),1e-10)),gamma.T)).T
return np.multiply(gamma,q/np.maximum(np.sum(gamma,axis=0),1e-10))
-
+
def barycenter(A,M,reg, weights=None, numItermax = 1000, stopThr=1e-4,verbose=False,log=False):
"""Compute the entropic regularized wasserstein barycenter of distributions A
The function solves the following optimization problem:
-
+
.. math::
\mathbf{a} = arg\min_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
-
+
where :
-
+
- :math:`W_{reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance (see ot.bregman.sinkhorn)
- :math:`\mathbf{a}_i` are training distributions in the columns of matrix :math:`\mathbf{A}`
- reg and :math:`\mathbf{M}` are respectively the regularization term and the cost matrix for OT
-
+
The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [3]_
-
+
Parameters
----------
A : np.ndarray (d,n)
- n training distributions of size d
+ n training distributions of size d
M : np.ndarray (d,d)
- loss matrix for OT
- reg: float
+ loss matrix for OT
+ reg : float
Regularization term >0
- numItermax: int, optional
+ numItermax : int, optional
Max number of iterations
- stopThr: float, optional
+ stopThr : float, optional
Stop threshold on error (>0)
verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
-
-
+ record log if True
+
+
Returns
-------
- a: (d,) ndarray
+ a : (d,) ndarray
Wasserstein barycenter
- log: dict
- log dictionary return only if log==True in parameters
+ log : dict
+ log dictionary returned only if log==True in parameters
+
-
References
----------
-
+
.. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G. (2015). Iterative Bregman projections for regularized transportation problems. SIAM Journal on Scientific Computing, 37(2), A1111-A1138.
-
-
-
+
+
+
"""
-
-
+
+
if weights is None:
weights=np.ones(A.shape[1])/A.shape[1]
else:
assert(len(weights)==A.shape[1])
-
+
if log:
log={'err':[]}
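
Before the iteration hunks below, a quick usage sketch of barycenter as documented above. This is a hedged example: it assumes POT imports as ot and that ot.dist is available as in the README examples; the toy histograms are illustrative.

    import numpy as np
    import ot

    d = 10
    x = np.arange(d, dtype=np.float64)
    a1 = np.exp(-(x - 2) ** 2); a1 /= a1.sum()    # smoothed histogram centered at bin 2
    a2 = np.exp(-(x - 7) ** 2); a2 /= a2.sum()    # smoothed histogram centered at bin 7
    A = np.vstack((a1, a2)).T                     # distributions as columns, shape (d, n)
    M = ot.dist(x.reshape(-1, 1), x.reshape(-1, 1))
    M /= M.max()                                  # normalized squared Euclidean ground cost
    bary = ot.bregman.barycenter(A, M, reg=1e-2, weights=np.array([0.5, 0.5]))
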
@@ -231,130 +231,130 @@ def barycenter(A,M,reg, weights=None, numItermax = 1000, stopThr=1e-4,verbose=Fa
UKv=np.dot(K,np.divide(A.T,np.sum(K,axis=0)).T)
u = (geometricMean(UKv)/UKv.T).T
-
+
while (err>stopThr and cpt<numItermax):
cpt = cpt +1
UKv=u*np.dot(K,np.divide(A,np.dot(K,u)))
u = (u.T*geometricBar(weights,UKv)).T/UKv
-
+
if cpt%10==1:
err=np.sum(np.std(UKv,axis=1))
-
+
# log and verbose print
if log:
log['err'].append(err)
-
+
if verbose:
if cpt%200 ==0:
print('{:5s}|{:12s}'.format('It.','Err')+'\n'+'-'*19)
- print('{:5d}|{:8e}|'.format(cpt,err))
+ print('{:5d}|{:8e}|'.format(cpt,err))
if log:
log['niter']=cpt
return geometricBar(weights,UKv),log
else:
return geometricBar(weights,UKv)
-
+
def unmix(a,D,M,M0,h0,reg,reg0,alpha,numItermax = 1000, stopThr=1e-3,verbose=False,log=False):
"""
Compute the unmixing of an observation with a given dictionary using Wasserstein distance
-
+
The function solves the following optimization problem:
-
+
.. math::
\mathbf{h} = arg\min_\mathbf{h} (1- \\alpha) W_{M,reg}(\mathbf{a},\mathbf{Dh})+\\alpha W_{M0,reg0}(\mathbf{h}_0,\mathbf{h})
where :
-
+
- :math:`W_{M,reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance with M loss matrix (see ot.bregman.sinkhorn)
- - :math:`\mathbf{a}` is an observed distribution, :math:`\mathbf{h}_0` is aprior on unmixing
+ - :math:`\mathbf{a}` is an observed distribution, :math:`\mathbf{h}_0` is a prior on the unmixing
- reg and :math:`\mathbf{M}` are respectively the regularization term and the cost matrix for OT data fitting
- reg0 and :math:`\mathbf{M0}` are respectively the regularization term and the cost matrix for regularization
- - :math:`\\alpha`weight data fitting and regularization
-
+ - :math:`\\alpha` weights data fitting and regularization
+
The optimization problem is solved using the algorithm described in [4]
-
-
+
+
Parameters
----------
a : np.ndarray (d)
observed distribution
D : np.ndarray (d,n)
- dictionary matrix
+ dictionary matrix
M : np.ndarray (d,d)
- loss matrix
+ loss matrix
M0 : np.ndarray (n,n)
- loss matrix
+ loss matrix
h0 : np.ndarray (n,)
- prior on h
- reg: float
+ prior on h
+ reg : float
Regularization term >0 (Wasserstein data fitting)
- reg0: float
- Regularization term >0 (Wasserstein reg with h0)
- alpha: float
+ reg0 : float
+ Regularization term >0 (Wasserstein reg with h0)
+ alpha : float
How much should we trust the prior ([0,1])
- numItermax: int, optional
+ numItermax : int, optional
Max number of iterations
- stopThr: float, optional
+ stopThr : float, optional
Stop threshold on error (>0)
verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
-
-
+ record log if True
+
+
Returns
-------
- a: (d,) ndarray
+ h : (n,) ndarray
Estimated unmixing of the observation a
- log: dict
- log dictionary return only if log==True in parameters
-
+ log : dict
+ log dictionary returned only if log==True in parameters
+
References
----------
-
+
.. [4] S. Nakhostin, N. Courty, R. Flamary, D. Tuia, T. Corpetti, Supervised planetary unmixing with optimal transport, Workshop on Hyperspectral Image and Signal Processing : Evolution in Remote Sensing (WHISPERS), 2016.
"""
-
- #M = M/np.median(M)
+
+ #M = M/np.median(M)
K = np.exp(-M/reg)
-
- #M0 = M0/np.median(M0)
+
+ #M0 = M0/np.median(M0)
K0 = np.exp(-M0/reg0)
old = h0
err=1
- cpt=0
+ cpt=0
#log = {'niter':0, 'all_err':[]}
if log:
log={'err':[]}
-
-
+
+
while (err>stopThr and cpt<numItermax):
- K = projC(K,a)
- K0 = projC(K0,h0)
+ K = projC(K,a)
+ K0 = projC(K0,h0)
new = np.sum(K0,axis=1)
inv_new = np.dot(D,new) # we recombine the current selection from the dictionary
other = np.sum(K,axis=1)
delta = np.exp(alpha*np.log(other)+(1-alpha)*np.log(inv_new)) # geometric interpolation
K = projR(K,delta)
K0 = np.dot(np.diag(np.dot(D.T,delta/inv_new)),K0)
-
+
err=np.linalg.norm(np.sum(K0,axis=1)-old)
old = new
if log:
log['err'].append(err)
-
+
if verbose:
if cpt%200 ==0:
print('{:5s}|{:12s}'.format('It.','Err')+'\n'+'-'*19)
- print('{:5d}|{:8e}|'.format(cpt,err))
-
+ print('{:5d}|{:8e}|'.format(cpt,err))
+
cpt = cpt+1
-
+
if log:
log['niter']=cpt
return np.sum(K0,axis=1),log
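
To round off the bregman.py changes, a hedged usage sketch for unmix. The toy dictionary, the 0/1 cost M0 between atoms and the uniform prior h0 are illustrative choices, not values prescribed by the library.

    import numpy as np
    import ot

    d, n = 20, 3
    x = np.arange(d, dtype=np.float64).reshape(-1, 1)
    M = ot.dist(x, x); M /= M.max()               # data-fitting ground cost
    centers = np.array([3.0, 10.0, 16.0])
    D = np.exp(-(x - centers) ** 2 / 4.0)         # n smoothed atoms as columns
    D /= D.sum(axis=0)
    a = 0.6 * D[:, 0] + 0.4 * D[:, 2]             # observation: mixture of atoms 0 and 2
    M0 = np.ones((n, n)) - np.eye(n)              # 0/1 cost between atoms
    h0 = np.ones(n) / n                           # uniform prior on the unmixing
    h = ot.bregman.unmix(a, D, M, M0, h0, reg=1e-2, reg0=1e-1, alpha=0.5)
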
diff --git a/ot/da.py b/ot/da.py
index 76bc6a3..72ca3ac 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -14,7 +14,6 @@ def indices(a, func):
return [i for (i, val) in enumerate(a) if func(val)]
-
def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerItermax = 200,stopInnerThr=1e-9,verbose=False,log=False):
"""
Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization
@@ -49,15 +48,15 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter
samples in the target domain
M : np.ndarray (ns,nt)
loss matrix
- reg: float
+ reg : float
Regularization term for entropic regularization >0
- eta: float, optional
+ eta : float, optional
Regularization term for group lasso regularization >0
- numItermax: int, optional
+ numItermax : int, optional
Max number of iterations
- numInnerItermax: int, optional
+ numInnerItermax : int, optional
Max number of iterations (inner sinkhorn solver)
- stopInnerThr: float, optional
+ stopInnerThr : float, optional
Stop threshold on error (inner sinkhorn solver) (>0)
verbose : bool, optional
Print information along iterations
@@ -67,9 +66,9 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter
Returns
-------
- gamma: (ns x nt) ndarray
+ gamma : (ns x nt) ndarray
Optimal transportation matrix for the given parameters
- log: dict
+ log : dict
log dictionary returned only if log==True in parameters
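
A hedged usage sketch for sinkhorn_lpl1_mm as documented above; the random samples and class labels are illustrative, and ot.unif and ot.dist are assumed available as in the README examples.

    import numpy as np
    import ot

    rng = np.random.RandomState(0)
    xs = rng.randn(50, 2)                         # source samples
    labels_a = rng.randint(0, 2, 50)              # source class labels
    xt = rng.randn(50, 2) + 1.0                   # shifted target samples
    a, b = ot.unif(50), ot.unif(50)               # uniform marginal weights
    M = ot.dist(xs, xt); M /= M.max()             # normalized squared Euclidean cost
    G = ot.da.sinkhorn_lpl1_mm(a, labels_a, b, M, reg=1e-1, eta=1e-1)
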
@@ -145,7 +144,10 @@ def joint_OT_mapping_linear(xs,xt,mu=1,eta=0.001,bias=False,verbose=False,verbos
The problem consists in solving jointly an optimal transport matrix
:math:`\gamma` and a linear mapping that fits the barycentric mapping
- :math:`n_s\gamma X_t`
+ :math:`n_s\gamma X_t`.
+
+ One can also estimate a mapping with constant bias (see supplementary
+ material of [8]) using the bias optional argument.
The algorithm used for solving the problem is the block coordinate
descent that alternates between updates of G (using conditional gradient)
@@ -158,19 +160,19 @@ def joint_OT_mapping_linear(xs,xt,mu=1,eta=0.001,bias=False,verbose=False,verbos
samples in the source domain
xt : np.ndarray (nt,d)
samples in the target domain
- mu: float,optional
+ mu : float,optional
Weight for the linear OT loss (>0)
- eta: float, optional
+ eta : float, optional
Regularization term for the linear mapping L (>0)
- bias: bool,optional
+ bias : bool,optional
Estimate linear mapping with constant bias
- numItermax: int, optional
+ numItermax : int, optional
Max number of BCD iterations
- stopThr: float, optional
+ stopThr : float, optional
Stop threshold on relative loss decrease (>0)
- numInnerItermax: int, optional
+ numInnerItermax : int, optional
Max number of iterations (inner CG solver)
- stopInnerThr: float, optional
+ stopInnerThr : float, optional
Stop threshold on error (inner CG solver) (>0)
verbose : bool, optional
Print information along iterations
@@ -180,11 +182,11 @@ def joint_OT_mapping_linear(xs,xt,mu=1,eta=0.001,bias=False,verbose=False,verbos
Returns
-------
- gamma: (ns x nt) ndarray
+ gamma : (ns x nt) ndarray
Optimal transportation matrix for the given parameters
- L: (d x d) ndarray
+ L : (d x d) ndarray
Linear mapping matrix (d+1 x d if bias)
- log: dict
+ log : dict
log dictionary returned only if log==True in parameters
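
A hedged usage sketch for joint_OT_mapping_linear. The affine toy deformation is illustrative; with bias=True the returned L has shape (d+1, d) and is applied to samples augmented with a constant column, as the docstring above indicates.

    import numpy as np
    import ot

    rng = np.random.RandomState(0)
    xs = rng.randn(30, 2)                         # source samples
    A = np.array([[1.0, 0.3], [0.0, 1.0]])
    xt = xs.dot(A) + 2.0                          # affinely deformed target
    G, L = ot.da.joint_OT_mapping_linear(xs, xt, mu=1, eta=1e-3, bias=True)
    xs1 = np.hstack((xs, np.ones((30, 1))))       # append constant column for the bias
    xs_mapped = xs1.dot(L)                        # estimated barycentric mapping of xs
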
@@ -291,10 +293,89 @@ def joint_OT_mapping_linear(xs,xt,mu=1,eta=0.001,bias=False,verbose=False,verbos
def joint_OT_mapping_kernel(xs,xt,mu=1,eta=0.001,kerneltype='gaussian',sigma=1,bias=False,verbose=False,verbose2=False,numItermax = 100,numInnerItermax = 10,stopInnerThr=1e-6,stopThr=1e-5,log=False,**kwargs):
- """Joint Ot and mapping estimation (uniform weights and )
+ """Joint OT and nonlinear mapping estimation with kernels as proposed in [8]
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \min_{\gamma,L\in\mathcal{H}}\quad \|L(X_s) -n_s\gamma X_t\|^2_F + \mu<\gamma,M>_F + \eta \|L\|^2_\mathcal{H}
+
+ s.t. \gamma 1 = a
+
+ \gamma^T 1= b
+
+ \gamma\geq 0
+ where :
+
+ - M is the (ns,nt) squared Euclidean cost matrix between samples in Xs and Xt (scaled by ns)
+ - :math:`L` is an ns x d linear operator on a kernel matrix that approximates the barycentric mapping
+ - a and b are uniform source and target weights
+
+ The problem consists in solving jointly an optimal transport matrix
+ :math:`\gamma` and the nonlinear mapping that fits the barycentric mapping
+ :math:`n_s\gamma X_t`.
+
+ One can also estimate a mapping with constant bias (see supplementary
+ material of [8]) using the bias optional argument.
+
+ The algorithm used for solving the problem is the block coordinate
+ descent that alternates between updates of G (using conditional gradient)
+ and the update of L using a classical kernel least squares solver.
+
+
+ Parameters
+ ----------
+ xs : np.ndarray (ns,d)
+ samples in the source domain
+ xt : np.ndarray (nt,d)
+ samples in the target domain
+ mu : float,optional
+ Weight for the linear OT loss (>0)
+ eta : float, optional
+ Regularization term for the linear mapping L (>0)
+ bias : bool,optional
+ Estimate linear mapping with constant bias
+ kerneltype : str,optional
+ kernel used by calling function ot.utils.kernel (gaussian by default)
+ sigma : float, optional
+ Gaussian kernel bandwidth.
+ numItermax : int, optional
+ Max number of BCD iterations
+ stopThr : float, optional
+ Stop threshold on relative loss decrease (>0)
+ numInnerItermax : int, optional
+ Max number of iterations (inner CG solver)
+ stopInnerThr : float, optional
+ Stop threshold on error (inner CG solver) (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ gamma : (ns x nt) ndarray
+ Optimal transportation matrix for the given parameters
+ L : (ns x d) ndarray
+ Nonlinear mapping matrix (ns+1 x d if bias)
+ log : dict
+ log dictionary returned only if log==True in parameters
+
+
+ References
+ ----------
+
+ .. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard, "Mapping estimation for discrete optimal transport", Neural Information Processing Systems (NIPS), 2016.
+
+ See Also
+ --------
+ ot.lp.emd : Unregularized OT
+ ot.optim.cg : General regularized OT
+
"""
- ns,nt,d=xs.shape[0],xt.shape[0],xt.shape[1]
+ ns,nt=xs.shape[0],xt.shape[0]
K=kernel(xs,xs,method=kerneltype,sigma=sigma)
if bias:
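
Before the optim.py hunks, a hedged usage sketch for joint_OT_mapping_kernel. The nonlinear toy deformation is illustrative, and mapping new points through ot.utils.kernel is a convention inferred from the function body (with bias=True a constant column is appended), not a documented API.

    import numpy as np
    import ot
    from ot.utils import kernel

    rng = np.random.RandomState(0)
    xs = rng.randn(30, 2)                         # source samples
    xt = np.sign(xs) * np.abs(xs) ** 1.5          # mildly nonlinear deformation
    G, L = ot.da.joint_OT_mapping_kernel(xs, xt, mu=1, eta=1e-3,
                                         kerneltype='gaussian', sigma=1, bias=True)
    xnew = rng.randn(5, 2)
    K = kernel(xnew, xs, method='gaussian', sigma=1)
    K1 = np.hstack((K, np.ones((5, 1))))          # constant column for the bias term
    xnew_mapped = K1.dot(L)                       # estimated mapping of new samples
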
diff --git a/ot/optim.py b/ot/optim.py
index 2b8f565..7ed658c 100644
--- a/ot/optim.py
+++ b/ot/optim.py
@@ -27,7 +27,7 @@ def line_search_armijo(f,xk,pk,gfk,old_fval,args=(),c1=1e-4,alpha0=0.99):
descent direction
gfk : np.ndarray
gradient of f at xk
- old_fval: float
+ old_fval : float
loss value at xk
args : tuple, optional
arguments given to f
@@ -110,9 +110,9 @@ def cg(a,b,M,reg,f,df,G0=None,numItermax = 200,stopThr=1e-9,verbose=False,log=Fa
Returns
-------
- gamma: (ns x nt) ndarray
+ gamma : (ns x nt) ndarray
Optimal transportation matrix for the given parameters
- log: dict
+ log : dict
log dictionary returned only if log==True in parameters
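
Finally, a hedged usage sketch for the cg solver whose docstring is touched above, with a simple squared-Frobenius regularizer; the choice of f and df is illustrative.

    import numpy as np
    import ot

    def f(G):
        return 0.5 * np.sum(G ** 2)               # squared-Frobenius regularization

    def df(G):
        return G                                  # its gradient

    rng = np.random.RandomState(0)
    a, b = ot.unif(20), ot.unif(20)               # uniform marginal weights
    M = ot.dist(rng.randn(20, 2), rng.randn(20, 2))
    M /= M.max()                                  # normalize the cost for stability
    G = ot.optim.cg(a, b, M, reg=1e-1, f=f, df=df)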