Diffstat (limited to 'ot/bregman.py')
-rw-r--r-- | ot/bregman.py | 1272
1 file changed, 1057 insertions, 215 deletions
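The headline behavioral change in this diff is that ot.sinkhorn, ot.sinkhorn2 and ot.bregman.barycenter now raise a ValueError on an unrecognized `method` instead of printing a warning and silently falling back to classic Sinkhorn-Knopp. A minimal sketch of the new behavior, assuming a POT build that includes this commit (the misspelled method name below is a deliberate, hypothetical example):

    import ot

    a, b = [.5, .5], [.5, .5]
    M = [[0., 1.], [1., 0.]]

    G = ot.sinkhorn(a, b, M, 1, method='sinkhorn')  # valid method: returns the OT plan
    try:
        ot.sinkhorn(a, b, M, 1, method='sinkhorn_typo')  # unknown method
    except ValueError as err:
        print(err)  # Unknown method 'sinkhorn_typo'.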
diff --git a/ot/bregman.py b/ot/bregman.py index d1057ff..2707b7c 100644 --- a/ot/bregman.py +++ b/ot/bregman.py @@ -5,15 +5,22 @@ Bregman projections for regularized OT # Author: Remi Flamary <remi.flamary@unice.fr> # Nicolas Courty <ncourty@irisa.fr> +# Kilian Fatras <kilian.fatras@irisa.fr> +# Titouan Vayer <titouan.vayer@irisa.fr> +# Hicham Janati <hicham.janati@inria.fr> +# Mokhtar Z. Alaya <mokhtarzahdi.alaya@gmail.com> # # License: MIT License import numpy as np +import warnings +from .utils import unif, dist +from scipy.optimize import fmin_l_bfgs_b def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs): - u""" + r""" Solve the entropic regularization optimal transport problem and return the OT matrix The function solves the following optimization problem: @@ -28,21 +35,21 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, \gamma\geq 0 where : - - M is the (ns,nt) metric cost matrix + - M is the (dim_a, dim_b) metric cost matrix - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - a and b are source and target weights (sum to 1) + - a and b are source and target weights (histograms, both sum to 1) The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [2]_ Parameters ---------- - a : np.ndarray (ns,) + a : ndarray, shape (dim_a,) samples weights in the source domain - b : np.ndarray (nt,) or np.ndarray (nt,nbb) + b : ndarray, shape (dim_b,) or ndarray, shape (dim_b, n_hists) samples in the target domain, compute sinkhorn with multiple targets and fixed M if b is a matrix (return OT loss + dual variables in log) - M : np.ndarray (ns,nt) + M : ndarray, shape (dim_a, dim_b) loss matrix reg : float Regularization term >0 @@ -61,7 +68,7 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, Returns ------- - gamma : (ns x nt) ndarray + gamma : ndarray, shape (dim_a, dim_b) Optimal transportation matrix for the given parameters log : dict log dictionary return only if log==True in parameters @@ -70,12 +77,12 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, -------- >>> import ot - >>> a=[.5,.5] - >>> b=[.5,.5] - >>> M=[[0.,1.],[1.,0.]] - >>> ot.sinkhorn(a,b,M,1) - array([[ 0.36552929, 0.13447071], - [ 0.13447071, 0.36552929]]) + >>> a=[.5, .5] + >>> b=[.5, .5] + >>> M=[[0., 1.], [1., 0.]] + >>> ot.sinkhorn(a, b, M, 1) + array([[0.36552929, 0.13447071], + [0.13447071, 0.36552929]]) References @@ -100,34 +107,28 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, """ if method.lower() == 'sinkhorn': - def sink(): - return sinkhorn_knopp(a, b, M, reg, numItermax=numItermax, - stopThr=stopThr, verbose=verbose, log=log, **kwargs) + return sinkhorn_knopp(a, b, M, reg, numItermax=numItermax, + stopThr=stopThr, verbose=verbose, log=log, + **kwargs) elif method.lower() == 'greenkhorn': - def sink(): - return greenkhorn(a, b, M, reg, numItermax=numItermax, - stopThr=stopThr, verbose=verbose, log=log) + return greenkhorn(a, b, M, reg, numItermax=numItermax, + stopThr=stopThr, verbose=verbose, log=log) elif method.lower() == 'sinkhorn_stabilized': - def sink(): - return sinkhorn_stabilized(a, b, M, reg, numItermax=numItermax, - stopThr=stopThr, verbose=verbose, log=log, **kwargs) + return sinkhorn_stabilized(a, b, M, reg, numItermax=numItermax, + stopThr=stopThr, verbose=verbose, + log=log, **kwargs) elif method.lower() == 'sinkhorn_epsilon_scaling': - def sink(): - 
return sinkhorn_epsilon_scaling( - a, b, M, reg, numItermax=numItermax, - stopThr=stopThr, verbose=verbose, log=log, **kwargs) + return sinkhorn_epsilon_scaling(a, b, M, reg, + numItermax=numItermax, + stopThr=stopThr, verbose=verbose, + log=log, **kwargs) else: - print('Warning : unknown method using classic Sinkhorn Knopp') - - def sink(): - return sinkhorn_knopp(a, b, M, reg, **kwargs) - - return sink() + raise ValueError("Unknown method '%s'." % method) def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs): - u""" + r""" Solve the entropic regularization optimal transport problem and return the loss The function solves the following optimization problem: @@ -142,21 +143,21 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000, \gamma\geq 0 where : - - M is the (ns,nt) metric cost matrix + - M is the (dim_a, dim_b) metric cost matrix - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - a and b are source and target weights (sum to 1) + - a and b are source and target weights (histograms, both sum to 1) The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [2]_ Parameters ---------- - a : np.ndarray (ns,) + a : ndarray, shape (dim_a,) samples weights in the source domain - b : np.ndarray (nt,) or np.ndarray (nt,nbb) + b : ndarray, shape (dim_b,) or ndarray, shape (dim_b, n_hists) samples in the target domain, compute sinkhorn with multiple targets and fixed M if b is a matrix (return OT loss + dual variables in log) - M : np.ndarray (ns,nt) + M : ndarray, shape (dim_a, dim_b) loss matrix reg : float Regularization term >0 @@ -172,11 +173,10 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000, log : bool, optional record log if True - Returns ------- - W : (nt) ndarray or float - Optimal transportation matrix for the given parameters + W : (n_hists) ndarray or float + Optimal transportation loss for the given parameters log : dict log dictionary return only if log==True in parameters @@ -184,11 +184,11 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000, -------- >>> import ot - >>> a=[.5,.5] - >>> b=[.5,.5] - >>> M=[[0.,1.],[1.,0.]] - >>> ot.sinkhorn2(a,b,M,1) - array([ 0.26894142]) + >>> a=[.5, .5] + >>> b=[.5, .5] + >>> M=[[0., 1.], [1., 0.]] + >>> ot.sinkhorn2(a, b, M, 1) + array([0.26894142]) @@ -215,36 +215,28 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000, ot.bregman.sinkhorn_epsilon_scaling: Sinkhorn with epslilon scaling [9][10] """ - + b = np.asarray(b, dtype=np.float64) + if len(b.shape) < 2: + b = b[:, None] if method.lower() == 'sinkhorn': - def sink(): - return sinkhorn_knopp(a, b, M, reg, numItermax=numItermax, - stopThr=stopThr, verbose=verbose, log=log, **kwargs) + return sinkhorn_knopp(a, b, M, reg, numItermax=numItermax, + stopThr=stopThr, verbose=verbose, log=log, + **kwargs) elif method.lower() == 'sinkhorn_stabilized': - def sink(): - return sinkhorn_stabilized(a, b, M, reg, numItermax=numItermax, - stopThr=stopThr, verbose=verbose, log=log, **kwargs) + return sinkhorn_stabilized(a, b, M, reg, numItermax=numItermax, + stopThr=stopThr, verbose=verbose, log=log, + **kwargs) elif method.lower() == 'sinkhorn_epsilon_scaling': - def sink(): - return sinkhorn_epsilon_scaling( - a, b, M, reg, numItermax=numItermax, - stopThr=stopThr, verbose=verbose, log=log, **kwargs) + return sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=numItermax, 
+ stopThr=stopThr, verbose=verbose, + log=log, **kwargs) else: - print('Warning : unknown method using classic Sinkhorn Knopp') - - def sink(): - return sinkhorn_knopp(a, b, M, reg, **kwargs) - - b = np.asarray(b, dtype=np.float64) - if len(b.shape) < 2: - b = b.reshape((-1, 1)) - - return sink() + raise ValueError("Unknown method '%s'." % method) def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs): - """ + r""" Solve the entropic regularization optimal transport problem and return the OT matrix The function solves the following optimization problem: @@ -259,21 +251,21 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, \gamma\geq 0 where : - - M is the (ns,nt) metric cost matrix + - M is the (dim_a, dim_b) metric cost matrix - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - a and b are source and target weights (sum to 1) + - a and b are source and target weights (histograms, both sum to 1) The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [2]_ Parameters ---------- - a : np.ndarray (ns,) + a : ndarray, shape (dim_a,) samples weights in the source domain - b : np.ndarray (nt,) or np.ndarray (nt,nbb) + b : ndarray, shape (dim_b,) or ndarray, shape (dim_b, n_hists) samples in the target domain, compute sinkhorn with multiple targets and fixed M if b is a matrix (return OT loss + dual variables in log) - M : np.ndarray (ns,nt) + M : ndarray, shape (dim_a, dim_b) loss matrix reg : float Regularization term >0 @@ -286,10 +278,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, log : bool, optional record log if True - Returns ------- - gamma : (ns x nt) ndarray + gamma : ndarray, shape (dim_a, dim_b) Optimal transportation matrix for the given parameters log : dict log dictionary return only if log==True in parameters @@ -298,12 +289,12 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, -------- >>> import ot - >>> a=[.5,.5] - >>> b=[.5,.5] - >>> M=[[0.,1.],[1.,0.]] - >>> ot.sinkhorn(a,b,M,1) - array([[ 0.36552929, 0.13447071], - [ 0.13447071, 0.36552929]]) + >>> a=[.5, .5] + >>> b=[.5, .5] + >>> M=[[0., 1.], [1., 0.]] + >>> ot.sinkhorn(a, b, M, 1) + array([[0.36552929, 0.13447071], + [0.13447071, 0.36552929]]) References @@ -329,25 +320,25 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] # init data - Nini = len(a) - Nfin = len(b) + dim_a = len(a) + dim_b = len(b) if len(b.shape) > 1: - nbb = b.shape[1] + n_hists = b.shape[1] else: - nbb = 0 + n_hists = 0 if log: log = {'err': []} # we assume that no distances are null except those of the diagonal of # distances - if nbb: - u = np.ones((Nini, nbb)) / Nini - v = np.ones((Nfin, nbb)) / Nfin + if n_hists: + u = np.ones((dim_a, n_hists)) / dim_a + v = np.ones((dim_b, n_hists)) / dim_b else: - u = np.ones(Nini) / Nini - v = np.ones(Nfin) / Nfin + u = np.ones(dim_a) / dim_a + v = np.ones(dim_b) / dim_b # print(reg) @@ -370,9 +361,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, v = np.divide(b, KtransposeU) u = 1. 
/ np.dot(Kp, v) - if (np.any(KtransposeU == 0) or - np.any(np.isnan(u)) or np.any(np.isnan(v)) or - np.any(np.isinf(u)) or np.any(np.isinf(v))): + if (np.any(KtransposeU == 0) + or np.any(np.isnan(u)) or np.any(np.isnan(v)) + or np.any(np.isinf(u)) or np.any(np.isinf(v))): # we have reached the machine precision # come back to previous solution and quit loop print('Warning: numerical errors at iteration', cpt) @@ -382,13 +373,12 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, if cpt % 10 == 0: # we can speed up the process by checking for the error only all # the 10th iterations - if nbb: - err = np.sum((u - uprev)**2) / np.sum((u)**2) + \ - np.sum((v - vprev)**2) / np.sum((v)**2) + if n_hists: + np.einsum('ik,ij,jk->jk', u, K, v, out=tmp2) else: # compute right marginal tmp2= (diag(u)Kdiag(v))^T1 np.einsum('i,ij,j->j', u, K, v, out=tmp2) - err = np.linalg.norm(tmp2 - b)**2 # violation of marginal + err = np.linalg.norm(tmp2 - b) # violation of marginal if log: log['err'].append(err) @@ -402,7 +392,7 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, log['u'] = u log['v'] = v - if nbb: # return only loss + if n_hists: # return only loss res = np.einsum('ik,ij,jk,ij->k', u, K, v, M) if log: return res, log @@ -417,8 +407,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, return u.reshape((-1, 1)) * K * v.reshape((1, -1)) -def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log=False): - """ +def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, + log=False): + r""" Solve the entropic regularization optimal transport problem and return the OT matrix The algorithm used is based on the paper @@ -441,20 +432,20 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log= \gamma\geq 0 where : - - M is the (ns,nt) metric cost matrix + - M is the (dim_a, dim_b) metric cost matrix - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - a and b are source and target weights (sum to 1) + - a and b are source and target weights (histograms, both sum to 1) Parameters ---------- - a : np.ndarray (ns,) + a : ndarray, shape (dim_a,) samples weights in the source domain - b : np.ndarray (nt,) or np.ndarray (nt,nbb) + b : ndarray, shape (dim_b,) or ndarray, shape (dim_b, n_hists) samples in the target domain, compute sinkhorn with multiple targets and fixed M if b is a matrix (return OT loss + dual variables in log) - M : np.ndarray (ns,nt) + M : ndarray, shape (dim_a, dim_b) loss matrix reg : float Regularization term >0 @@ -465,10 +456,9 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log= log : bool, optional record log if True - Returns ------- - gamma : (ns x nt) ndarray + gamma : ndarray, shape (dim_a, dim_b) Optimal transportation matrix for the given parameters log : dict log dictionary return only if log==True in parameters @@ -477,12 +467,12 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log= -------- >>> import ot - >>> a=[.5,.5] - >>> b=[.5,.5] - >>> M=[[0.,1.],[1.,0.]] - >>> ot.bregman.greenkhorn(a,b,M,1) - array([[ 0.36552929, 0.13447071], - [ 0.13447071, 0.36552929]]) + >>> a=[.5, .5] + >>> b=[.5, .5] + >>> M=[[0., 1.], [1., 0.]] + >>> ot.bregman.greenkhorn(a, b, M, 1) + array([[0.36552929, 0.13447071], + [0.13447071, 0.36552929]]) References @@ -499,22 +489,33 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log= """ - n = a.shape[0] - m = b.shape[0] + a = 
np.asarray(a, dtype=np.float64) + b = np.asarray(b, dtype=np.float64) + M = np.asarray(M, dtype=np.float64) + + if len(a) == 0: + a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0] + if len(b) == 0: + b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] + + dim_a = a.shape[0] + dim_b = b.shape[0] # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute K = np.empty_like(M) np.divide(M, -reg, out=K) np.exp(K, out=K) - u = np.full(n, 1. / n) - v = np.full(m, 1. / m) + u = np.full(dim_a, 1. / dim_a) + v = np.full(dim_b, 1. / dim_b) G = u[:, np.newaxis] * K * v[np.newaxis, :] viol = G.sum(1) - a viol_2 = G.sum(0) - b stopThr_val = 1 + if log: + log = dict() log['u'] = u log['v'] = v @@ -560,8 +561,9 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log= def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, - warmstart=None, verbose=False, print_period=20, log=False, **kwargs): - """ + warmstart=None, verbose=False, print_period=20, + log=False, **kwargs): + r""" Solve the entropic regularization OT problem with log stabilization The function solves the following optimization problem: @@ -576,9 +578,10 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, \gamma\geq 0 where : - - M is the (ns,nt) metric cost matrix + - M is the (dim_a, dim_b) metric cost matrix - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - a and b are source and target weights (sum to 1) + - a and b are source and target weights (histograms, both sum to 1) + The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [2]_ but with the log stabilization @@ -587,11 +590,11 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, Parameters ---------- - a : np.ndarray (ns,) + a : ndarray, shape (dim_a,) samples weights in the source domain - b : np.ndarray (nt,) + b : ndarray, shape (dim_b,) samples in the target domain - M : np.ndarray (ns,nt) + M : ndarray, shape (dim_a, dim_b) loss matrix reg : float Regularization term >0 @@ -608,10 +611,9 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, log : bool, optional record log if True - Returns ------- - gamma : (ns x nt) ndarray + gamma : ndarray, shape (dim_a, dim_b) Optimal transportation matrix for the given parameters log : dict log dictionary return only if log==True in parameters @@ -623,9 +625,9 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, >>> a=[.5,.5] >>> b=[.5,.5] >>> M=[[0.,1.],[1.,0.]] - >>> ot.bregman.sinkhorn_stabilized(a,b,M,1) - array([[ 0.36552929, 0.13447071], - [ 0.13447071, 0.36552929]]) + >>> ot.bregman.sinkhorn_stabilized(a, b, M, 1) + array([[0.36552929, 0.13447071], + [0.13447071, 0.36552929]]) References @@ -656,14 +658,14 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, # test if multiple target if len(b.shape) > 1: - nbb = b.shape[1] + n_hists = b.shape[1] a = a[:, np.newaxis] else: - nbb = 0 + n_hists = 0 # init data - na = len(a) - nb = len(b) + dim_a = len(a) + dim_b = len(b) cpt = 0 if log: @@ -672,24 +674,25 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, # we assume that no distances are null except those of the diagonal of # distances if warmstart is None: - alpha, beta = np.zeros(na), np.zeros(nb) + alpha, beta = np.zeros(dim_a), np.zeros(dim_b) else: alpha, beta = 
warmstart - if nbb: - u, v = np.ones((na, nbb)) / na, np.ones((nb, nbb)) / nb + if n_hists: + u = np.ones((dim_a, n_hists)) / dim_a + v = np.ones((dim_b, n_hists)) / dim_b else: - u, v = np.ones(na) / na, np.ones(nb) / nb + u, v = np.ones(dim_a) / dim_a, np.ones(dim_b) / dim_b def get_K(alpha, beta): """log space computation""" - return np.exp(-(M - alpha.reshape((na, 1)) - - beta.reshape((1, nb))) / reg) + return np.exp(-(M - alpha.reshape((dim_a, 1)) + - beta.reshape((1, dim_b))) / reg) def get_Gamma(alpha, beta, u, v): """log space gamma computation""" - return np.exp(-(M - alpha.reshape((na, 1)) - beta.reshape((1, nb))) / - reg + np.log(u.reshape((na, 1))) + np.log(v.reshape((1, nb)))) + return np.exp(-(M - alpha.reshape((dim_a, 1)) - beta.reshape((1, dim_b))) + / reg + np.log(u.reshape((dim_a, 1))) + np.log(v.reshape((1, dim_b)))) # print(np.min(K)) @@ -709,26 +712,29 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, # remove numerical problems and store them in K if np.abs(u).max() > tau or np.abs(v).max() > tau: - if nbb: + if n_hists: alpha, beta = alpha + reg * \ np.max(np.log(u), 1), beta + reg * np.max(np.log(v)) else: alpha, beta = alpha + reg * np.log(u), beta + reg * np.log(v) - if nbb: - u, v = np.ones((na, nbb)) / na, np.ones((nb, nbb)) / nb + if n_hists: + u, v = np.ones((dim_a, n_hists)) / dim_a, np.ones((dim_b, n_hists)) / dim_b else: - u, v = np.ones(na) / na, np.ones(nb) / nb + u, v = np.ones(dim_a) / dim_a, np.ones(dim_b) / dim_b K = get_K(alpha, beta) if cpt % print_period == 0: # we can speed up the process by checking for the error only all # the 10th iterations - if nbb: - err = np.sum((u - uprev)**2) / np.sum((u)**2) + \ - np.sum((v - vprev)**2) / np.sum((v)**2) + if n_hists: + err_u = abs(u - uprev).max() + err_u /= max(abs(u).max(), abs(uprev).max(), 1.) + err_v = abs(v - vprev).max() + err_v /= max(abs(v).max(), abs(vprev).max(), 1.) 
+ err = 0.5 * (err_u + err_v) else: transp = get_Gamma(alpha, beta, u, v) - err = np.linalg.norm((np.sum(transp, axis=0) - b))**2 + err = np.linalg.norm((np.sum(transp, axis=0) - b)) if log: log['err'].append(err) @@ -754,34 +760,40 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, cpt = cpt + 1 - # print('err=',err,' cpt=',cpt) if log: - log['logu'] = alpha / reg + np.log(u) - log['logv'] = beta / reg + np.log(v) + if n_hists: + alpha = alpha[:, None] + beta = beta[:, None] + logu = alpha / reg + np.log(u) + logv = beta / reg + np.log(v) + log['logu'] = logu + log['logv'] = logv log['alpha'] = alpha + reg * np.log(u) log['beta'] = beta + reg * np.log(v) log['warmstart'] = (log['alpha'], log['beta']) - if nbb: - res = np.zeros((nbb)) - for i in range(nbb): + if n_hists: + res = np.zeros((n_hists)) + for i in range(n_hists): res[i] = np.sum(get_Gamma(alpha, beta, u[:, i], v[:, i]) * M) return res, log else: return get_Gamma(alpha, beta, u, v), log else: - if nbb: - res = np.zeros((nbb)) - for i in range(nbb): + if n_hists: + res = np.zeros((n_hists)) + for i in range(n_hists): res[i] = np.sum(get_Gamma(alpha, beta, u[:, i], v[:, i]) * M) return res else: return get_Gamma(alpha, beta, u, v) -def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInnerItermax=100, - tau=1e3, stopThr=1e-9, warmstart=None, verbose=False, print_period=10, log=False, **kwargs): - """ +def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, + numInnerItermax=100, tau=1e3, stopThr=1e-9, + warmstart=None, verbose=False, print_period=10, + log=False, **kwargs): + r""" Solve the entropic regularization optimal transport problem with log stabilization and epsilon scaling. @@ -797,9 +809,10 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne \gamma\geq 0 where : - - M is the (ns,nt) metric cost matrix + - M is the (dim_a, dim_b) metric cost matrix - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - a and b are source and target weights (sum to 1) + - a and b are source and target weights (histograms, both sum to 1) + The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [2]_ but with the log stabilization @@ -808,19 +821,17 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne Parameters ---------- - a : np.ndarray (ns,) + a : ndarray, shape (dim_a,) samples weights in the source domain - b : np.ndarray (nt,) + b : ndarray, shape (dim_b,) samples in the target domain - M : np.ndarray (ns,nt) + M : ndarray, shape (dim_a, dim_b) loss matrix reg : float Regularization term >0 tau : float thershold for max value in u or v for log scaling - tau : float - thershold for max value in u or v for log scaling - warmstart : tible of vectors + warmstart : tuple of vectors if given then sarting values for alpha an beta log scalings numItermax : int, optional Max number of iterations @@ -835,10 +846,9 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne log : bool, optional record log if True - Returns ------- - gamma : (ns x nt) ndarray + gamma : ndarray, shape (dim_a, dim_b) Optimal transportation matrix for the given parameters log : dict log dictionary return only if log==True in parameters @@ -847,12 +857,12 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne -------- >>> import ot - >>> a=[.5,.5] - >>> b=[.5,.5] - >>> 
M=[[0.,1.],[1.,0.]] - >>> ot.bregman.sinkhorn_epsilon_scaling(a,b,M,1) - array([[ 0.36552929, 0.13447071], - [ 0.13447071, 0.36552929]]) + >>> a=[.5, .5] + >>> b=[.5, .5] + >>> M=[[0., 1.], [1., 0.]] + >>> ot.bregman.sinkhorn_epsilon_scaling(a, b, M, 1) + array([[0.36552929, 0.13447071], + [0.13447071, 0.36552929]]) References @@ -879,8 +889,8 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] # init data - na = len(a) - nb = len(b) + dim_a = len(a) + dim_b = len(b) # nrelative umerical precision with 64 bits numItermin = 35 @@ -893,14 +903,14 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne # we assume that no distances are null except those of the diagonal of # distances if warmstart is None: - alpha, beta = np.zeros(na), np.zeros(nb) + alpha, beta = np.zeros(dim_a), np.zeros(dim_b) else: alpha, beta = warmstart def get_K(alpha, beta): """log space computation""" - return np.exp(-(M - alpha.reshape((na, 1)) - - beta.reshape((1, nb))) / reg) + return np.exp(-(M - alpha.reshape((dim_a, 1)) + - beta.reshape((1, dim_b))) / reg) # print(np.min(K)) def get_reg(n): # exponential decreasing @@ -913,8 +923,10 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne regi = get_reg(cpt) - G, logi = sinkhorn_stabilized(a, b, M, regi, numItermax=numInnerItermax, stopThr=1e-9, warmstart=( - alpha, beta), verbose=False, print_period=20, tau=tau, log=True) + G, logi = sinkhorn_stabilized(a, b, M, regi, + numItermax=numInnerItermax, stopThr=1e-9, + warmstart=(alpha, beta), verbose=False, + print_period=20, tau=tau, log=True) alpha = logi['alpha'] beta = logi['beta'] @@ -972,9 +984,9 @@ def projC(gamma, q): return np.multiply(gamma, q / np.maximum(np.sum(gamma, axis=0), 1e-10)) -def barycenter(A, M, reg, weights=None, numItermax=1000, - stopThr=1e-4, verbose=False, log=False): - """Compute the entropic regularized wasserstein barycenter of distributions A +def barycenter(A, M, reg, weights=None, method="sinkhorn", numItermax=10000, + stopThr=1e-4, verbose=False, log=False, **kwargs): + r"""Compute the entropic regularized wasserstein barycenter of distributions A The function solves the following optimization problem: @@ -991,13 +1003,15 @@ def barycenter(A, M, reg, weights=None, numItermax=1000, Parameters ---------- - A : np.ndarray (d,n) - n training distributions a_i of size d - M : np.ndarray (d,d) - loss matrix for OT + A : ndarray, shape (dim, n_hists) + n_hists training distributions a_i of size dim + M : ndarray, shape (dim, dim) + loss matrix for OT reg : float - Regularization term >0 - weights : np.ndarray (n,) + Regularization term > 0 + method : str (optional) + method used for the solver either 'sinkhorn' or 'sinkhorn_stabilized' + weights : ndarray, shape (n_hists,) Weights of each histogram a_i on the simplex (barycentric coodinates) numItermax : int, optional Max number of iterations @@ -1011,7 +1025,7 @@ def barycenter(A, M, reg, weights=None, numItermax=1000, Returns ------- - a : (d,) ndarray + a : (dim,) ndarray Wasserstein barycenter log : dict log dictionary return only if log==True in parameters @@ -1022,7 +1036,71 @@ def barycenter(A, M, reg, weights=None, numItermax=1000, .. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G. (2015). Iterative Bregman projections for regularized transportation problems. SIAM Journal on Scientific Computing, 37(2), A1111-A1138. 
+ """ + if method.lower() == 'sinkhorn': + return barycenter_sinkhorn(A, M, reg, weights=weights, + numItermax=numItermax, + stopThr=stopThr, verbose=verbose, log=log, + **kwargs) + elif method.lower() == 'sinkhorn_stabilized': + return barycenter_stabilized(A, M, reg, weights=weights, + numItermax=numItermax, + stopThr=stopThr, verbose=verbose, + log=log, **kwargs) + else: + raise ValueError("Unknown method '%s'." % method) + + +def barycenter_sinkhorn(A, M, reg, weights=None, numItermax=1000, + stopThr=1e-4, verbose=False, log=False): + r"""Compute the entropic regularized wasserstein barycenter of distributions A + + The function solves the following optimization problem: + + .. math:: + \mathbf{a} = arg\min_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i) + + where : + + - :math:`W_{reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance (see ot.bregman.sinkhorn) + - :math:`\mathbf{a}_i` are training distributions in the columns of matrix :math:`\mathbf{A}` + - reg and :math:`\mathbf{M}` are respectively the regularization term and the cost matrix for OT + + The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [3]_ + + Parameters + ---------- + A : ndarray, shape (dim, n_hists) + n_hists training distributions a_i of size dim + M : ndarray, shape (dim, dim) + loss matrix for OT + reg : float + Regularization term > 0 + weights : ndarray, shape (n_hists,) + Weights of each histogram a_i on the simplex (barycentric coodinates) + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshol on error (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + + + Returns + ------- + a : (dim,) ndarray + Wasserstein barycenter + log : dict + log dictionary return only if log==True in parameters + + + References + ---------- + + .. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G. (2015). Iterative Bregman projections for regularized transportation problems. SIAM Journal on Scientific Computing, 37(2), A1111-A1138. """ @@ -1068,8 +1146,139 @@ def barycenter(A, M, reg, weights=None, numItermax=1000, return geometricBar(weights, UKv) -def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1e-9, stabThr=1e-30, verbose=False, log=False): - """Compute the entropic regularized wasserstein barycenter of distributions A +def barycenter_stabilized(A, M, reg, tau=1e10, weights=None, numItermax=1000, + stopThr=1e-4, verbose=False, log=False): + r"""Compute the entropic regularized wasserstein barycenter of distributions A + with stabilization. + + The function solves the following optimization problem: + + .. 
math:: + \mathbf{a} = arg\min_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i) + + where : + + - :math:`W_{reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance (see ot.bregman.sinkhorn) + - :math:`\mathbf{a}_i` are training distributions in the columns of matrix :math:`\mathbf{A}` + - reg and :math:`\mathbf{M}` are respectively the regularization term and the cost matrix for OT + + The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [3]_ + + Parameters + ---------- + A : ndarray, shape (dim, n_hists) + n_hists training distributions a_i of size dim + M : ndarray, shape (dim, dim) + loss matrix for OT + reg : float + Regularization term > 0 + tau : float + thershold for max value in u or v for log scaling + weights : ndarray, shape (n_hists,) + Weights of each histogram a_i on the simplex (barycentric coodinates) + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshol on error (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + + + Returns + ------- + a : (dim,) ndarray + Wasserstein barycenter + log : dict + log dictionary return only if log==True in parameters + + + References + ---------- + + .. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G. (2015). Iterative Bregman projections for regularized transportation problems. SIAM Journal on Scientific Computing, 37(2), A1111-A1138. + + """ + + dim, n_hists = A.shape + if weights is None: + weights = np.ones(n_hists) / n_hists + else: + assert(len(weights) == A.shape[1]) + + if log: + log = {'err': []} + + u = np.ones((dim, n_hists)) / dim + v = np.ones((dim, n_hists)) / dim + + # print(reg) + # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute + K = np.empty(M.shape, dtype=M.dtype) + np.divide(M, -reg, out=K) + np.exp(K, out=K) + + cpt = 0 + err = 1. + alpha = np.zeros(dim) + beta = np.zeros(dim) + q = np.ones(dim) / dim + while (err > stopThr and cpt < numItermax): + qprev = q + Kv = K.dot(v) + u = A / (Kv + 1e-16) + Ktu = K.T.dot(u) + q = geometricBar(weights, Ktu) + Q = q[:, None] + v = Q / (Ktu + 1e-16) + absorbing = False + if (u > tau).any() or (v > tau).any(): + absorbing = True + alpha = alpha + reg * np.log(np.max(u, 1)) + beta = beta + reg * np.log(np.max(v, 1)) + K = np.exp((alpha[:, None] + beta[None, :] - + M) / reg) + v = np.ones_like(v) + Kv = K.dot(v) + if (np.any(Ktu == 0.) + or np.any(np.isnan(u)) or np.any(np.isnan(v)) + or np.any(np.isinf(u)) or np.any(np.isinf(v))): + # we have reached the machine precision + # come back to previous solution and quit loop + warnings.warn('Numerical errors at iteration %s' % cpt) + q = qprev + break + if (cpt % 10 == 0 and not absorbing) or cpt == 0: + # we can speed up the process by checking for the error only all + # the 10th iterations + err = abs(u * Kv - A).max() + if log: + log['err'].append(err) + if verbose: + if cpt % 50 == 0: + print( + '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19) + print('{:5d}|{:8e}|'.format(cpt, err)) + + cpt += 1 + if err > stopThr: + warnings.warn("Stabilized Unbalanced Sinkhorn did not converge." 
+ + "Try a larger entropy `reg`" + + "Or a larger absorption threshold `tau`.") + if log: + log['niter'] = cpt + log['logu'] = np.log(u + 1e-16) + log['logv'] = np.log(v + 1e-16) + return q, log + else: + return q + + +def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, + stopThr=1e-9, stabThr=1e-30, verbose=False, + log=False): + r"""Compute the entropic regularized wasserstein barycenter of distributions A where A is a collection of 2D images. The function solves the following optimization problem: @@ -1087,16 +1296,16 @@ def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1 Parameters ---------- - A : np.ndarray (n,w,h) - n distributions (2D images) of size w x h + A : ndarray, shape (n_hists, width, height) + n distributions (2D images) of size width x height reg : float Regularization term >0 - weights : np.ndarray (n,) + weights : ndarray, shape (n_hists,) Weights of each image on the simplex (barycentric coodinates) numItermax : int, optional Max number of iterations stopThr : float, optional - Stop threshol on error (>0) + Stop threshol on error (> 0) stabThr : float, optional Stabilization threshold to avoid numerical precision issue verbose : bool, optional @@ -1104,15 +1313,13 @@ def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1 log : bool, optional record log if True - Returns ------- - a : (w,h) ndarray + a : ndarray, shape (width, height) 2D Wasserstein barycenter log : dict log dictionary return only if log==True in parameters - References ---------- @@ -1180,7 +1387,7 @@ def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1 def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000, stopThr=1e-3, verbose=False, log=False): - """ + r""" Compute the unmixing of an observation with a given dictionary using Wasserstein distance The function solve the following optimization problem: @@ -1192,9 +1399,12 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000, where : - :math:`W_{M,reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance with M loss matrix (see ot.bregman.sinkhorn) - - :math:`\mathbf{a}` is an observed distribution, :math:`\mathbf{h}_0` is aprior on unmixing - - reg and :math:`\mathbf{M}` are respectively the regularization term and the cost matrix for OT data fitting - - reg0 and :math:`\mathbf{M0}` are respectively the regularization term and the cost matrix for regularization + - :math: `\mathbf{D}` is a dictionary of `n_atoms` atoms of dimension `dim_a`, its expected shape is `(dim_a, n_atoms)` + - :math:`\mathbf{h}` is the estimated unmixing of dimension `n_atoms` + - :math:`\mathbf{a}` is an observed distribution of dimension `dim_a` + - :math:`\mathbf{h}_0` is a prior on `h` of dimension `dim_prior` + - reg and :math:`\mathbf{M}` are respectively the regularization term and the cost matrix (dim_a, dim_a) for OT data fitting + - reg0 and :math:`\mathbf{M0}` are respectively the regularization term and the cost matrix (dim_prior, n_atoms) regularization - :math:`\\alpha`weight data fitting and regularization The optimization problem is solved suing the algorithm described in [4] @@ -1202,16 +1412,16 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000, Parameters ---------- - a : np.ndarray (d) - observed distribution - D : np.ndarray (d,n) + a : ndarray, shape (dim_a) + observed distribution (histogram, sums to 1) + D : ndarray, shape (dim_a, n_atoms) dictionary matrix - M : np.ndarray (d,d) + M : ndarray, shape (dim_a, 
loss matrix - M0 : np.ndarray (n,n) + M0 : ndarray, shape (n_atoms, dim_prior) loss matrix - h0 : np.ndarray (n,) - prior on h + h0 : ndarray, shape (n_atoms,) + prior on the estimated unmixing h reg : float Regularization term >0 (Wasserstein data fitting) reg0 : float Regularization term >0 (inverse problem) @@ -1230,7 +1440,7 @@ Returns ------- - a : (d,) ndarray + h : ndarray, shape (n_atoms,) Estimated unmixing h log : dict log dictionary return only if log==True in parameters @@ -1284,3 +1494,635 @@ return np.sum(K0, axis=1), log else: return np.sum(K0, axis=1) + + +def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', + numIterMax=10000, stopThr=1e-9, verbose=False, + log=False, **kwargs): + r''' + Solve the entropic regularization optimal transport problem and return the + OT matrix from empirical data + + The function solves the following optimization problem: + + .. math:: + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + + s.t. \gamma 1 = a + + \gamma^T 1= b + + \gamma\geq 0 + where : + + - :math:`M` is the (n_samples_a, n_samples_b) metric cost matrix + - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`a` and :math:`b` are source and target weights (sum to 1) + + + Parameters + ---------- + X_s : ndarray, shape (n_samples_a, dim) + samples in the source domain + X_t : ndarray, shape (n_samples_b, dim) + samples in the target domain + reg : float + Regularization term >0 + a : ndarray, shape (n_samples_a,) + samples weights in the source domain + b : ndarray, shape (n_samples_b,) + samples weights in the target domain + numIterMax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshold on error (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + + + Returns + ------- + gamma : ndarray, shape (n_samples_a, n_samples_b) + Regularized optimal transportation matrix for the given parameters + log : dict + log dictionary return only if log==True in parameters + + Examples + -------- + + >>> n_samples_a = 2 + >>> n_samples_b = 2 + >>> reg = 0.1 + >>> X_s = np.reshape(np.arange(n_samples_a), (n_samples_a, 1)) + >>> X_t = np.reshape(np.arange(0, n_samples_b), (n_samples_b, 1)) + >>> empirical_sinkhorn(X_s, X_t, reg, verbose=False) # doctest: +NORMALIZE_WHITESPACE + array([[4.99977301e-01, 2.26989344e-05], + [2.26989344e-05, 4.99977301e-01]]) + + + References + ---------- + + .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013 + + .. [9] Schmitzer, B. (2016). Stabilized Sparse Scaling Algorithms for Entropy Regularized Transport Problems. arXiv preprint arXiv:1610.06519. + + .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
+ ''' + + if a is None: + a = unif(np.shape(X_s)[0]) + if b is None: + b = unif(np.shape(X_t)[0]) + + M = dist(X_s, X_t, metric=metric) + + if log: + pi, log = sinkhorn(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=True, **kwargs) + return pi, log + else: + pi = sinkhorn(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=False, **kwargs) + return pi + + +def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs): + r''' + Solve the entropic regularization optimal transport problem from empirical + data and return the OT loss + + + The function solves the following optimization problem: + + .. math:: + W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + + s.t. \gamma 1 = a + + \gamma^T 1= b + + \gamma\geq 0 + where : + + - :math:`M` is the (n_samples_a, n_samples_b) metric cost matrix + - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`a` and :math:`b` are source and target weights (sum to 1) + + + Parameters + ---------- + X_s : ndarray, shape (n_samples_a, dim) + samples in the source domain + X_t : ndarray, shape (n_samples_b, dim) + samples in the target domain + reg : float + Regularization term >0 + a : ndarray, shape (n_samples_a,) + samples weights in the source domain + b : ndarray, shape (n_samples_b,) + samples weights in the target domain + numIterMax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshold on error (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + + + Returns + ------- + W : (n_hists) ndarray or float + Regularized optimal transport loss for the given parameters + log : dict + log dictionary return only if log==True in parameters + + Examples + -------- + + >>> n_samples_a = 2 + >>> n_samples_b = 2 + >>> reg = 0.1 + >>> X_s = np.reshape(np.arange(n_samples_a), (n_samples_a, 1)) + >>> X_t = np.reshape(np.arange(0, n_samples_b), (n_samples_b, 1)) + >>> empirical_sinkhorn2(X_s, X_t, reg, verbose=False) + array([4.53978687e-05]) + + + References + ---------- + + .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013 + + .. [9] Schmitzer, B. (2016). Stabilized Sparse Scaling Algorithms for Entropy Regularized Transport Problems. arXiv preprint arXiv:1610.06519. + + .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816. + ''' + + if a is None: + a = unif(np.shape(X_s)[0]) + if b is None: + b = unif(np.shape(X_t)[0]) + + M = dist(X_s, X_t, metric=metric) + + if log: + sinkhorn_loss, log = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs) + return sinkhorn_loss, log + else: + sinkhorn_loss = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs) + return sinkhorn_loss + + +def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs): + r''' + Compute the sinkhorn divergence loss from empirical data + + The function solves the following optimization problems and returns the + sinkhorn divergence :math:`S`: + + .. math::
+ + W &= \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + + W_a &= \min_{\gamma_a} <\gamma_a,M_a>_F + reg\cdot\Omega(\gamma_a) + + W_b &= \min_{\gamma_b} <\gamma_b,M_b>_F + reg\cdot\Omega(\gamma_b) + + S &= W - 1/2 * (W_a + W_b) + + .. math:: + s.t. \gamma 1 = a + + \gamma^T 1= b + + \gamma\geq 0 + + \gamma_a 1 = a + + \gamma_a^T 1= a + + \gamma_a\geq 0 + + \gamma_b 1 = b + + \gamma_b^T 1= b + + \gamma_b\geq 0 + where : + + - :math:`M` (resp. :math:`M_a, M_b`) is the (n_samples_a, n_samples_b) metric cost matrix (resp (n_samples_a, n_samples_a) and (n_samples_b, n_samples_b)) + - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`a` and :math:`b` are source and target weights (sum to 1) + + + Parameters + ---------- + X_s : ndarray, shape (n_samples_a, dim) + samples in the source domain + X_t : ndarray, shape (n_samples_b, dim) + samples in the target domain + reg : float + Regularization term >0 + a : ndarray, shape (n_samples_a,) + samples weights in the source domain + b : ndarray, shape (n_samples_b,) + samples weights in the target domain + numIterMax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshold on error (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + + Returns + ------- + W : float + Sinkhorn divergence for the given parameters + log : dict + log dictionary return only if log==True in parameters + + Examples + -------- + >>> n_samples_a = 2 + >>> n_samples_b = 4 + >>> reg = 0.1 + >>> X_s = np.reshape(np.arange(n_samples_a), (n_samples_a, 1)) + >>> X_t = np.reshape(np.arange(0, n_samples_b), (n_samples_b, 1)) + >>> empirical_sinkhorn_divergence(X_s, X_t, reg) # doctest: +ELLIPSIS + array([1.499...]) + + + References + ----------
+ .. [23] Aude Genevay, Gabriel Peyré, Marco Cuturi, Learning Generative Models with Sinkhorn Divergences, Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics, (AISTATS) 21, 2018 + ''' + if log: + sinkhorn_loss_ab, log_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs) + + sinkhorn_loss_a, log_a = empirical_sinkhorn2(X_s, X_s, reg, a, a, metric=metric, numIterMax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs) + + sinkhorn_loss_b, log_b = empirical_sinkhorn2(X_t, X_t, reg, b, b, metric=metric, numIterMax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs) + + sinkhorn_div = sinkhorn_loss_ab - 1 / 2 * (sinkhorn_loss_a + sinkhorn_loss_b) + + log = {} + log['sinkhorn_loss_ab'] = sinkhorn_loss_ab + log['sinkhorn_loss_a'] = sinkhorn_loss_a + log['sinkhorn_loss_b'] = sinkhorn_loss_b + log['log_sinkhorn_ab'] = log_ab + log['log_sinkhorn_a'] = log_a + log['log_sinkhorn_b'] = log_b + + return max(0, sinkhorn_div), log + + else: + sinkhorn_loss_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs) + + sinkhorn_loss_a = empirical_sinkhorn2(X_s, X_s, reg, a, a, metric=metric, numIterMax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs) + + sinkhorn_loss_b = empirical_sinkhorn2(X_t, X_t, reg, b, b, metric=metric, numIterMax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs) + + sinkhorn_div = sinkhorn_loss_ab - 1 / 2 * (sinkhorn_loss_a + sinkhorn_loss_b) + return max(0, sinkhorn_div) + + +def screenkhorn(a, b, M, reg, ns_budget=None, nt_budget=None, uniform=False, restricted=True, + maxiter=10000, maxfun=10000, pgtol=1e-09, verbose=False, log=False): + r""" + Screening Sinkhorn Algorithm for Regularized Optimal Transport + + The function solves an approximate dual of Sinkhorn divergence [2] which is written as the following optimization problem: + + .. math:: + (u, v) = \argmin_{u, v} 1_{ns}^T B(u,v) 1_{nt} - <\kappa u, a> - <v/\kappa, b> + + where B(u,v) = \diag(e^u) K \diag(e^v), with K = e^{-M/reg} and + + s.t.
e^{u_i} \geq \epsilon / \kappa, for all i \in {1, ..., ns} + + e^{v_j} \geq \epsilon \kappa, for all j \in {1, ..., nt} + + The parameters \kappa and \epsilon are determined w.r.t. the couple of point budgets (ns_budget, nt_budget), see Equation (5) in [26] + + + Parameters + ---------- + a : `numpy.ndarray`, shape=(ns,) + samples weights in the source domain + + b : `numpy.ndarray`, shape=(nt,) + samples weights in the target domain + + M : `numpy.ndarray`, shape=(ns, nt) + Cost matrix + + reg : `float` + Level of the entropy regularisation + + ns_budget : `int`, default=None + Number budget of points to be kept in the source domain + If it is None then 50% of the source sample points will be kept + + nt_budget : `int`, default=None + Number budget of points to be kept in the target domain + If it is None then 50% of the target sample points will be kept + + uniform : `bool`, default=False + If `True`, the source and target distributions are supposed to be uniform, i.e., a_i = 1 / ns and b_j = 1 / nt + + restricted : `bool`, default=True + If `True`, a warm-start initialization is computed for the L-BFGS-B solver + using a restricted Sinkhorn algorithm with at most 5 iterations + + maxiter : `int`, default=10000 + Maximum number of iterations in LBFGS solver + + maxfun : `int`, default=10000 + Maximum number of function evaluations in LBFGS solver + + pgtol : `float`, default=1e-09 + Final objective function accuracy in LBFGS solver + + verbose : `bool`, default=False + If `True`, display information about the cardinalities of the active sets and the parameters kappa and epsilon + + Dependency + ---------- + To gain more efficiency, screenkhorn needs to call the "Bottleneck" package (https://pypi.org/project/Bottleneck/) + in the screening pre-processing step. If Bottleneck isn't installed, the following warning appears: + "Bottleneck module is not installed. Install it from https://pypi.org/project/Bottleneck/ for better performance." + + + Returns + ------- + gamma : `numpy.ndarray`, shape=(ns, nt) + Screened optimal transportation matrix for the given parameters + + log : `dict`, default=False + Log dictionary return only if log==True in parameters + + + References + ----------- + .. [26] Alaya M. Z., Bérar M., Gasso G., Rakotomamonjy A. (2019). Screening Sinkhorn Algorithm for Regularized Optimal Transport (NIPS) 33, 2019 + + """ + # check if bottleneck module exists + try: + import bottleneck + except ImportError: + warnings.warn("Bottleneck module is not installed. 
Install it from https://pypi.org/project/Bottleneck/ for better performance.") + bottleneck = np + + a = np.asarray(a, dtype=np.float64) + b = np.asarray(b, dtype=np.float64) + M = np.asarray(M, dtype=np.float64) + ns, nt = M.shape + + # by default, we keep only 50% of the sample data points + if ns_budget is None: + ns_budget = int(np.floor(0.5 * ns)) + if nt_budget is None: + nt_budget = int(np.floor(0.5 * nt)) + + # calculate the Gibbs kernel + K = np.empty_like(M) + np.divide(M, -reg, out=K) + np.exp(K, out=K) + + def projection(u, epsilon): + u[u <= epsilon] = epsilon + return u + + # ----------------------------------------------------------------------------------------------------------------# + # Step 1: Screening pre-processing # + # ----------------------------------------------------------------------------------------------------------------# + + if ns_budget == ns and nt_budget == nt: + # full number of budget points (ns, nt) = (ns_budget, nt_budget) + Isel = np.ones(ns, dtype=bool) + Jsel = np.ones(nt, dtype=bool) + epsilon = 0.0 + kappa = 1.0 + + cst_u = 0. + cst_v = 0. + + bounds_u = [(0.0, np.inf)] * ns + bounds_v = [(0.0, np.inf)] * nt + + a_I = a + b_J = b + K_IJ = K + K_IJc = [] + K_IcJ = [] + + vec_eps_IJc = np.zeros(nt) + vec_eps_IcJ = np.zeros(ns) + + else: + # sum of rows and columns of K + K_sum_cols = K.sum(axis=1) + K_sum_rows = K.sum(axis=0) + + if uniform: + if ns / ns_budget < 4: + aK_sort = np.sort(K_sum_cols) + epsilon_u_square = a[0] / aK_sort[ns_budget - 1] + else: + aK_sort = bottleneck.partition(K_sum_cols, ns_budget - 1)[ns_budget - 1] + epsilon_u_square = a[0] / aK_sort + + if nt / nt_budget < 4: + bK_sort = np.sort(K_sum_rows) + epsilon_v_square = b[0] / bK_sort[nt_budget - 1] + else: + bK_sort = bottleneck.partition(K_sum_rows, nt_budget - 1)[nt_budget - 1] + epsilon_v_square = b[0] / bK_sort + else: + aK = a / K_sum_cols + bK = b / K_sum_rows + + aK_sort = np.sort(aK)[::-1] + epsilon_u_square = aK_sort[ns_budget - 1] + + bK_sort = np.sort(bK)[::-1] + epsilon_v_square = bK_sort[nt_budget - 1] + + # active sets I and J (see Lemma 1 in [26]) + Isel = a >= epsilon_u_square * K_sum_cols + Jsel = b >= epsilon_v_square * K_sum_rows + + if sum(Isel) != ns_budget: + if uniform: + aK = a / K_sum_cols + aK_sort = np.sort(aK)[::-1] + epsilon_u_square = aK_sort[ns_budget - 1:ns_budget + 1].mean() + Isel = a >= epsilon_u_square * K_sum_cols + ns_budget = sum(Isel) + + if sum(Jsel) != nt_budget: + if uniform: + bK = b / K_sum_rows + bK_sort = np.sort(bK)[::-1] + epsilon_v_square = bK_sort[nt_budget - 1:nt_budget + 1].mean() + Jsel = b >= epsilon_v_square * K_sum_rows + nt_budget = sum(Jsel) + + epsilon = (epsilon_u_square * epsilon_v_square) ** (1 / 4) + kappa = (epsilon_v_square / epsilon_u_square) ** (1 / 2) + + if verbose: + print("epsilon = %s\n" % epsilon) + print("kappa = %s\n" % kappa) + print('Cardinality of selected points: |Isel| = %s \t |Jsel| = %s \n' % (sum(Isel), sum(Jsel))) + + # Ic, Jc: complementary of the active sets I and J + Ic = ~Isel + Jc = ~Jsel + + K_IJ = K[np.ix_(Isel, Jsel)] + K_IcJ = K[np.ix_(Ic, Jsel)] + K_IJc = K[np.ix_(Isel, Jc)] + + K_min = K_IJ.min() + if K_min == 0: + K_min = np.finfo(float).tiny + + # a_I, b_J, a_Ic, b_Jc + a_I = a[Isel] + b_J = b[Jsel] + if not uniform: + a_I_min = a_I.min() + a_I_max = a_I.max() + b_J_max = b_J.max() + b_J_min = b_J.min() + else: + a_I_min = a_I[0] + a_I_max = a_I[0] + b_J_max = b_J[0] + b_J_min = b_J[0] + + # box constraints in L-BFGS-B (see Proposition 1 in [26]) + bounds_u = [(max(a_I_min / 
((nt - nt_budget) * epsilon + nt_budget * (b_J_max / ( + ns * epsilon * kappa * K_min))), epsilon / kappa), a_I_max / (nt * epsilon * K_min))] * ns_budget + + bounds_v = [(max(b_J_min / ((ns - ns_budget) * epsilon + ns_budget * (kappa * a_I_max / (nt * epsilon * K_min))), + epsilon * kappa), b_J_max / (ns * epsilon * K_min))] * nt_budget + + # pre-calculated constants for the objective + vec_eps_IJc = epsilon * kappa * (K_IJc * np.ones(nt - nt_budget).reshape((1, -1))).sum(axis=1) + vec_eps_IcJ = (epsilon / kappa) * (np.ones(ns - ns_budget).reshape((-1, 1)) * K_IcJ).sum(axis=0) + + # initialisation + u0 = np.full(ns_budget, (1. / ns_budget) + epsilon / kappa) + v0 = np.full(nt_budget, (1. / nt_budget) + epsilon * kappa) + + # pre-calculated constants for Restricted Sinkhorn (see Algorithm 1 in supplementary of [26]) + if restricted: + if ns_budget != ns or nt_budget != nt: + cst_u = kappa * epsilon * K_IJc.sum(axis=1) + cst_v = epsilon * K_IcJ.sum(axis=0) / kappa + + cpt = 1 + while cpt < 5: # 5 iterations + K_IJ_v = np.dot(K_IJ.T, u0) + cst_v + v0 = b_J / (kappa * K_IJ_v) + KIJ_u = np.dot(K_IJ, v0) + cst_u + u0 = (kappa * a_I) / KIJ_u + cpt += 1 + + u0 = projection(u0, epsilon / kappa) + v0 = projection(v0, epsilon * kappa) + + else: + u0 = u0 + v0 = v0 + + def restricted_sinkhorn(usc, vsc, max_iter=5): + """ + Restricted Sinkhorn Algorithm as a warm-start initialized point for L-BFGS-B (see Algorithm 1 in supplementary of [26]) + """ + cpt = 1 + while cpt < max_iter: + K_IJ_v = np.dot(K_IJ.T, usc) + cst_v + vsc = b_J / (kappa * K_IJ_v) + KIJ_u = np.dot(K_IJ, vsc) + cst_u + usc = (kappa * a_I) / KIJ_u + cpt += 1 + + usc = projection(usc, epsilon / kappa) + vsc = projection(vsc, epsilon * kappa) + + return usc, vsc + + def screened_obj(usc, vsc): + part_IJ = np.dot(np.dot(usc, K_IJ), vsc) - kappa * np.dot(a_I, np.log(usc)) - (1. / kappa) * np.dot(b_J, np.log(vsc)) + part_IJc = np.dot(usc, vec_eps_IJc) + part_IcJ = np.dot(vec_eps_IcJ, vsc) + psi_epsilon = part_IJ + part_IJc + part_IcJ + return psi_epsilon + + def screened_grad(usc, vsc): + # gradients of Psi_(kappa,epsilon) w.r.t u and v + grad_u = np.dot(K_IJ, vsc) + vec_eps_IJc - kappa * a_I / usc + grad_v = np.dot(K_IJ.T, usc) + vec_eps_IcJ - (1. / kappa) * b_J / vsc + return grad_u, grad_v + + def bfgspost(theta): + u = theta[:ns_budget] + v = theta[ns_budget:] + # objective + f = screened_obj(u, v) + # gradient + g_u, g_v = screened_grad(u, v) + g = np.hstack([g_u, g_v]) + return f, g + + #----------------------------------------------------------------------------------------------------------------# + # Step 2: L-BFGS-B solver # + #----------------------------------------------------------------------------------------------------------------# + + u0, v0 = restricted_sinkhorn(u0, v0) + theta0 = np.hstack([u0, v0]) + + bounds = bounds_u + bounds_v # constraint bounds + + def obj(theta): + return bfgspost(theta) + + theta, _, _ = fmin_l_bfgs_b(func=obj, + x0=theta0, + bounds=bounds, + maxfun=maxfun, + pgtol=pgtol, + maxiter=maxiter) + + usc = theta[:ns_budget] + vsc = theta[ns_budget:] + + usc_full = np.full(ns, epsilon / kappa) + vsc_full = np.full(nt, epsilon * kappa) + usc_full[Isel] = usc + vsc_full[Jsel] = vsc + + if log: + log = {} + log['u'] = usc_full + log['v'] = vsc_full + log['Isel'] = Isel + log['Jsel'] = Jsel + + gamma = usc_full[:, None] * K * vsc_full[None, :] + gamma = gamma / gamma.sum() + + if log: + return gamma, log + else: + return gamma
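Taken together, the new empirical_* helpers and screenkhorn go straight from raw samples to an OT plan, a loss, a debiased divergence, or a screened (budgeted) plan, and barycenter gains a `method` switch to the stabilized solver. A hedged usage sketch, assuming a POT build that includes this commit; ot.unif, ot.dist and ot.utils.dist0 are pre-existing POT helpers, and the sample sizes and budgets below are arbitrary:

    import numpy as np
    import ot

    rng = np.random.RandomState(0)
    X_s = rng.randn(50, 2)         # source samples, shape (n_samples_a, dim)
    X_t = rng.randn(40, 2) + 1.    # target samples, shape (n_samples_b, dim)
    reg = 0.1

    # OT plan, loss and debiased divergence computed straight from the samples
    G = ot.bregman.empirical_sinkhorn(X_s, X_t, reg)               # (50, 40) plan
    loss = ot.bregman.empirical_sinkhorn2(X_s, X_t, reg)           # entropic OT loss
    div = ot.bregman.empirical_sinkhorn_divergence(X_s, X_t, reg)  # clipped at 0

    # Screened Sinkhorn: keep a budget of 25 source / 20 target points
    a, b = ot.unif(50), ot.unif(40)
    M = ot.dist(X_s, X_t)
    G_screened = ot.bregman.screenkhorn(a, b, M, reg, ns_budget=25, nt_budget=20)

    # Stabilized barycenter through the new `method` switch
    A = np.vstack([ot.unif(30), ot.unif(30)]).T  # two histograms, shape (dim, n_hists)
    M_bar = ot.utils.dist0(30)                   # squared-distance cost on a 1D grid
    bary = ot.bregman.barycenter(A, M_bar / M_bar.max(), reg,
                                 method='sinkhorn_stabilized')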