7 files changed, 1540 insertions, 9 deletions
diff --git a/ot/__init__.py b/ot/__init__.py
index 1500e59..1dde390 100644
--- a/ot/__init__.py
+++ b/ot/__init__.py
@@ -18,6 +18,8 @@ from . import utils
 from . import datasets
 from . import da
 from . import gromov
+from . import smooth
+from . import stochastic
 
 # OT functions
 from .lp import emd, emd2
diff --git a/ot/bregman.py b/ot/bregman.py
index b017c1a..c755f51 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -344,8 +344,13 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
 
     # print(reg)
 
-    K = np.exp(-M / reg)
+    # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute
+    K = np.empty(M.shape, dtype=M.dtype)
+    np.divide(M, -reg, out=K)
+    np.exp(K, out=K)
+
     # print(np.min(K))
+    tmp2 = np.empty(b.shape, dtype=M.dtype)
 
     Kp = (1 / a).reshape(-1, 1) * K
     cpt = 0
@@ -353,6 +358,7 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
     while (err > stopThr and cpt < numItermax):
         uprev = u
         vprev = v
+
         KtransposeU = np.dot(K.T, u)
         v = np.divide(b, KtransposeU)
         u = 1. / np.dot(Kp, v)
@@ -373,8 +379,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
                 err = np.sum((u - uprev)**2) / np.sum((u)**2) + \
                     np.sum((v - vprev)**2) / np.sum((v)**2)
             else:
-                transp = u.reshape(-1, 1) * (K * v)
-                err = np.linalg.norm((np.sum(transp, axis=0) - b))**2
+                # compute right marginal tmp2= (diag(u)Kdiag(v))^T1
+                np.einsum('i,ij,j->j', u, K, v, out=tmp2)
+                err = np.linalg.norm(tmp2 - b)**2  # violation of marginal
             if log:
                 log['err'].append(err)
 
@@ -389,10 +396,7 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
         log['v'] = v
 
     if nbb:  # return only loss
-        res = np.zeros((nbb))
-        for i in range(nbb):
-            res[i] = np.sum(
-                u[:, i].reshape((-1, 1)) * K * v[:, i].reshape((1, -1)) * M)
+        res = np.einsum('ik,ij,jk,ij->k', u, K, v, M)
         if log:
             return res, log
         else:
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 5dda82a..02cbd8c 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -17,6 +17,9 @@ from .import cvx
 from .emd_wrap import emd_c, check_result
 from ..utils import parmap
 from .cvx import barycenter
+from ..utils import dist
+
+__all__=['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx']
 
 
 def emd(a, b, M, numItermax=100000, log=False):
@@ -214,3 +217,95 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
 
     res = parmap(f, [b[:, i] for i in range(nb)], processes)
     return res
+
+
+
+def free_support_barycenter(measures_locations, measures_weights, X_init, b=None, weights=None, numItermax=100, stopThr=1e-7, verbose=False, log=None):
+    """
+    Solves the free support (locations of the barycenters are optimized, not the weights) Wasserstein barycenter problem (i.e. the weighted Frechet mean for the 2-Wasserstein distance)
+
+    The function solves the Wasserstein barycenter problem when the barycenter measure is constrained to be supported on k atoms.
+    This problem is considered in [1] (Algorithm 2). There are two differences with the following code:
+    - we do not optimize over the weights
+    - we do not do line search for the locations updates, i.e. we use theta = 1 in [1] (Algorithm 2). This can be seen as a discrete implementation of the fixed-point algorithm of [2] proposed in the continuous setting.
+
+    Parameters
+    ----------
+    measures_locations : list of (k_i,d) np.ndarray
+        The discrete support of a measure supported on k_i locations of a d-dimensional space (k_i can be different for each element of the list)
+    measures_weights : list of (k_i,) np.ndarray
+        Numpy arrays where each numpy array has k_i non-negatives values summing to one representing the weights of each discrete input measure
+    X_init : (k,d) np.ndarray
+        Initialization of the support locations (on k atoms) of the barycenter
+    b : (k,) np.ndarray
+        Weights of the barycenter atoms (non-negatives, sum to 1); kept fixed, uniform if None
+    weights : (N,) np.ndarray
+        Weight of each of the N input measures in the barycenter (non-negatives, sum to 1); uniform if None
+    numItermax : int, optional
+        Max number of iterations
+    stopThr : float, optional
+        Stop threshold on the squared displacement of the support between two iterations (>0)
+    verbose : bool, optional
+        Print information along iterations
+    log : bool, optional
+        record log if True
+
+    Returns
+    -------
+    X : (k,d) np.ndarray
+        Support locations (on k atoms) of the barycenter
+    log_dict : dict
+        Returned only if log is truthy; 'displacement_square_norms' lists the squared displacement of each iteration
+
+    References
+    ----------
+
+    .. [1] Cuturi, Marco, and Arnaud Doucet. "Fast computation of Wasserstein barycenters." International Conference on Machine Learning. 2014.
+
+    .. [2]  Álvarez-Esteban, Pedro C., et al. "A fixed-point approach to barycenters in Wasserstein space." Journal of Mathematical Analysis and Applications 441.2 (2016): 744-762.
+
+    """
+
+    iter_count = 0
+
+    N = len(measures_locations)
+    k = X_init.shape[0]
+    d = X_init.shape[1]
+    if b is None:
+        b = np.ones((k,)) / k
+    if weights is None:
+        weights = np.ones((N,)) / N
+
+    X = X_init
+
+    log_dict = {}
+    displacement_square_norms = []
+
+    displacement_square_norm = stopThr + 1.
+
+    while displacement_square_norm > stopThr and iter_count < numItermax:
+
+        T_sum = np.zeros((k, d))
+        # accumulate, over the input measures, the weighted new positions of the k atoms
+        for (measure_locations_i, measure_weights_i, weight_i) in zip(measures_locations, measures_weights, weights.tolist()):
+            # transport between the current barycenter support and the i-th measure
+            M_i = dist(X, measure_locations_i)
+            T_i = emd(b, measure_weights_i, M_i)
+            T_sum = T_sum + weight_i * np.reshape(1. / b, (-1, 1)) * np.matmul(T_i, measure_locations_i)
+        # squared displacement of the support, compared to stopThr by the loop condition
+        displacement_square_norm = np.sum(np.square(T_sum - X))
+        if log:
+            displacement_square_norms.append(displacement_square_norm)
+
+        X = T_sum
+
+        if verbose:
+            print('iteration %d, displacement_square_norm=%f\n' % (iter_count, displacement_square_norm))
+
+        iter_count += 1
+
+    if log:
+        log_dict['displacement_square_norms'] = displacement_square_norms
+        return X, log_dict
+    else:
+        return X
+\ No newline at end of file
diff --git a/ot/lp/cvx.py b/ot/lp/cvx.py
index c8c75bc..8e763be 100644
--- a/ot/lp/cvx.py
+++ b/ot/lp/cvx.py
@@ -11,6 +11,7 @@ import numpy as np
 import scipy as sp
 import scipy.sparse as sps
 
+
 try:
     import cvxopt
     from cvxopt import solvers, matrix, spmatrix
@@ -26,7 +27,7 @@ def scipy_sparse_to_spmatrix(A):
 
 
 def barycenter(A, M, weights=None, verbose=False, log=False, solver='interior-point'):
-    """Compute the entropic regularized wasserstein barycenter of distributions A
+    """Compute the Wasserstein barycenter of distributions A
 
      The function solves the following optimization problem [16]:
 
diff --git a/ot/smooth.py b/ot/smooth.py
new file mode 100644
index 0000000..5a8e4b5
--- /dev/null
+++ b/ot/smooth.py
@@ -0,0 +1,600 @@
+#Copyright (c) 2018, Mathieu Blondel
+#All rights reserved.
+#
+#Redistribution and use in source and binary forms, with or without
+#modification, are permitted provided that the following conditions are met:
+#
+#1. Redistributions of source code must retain the above copyright notice, this
+#list of conditions and the following disclaimer.
+#
+#2. Redistributions in binary form must reproduce the above copyright notice,
+#this list of conditions and the following disclaimer in the documentation and/or
+#other materials provided with the distribution.
+#
+#THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+#ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+#WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+#IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
+#INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+#NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA,
+#OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+#LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+#OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+#THE POSSIBILITY OF SUCH DAMAGE.
+
+# Author: Mathieu Blondel
+#         Remi Flamary <remi.flamary@unice.fr>
+
+"""
+Implementation of
+Smooth and Sparse Optimal Transport.
+Mathieu Blondel, Vivien Seguy, Antoine Rolet.
+In Proc. of AISTATS 2018.
+https://arxiv.org/abs/1710.06276
+
+[17] Blondel, M., Seguy, V., & Rolet, A. (2018). Smooth and Sparse Optimal
+Transport. Proceedings of the Twenty-First International Conference on
+Artificial Intelligence and Statistics (AISTATS).
+
+Original code from https://github.com/mblondel/smooth-ot/
+
+"""
+
+import numpy as np
+from scipy.optimize import minimize
+
+
+def projection_simplex(V, z=1, axis=None):
+    """ Projection of x onto the simplex, scaled by z
+
+        P(x; z) = argmin_{y >= 0, sum(y) = z} ||y - x||^2
+    z: float or array
+        If array, len(z) must be compatible with V
+    axis: None or int
+        - axis=None: project V by P(V.ravel(); z)
+        - axis=1: project each V[i] by P(V[i]; z[i])
+        - axis=0: project each V[:, j] by P(V[:, j]; z[j])
+    """
+    if axis == 1:
+        n_features = V.shape[1]
+        U = np.sort(V, axis=1)[:, ::-1]
+        z = np.ones(len(V)) * z
+        cssv = np.cumsum(U, axis=1) - z[:, np.newaxis]
+        ind = np.arange(n_features) + 1
+        cond = U - cssv / ind > 0
+        rho = np.count_nonzero(cond, axis=1)
+        theta = cssv[np.arange(len(V)), rho - 1] / rho
+        return np.maximum(V - theta[:, np.newaxis], 0)
+
+    elif axis == 0:
+        return projection_simplex(V.T, z, axis=1).T
+
+    else:
+        V = V.ravel().reshape(1, -1)
+        return projection_simplex(V, z, axis=1).ravel()
+
+
+class Regularization(object):
+    """Base class for Regularization objects
+
+        Notes
+        -----
+        This class is not intended for direct use but as a parent for true
+        regularization implementations.
+    """
+
+    def __init__(self, gamma=1.0):
+        """
+        Parameters
+        ----------
+        gamma: float
+            Regularization parameter.
+            We recover unregularized OT when gamma -> 0.
+        """
+        self.gamma = gamma
+
+    def delta_Omega(self, X):
+        """
+        Compute delta_Omega(X[:, j]) for each X[:, j].
+        delta_Omega(x) = sup_{y >= 0} y^T x - Omega(y).
+
+        Parameters
+        ----------
+        X: array, shape = len(a) x len(b)
+            Input array.
+
+        Returns
+        -------
+        v: array, len(b)
+            Values: v[j] = delta_Omega(X[:, j])
+        G: array, len(a) x len(b)
+            Gradients: G[:, j] = nabla delta_Omega(X[:, j])
+        """
+        raise NotImplementedError
+
+    def max_Omega(self, X, b):
+        """
+        Compute max_Omega_j(X[:, j]) for each X[:, j].
+        max_Omega_j(x) = sup_{y >= 0, sum(y) = 1} y^T x - Omega(b[j] y) / b[j].
+
+        Parameters
+        ----------
+        X: array, shape = len(a) x len(b)
+            Input array.
+        b: array, shape = len(b)
+            Weights b[j] appearing in the definition of max_Omega_j above.
+
+        Returns
+        -------
+        v: array, len(b)
+            Values: v[j] = max_Omega_j(X[:, j])
+        G: array, len(a) x len(b)
+            Gradients: G[:, j] = nabla max_Omega_j(X[:, j])
+        """
+        raise NotImplementedError
+
+    def Omega(self, T):
+        """
+        Compute regularization term.
+
+        Parameters
+        ----------
+        T: array, shape = len(a) x len(b)
+            Input array.
+
+        Returns
+        -------
+        value: float
+            Regularization term.
+        """
+        raise NotImplementedError
+
+
+class NegEntropy(Regularization):
+    """ NegEntropy regularization """
+
+    def delta_Omega(self, X):
+        G = np.exp(X / self.gamma - 1)
+        val = self.gamma * np.sum(G, axis=0)
+        return val, G
+
+    def max_Omega(self, X, b):
+        max_X = np.max(X, axis=0) / self.gamma
+        exp_X = np.exp(X / self.gamma - max_X)
+        val = self.gamma * (np.log(np.sum(exp_X, axis=0)) + max_X)
+        val -= self.gamma * np.log(b)
+        G = exp_X / np.sum(exp_X, axis=0)
+        return val, G
+
+    def Omega(self, T):
+        return self.gamma * np.sum(T * np.log(T))
+
+
+class SquaredL2(Regularization):
+    """ Squared L2 regularization """
+
+    def delta_Omega(self, X):
+        max_X = np.maximum(X, 0)
+        val = np.sum(max_X ** 2, axis=0) / (2 * self.gamma)
+        G = max_X / self.gamma
+        return val, G
+
+    def max_Omega(self, X, b):
+        G = projection_simplex(X / (b * self.gamma), axis=0)
+        val = np.sum(X * G, axis=0)
+        val -= 0.5 * self.gamma * b * np.sum(G * G, axis=0)
+        return val, G
+
+    def Omega(self, T):
+        return 0.5 * self.gamma * np.sum(T ** 2)
+
+
+def dual_obj_grad(alpha, beta, a, b, C, regul):
+    """
+    Compute objective value and gradients of dual objective.
+
+    Parameters
+    ----------
+    alpha: array, shape = len(a)
+    beta: array, shape = len(b)
+        Current iterate of dual potentials.
+    a: array, shape = len(a)
+    b: array, shape = len(b)
+        Input histograms (should be non-negative and sum to 1).
+    C: array, shape = len(a) x len(b)
+        Ground cost matrix.
+    regul: Regularization object
+        Should implement a delta_Omega(X) method.
+
+    Returns
+    -------
+    obj: float
+        Objective value (higher is better).
+    grad_alpha: array, shape = len(a)
+        Gradient w.r.t. alpha.
+    grad_beta: array, shape = len(b)
+        Gradient w.r.t. beta.
+    """
+    obj = np.dot(alpha, a) + np.dot(beta, b)
+    grad_alpha = a.copy()
+    grad_beta = b.copy()
+
+    # X[:, j] = alpha + beta[j] - C[:, j]
+    X = alpha[:, np.newaxis] + beta - C
+
+    # val.shape = len(b)
+    # G.shape = len(a) x len(b)
+    val, G = regul.delta_Omega(X)
+
+    obj -= np.sum(val)
+    grad_alpha -= G.sum(axis=1)
+    grad_beta -= G.sum(axis=0)
+
+    return obj, grad_alpha, grad_beta
+
+
+def solve_dual(a, b, C, regul, method="L-BFGS-B", tol=1e-3, max_iter=500,
+               verbose=False):
+    """
+    Solve the "smoothed" dual objective.
+
+    Parameters
+    ----------
+    a: array, shape = len(a)
+    b: array, shape = len(b)
+        Input histograms (should be non-negative and sum to 1).
+    C: array, shape = len(a) x len(b)
+        Ground cost matrix.
+    regul: Regularization object
+        Should implement a delta_Omega(X) method.
+    method: str
+        Solver to be used (passed to `scipy.optimize.minimize`).
+    tol: float
+        Tolerance parameter.
+    max_iter: int
+        Maximum number of iterations.
+
+    Returns
+    -------
+    alpha, beta: arrays, shape = len(a) and len(b)
+        Dual potentials.
+    res: result object returned by `scipy.optimize.minimize`.
+    """
+
+    def _func(params):
+        # Unpack alpha and beta.
+        alpha = params[:len(a)]
+        beta = params[len(a):]
+
+        obj, grad_alpha, grad_beta = dual_obj_grad(alpha, beta, a, b, C, regul)
+
+        # Pack grad_alpha and grad_beta.
+        grad = np.concatenate((grad_alpha, grad_beta))
+
+        # We need to maximize the dual.
+        return -obj, -grad
+
+    # Unfortunately, `minimize` only supports functions whose argument is a
+    # vector. So, we need to concatenate alpha and beta.
+    alpha_init = np.zeros(len(a))
+    beta_init = np.zeros(len(b))
+    params_init = np.concatenate((alpha_init, beta_init))
+
+    res = minimize(_func, params_init, method=method, jac=True,
+                   tol=tol, options=dict(maxiter=max_iter, disp=verbose))
+
+    alpha = res.x[:len(a)]
+    beta = res.x[len(a):]
+
+    return alpha, beta, res
+
+
+def semi_dual_obj_grad(alpha, a, b, C, regul):
+    """
+    Compute objective value and gradient of semi-dual objective.
+
+    Parameters
+    ----------
+    alpha: array, shape = len(a)
+        Current iterate of semi-dual potentials.
+    a: array, shape = len(a)
+    b: array, shape = len(b)
+        Input histograms (should be non-negative and sum to 1).
+    C: array, shape = len(a) x len(b)
+        Ground cost matrix.
+    regul: Regularization object
+        Should implement a max_Omega(X, b) method.
+
+    Returns
+    -------
+    obj: float
+        Objective value (higher is better).
+    grad: array, shape = len(a)
+        Gradient w.r.t. alpha.
+    """
+    obj = np.dot(alpha, a)
+    grad = a.copy()
+
+    # X[:, j] = alpha - C[:, j]
+    X = alpha[:, np.newaxis] - C
+
+    # val.shape = len(b)
+    # G.shape = len(a) x len(b)
+    val, G = regul.max_Omega(X, b)
+
+    obj -= np.dot(b, val)
+    grad -= np.dot(G, b)
+
+    return obj, grad
+
+
+def solve_semi_dual(a, b, C, regul, method="L-BFGS-B", tol=1e-3, max_iter=500,
+                    verbose=False):
+    """
+    Solve the "smoothed" semi-dual objective.
+
+    Parameters
+    ----------
+    a: array, shape = len(a)
+    b: array, shape = len(b)
+        Input histograms (should be non-negative and sum to 1).
+    C: array, shape = len(a) x len(b)
+        Ground cost matrix.
+    regul: Regularization object
+        Should implement a max_Omega(X, b) method.
+    method: str
+        Solver to be used (passed to `scipy.optimize.minimize`).
+    tol: float
+        Tolerance parameter.
+    max_iter: int
+        Maximum number of iterations.
+
+    Returns
+    -------
+    alpha: array, shape = len(a). Semi-dual potentials.
+    res: result object returned by `scipy.optimize.minimize`.
+    """
+
+    def _func(alpha):
+        obj, grad = semi_dual_obj_grad(alpha, a, b, C, regul)
+        # We need to maximize the semi-dual.
+        return -obj, -grad
+
+    alpha_init = np.zeros(len(a))
+
+    res = minimize(_func, alpha_init, method=method, jac=True,
+                   tol=tol, options=dict(maxiter=max_iter, disp=verbose))
+
+    return res.x, res
+
+
+def get_plan_from_dual(alpha, beta, C, regul):
+    """
+    Retrieve optimal transportation plan from optimal dual potentials.
+
+    Parameters
+    ----------
+    alpha: array, shape = len(a)
+    beta: array, shape = len(b)
+        Optimal dual potentials.
+    C: array, shape = len(a) x len(b)
+        Ground cost matrix.
+    regul: Regularization object
+        Should implement a delta_Omega(X) method.
+
+    Returns
+    -------
+    T: array, shape = len(a) x len(b)
+        Optimal transportation plan.
+    """
+    X = alpha[:, np.newaxis] + beta - C
+    return regul.delta_Omega(X)[1]
+
+
+def get_plan_from_semi_dual(alpha, b, C, regul):
+    """
+    Retrieve optimal transportation plan from optimal semi-dual potentials.
+
+    Parameters
+    ----------
+    alpha: array, shape = len(a)
+        Optimal semi-dual potentials.
+    b: array, shape = len(b)
+        Second input histogram (should be non-negative and sum to 1).
+    C: array, shape = len(a) x len(b)
+        Ground cost matrix.
+    regul: Regularization object
+        Should implement a max_Omega(X, b) method.
+
+    Returns
+    -------
+    T: array, shape = len(a) x len(b)
+        Optimal transportation plan.
+    """
+    X = alpha[:, np.newaxis] - C
+    return regul.max_Omega(X, b)[1] * b
+
+
+def smooth_ot_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=1e-9,
+                   numItermax=500, verbose=False, log=False):
+    r"""
+    Solve the regularized OT problem in the dual and return the OT matrix
+
+    The function solves the smooth relaxed dual formulation (7) in [17]_ :
+
+    .. math::
+        \max_{\alpha,\beta}\quad a^T\alpha+b^T\beta-\sum_j\delta_\Omega(\alpha+\beta_j-\mathbf{m}_j)
+
+    where :
+
+    - :math:`\mathbf{m}_j` is the jth column of the cost matrix
+    - :math:`\delta_\Omega` is the convex conjugate of the regularization term :math:`\Omega`
+    - a and b are source and target weights (sum to 1)
+
+    The OT matrix is reconstructed from the gradient of :math:`\delta_\Omega`
+    (See [17]_ Proposition 1).
+    The optimization algorithm is using gradient descent (L-BFGS by default).
+
+
+    Parameters
+    ----------
+    a : np.ndarray (ns,)
+        samples weights in the source domain
+    b : np.ndarray (nt,)
+        samples weights in the target domain (1-D histogram; the dual solver
+        does not handle the multi-target (nt,nbb) form accepted by sinkhorn)
+    M : np.ndarray (ns,nt)
+        loss matrix
+    reg : float
+        Regularization term >0
+    reg_type : str
+        Regularization type,  can be the following (default ='l2'):
+        - 'kl' : Kullback Leibler (~ Neg-entropy used in sinkhorn [2]_)
+        - 'l2' : Squared Euclidean regularization
+    method : str
+        Solver to use for scipy.optimize.minimize
+    numItermax : int, optional
+        Max number of iterations
+    stopThr : float, optional
+        Stop threshold on error (>0)
+    verbose : bool, optional
+        Print information along iterations
+    log : bool, optional
+        record log if True
+
+
+    Returns
+    -------
+    gamma : (ns x nt) ndarray
+        Optimal transportation matrix for the given parameters
+    log : dict
+        log dictionary ('alpha', 'beta', 'res') returned only if log==True in parameters
+
+
+    References
+    ----------
+
+    .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
+
+    .. [17] Blondel, M., Seguy, V., & Rolet, A. (2018). Smooth and Sparse Optimal Transport. Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics (AISTATS).
+
+    See Also
+    --------
+    ot.lp.emd : Unregularized OT
+    ot.sinkhorn : Entropic regularized OT
+    ot.optim.cg : General regularized OT
+
+    """
+
+    if reg_type.lower() in ['l2', 'squaredl2']:
+        regul = SquaredL2(gamma=reg)
+    elif reg_type.lower() in ['entropic', 'negentropy', 'kl']:
+        regul = NegEntropy(gamma=reg)
+    else:
+        raise NotImplementedError('Unknown regularization')
+
+    # solve dual (forward the user-selected scipy solver instead of dropping it)
+    alpha, beta, res = solve_dual(a, b, M, regul, method=method, tol=stopThr,
+                                  max_iter=numItermax, verbose=verbose)
+
+    # reconstruct transport matrix
+    G = get_plan_from_dual(alpha, beta, M, regul)
+
+    if log:
+        log = {'alpha': alpha, 'beta': beta, 'res': res}
+        return G, log
+    else:
+        return G
+
+
+def smooth_ot_semi_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=1e-9,
+                        numItermax=500, verbose=False, log=False):
+    r"""
+    Solve the regularized OT problem in the semi-dual and return the OT matrix
+
+    The function solves the smooth relaxed semi-dual formulation (10) in [17]_ :
+
+    .. math::
+        \max_{\alpha}\quad a^T\alpha-OT_\Omega^*(\alpha,b)
+
+    where :
+
+    .. math::
+        OT_\Omega^*(\alpha,b)=\sum_j b_j \max_{\Omega_j}(\alpha-\mathbf{m}_j)
+
+    - :math:`\mathbf{m}_j` is the jth column of the cost matrix
+    - :math:`\max_{\Omega_j}` is the smoothed max operator (``max_Omega`` of the regularization)
+    - :math:`OT_\Omega^*(\alpha,b)` is defined in Eq. (9) in [17]
+    - a and b are source and target weights (sum to 1)
+
+    The OT matrix is reconstructed using [17]_ Proposition 2.
+    The optimization algorithm is using gradient descent (L-BFGS by default).
+
+    Parameters
+    ----------
+    a : np.ndarray (ns,)
+        samples weights in the source domain
+    b : np.ndarray (nt,)
+        samples weights in the target domain (1-D histogram; the semi-dual solver
+        does not handle the multi-target (nt,nbb) form accepted by sinkhorn)
+    M : np.ndarray (ns,nt)
+        loss matrix
+    reg : float
+        Regularization term >0
+    reg_type : str
+        Regularization type,  can be the following (default ='l2'):
+        - 'kl' : Kullback Leibler (~ Neg-entropy used in sinkhorn [2]_)
+        - 'l2' : Squared Euclidean regularization
+    method : str
+        Solver to use for scipy.optimize.minimize
+    numItermax : int, optional
+        Max number of iterations
+    stopThr : float, optional
+        Stop threshold on error (>0)
+    verbose : bool, optional
+        Print information along iterations
+    log : bool, optional
+        record log if True
+
+
+    Returns
+    -------
+    gamma : (ns x nt) ndarray
+        Optimal transportation matrix for the given parameters
+    log : dict
+        log dictionary ('alpha', 'res') returned only if log==True in parameters
+
+
+    References
+    ----------
+
+    .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
+
+    .. [17] Blondel, M., Seguy, V., & Rolet, A. (2018). Smooth and Sparse Optimal Transport. Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics (AISTATS).
+
+    See Also
+    --------
+    ot.lp.emd : Unregularized OT
+    ot.sinkhorn : Entropic regularized OT
+    ot.optim.cg : General regularized OT
+
+    """
+    if reg_type.lower() in ['l2', 'squaredl2']:
+        regul = SquaredL2(gamma=reg)
+    elif reg_type.lower() in ['entropic', 'negentropy', 'kl']:
+        regul = NegEntropy(gamma=reg)
+    else:
+        raise NotImplementedError('Unknown regularization')
+
+    # solve semi-dual (forward the user-selected scipy solver instead of dropping it)
+    alpha, res = solve_semi_dual(a, b, M, regul, method=method, tol=stopThr,
+                                 max_iter=numItermax, verbose=verbose)
+
+    # reconstruct transport matrix
+    G = get_plan_from_semi_dual(alpha, b, M, regul)
+
+    if log:
+        log = {'alpha': alpha, 'res': res}
+        return G, log
+    else:
+        return G
diff --git a/ot/stochastic.py b/ot/stochastic.py
new file mode 100644
index 0000000..5e8206e
--- /dev/null
+++ b/ot/stochastic.py
@@ -0,0 +1,800 @@
+# Author: Kilian Fatras <kilian.fatras@gmail.com>
+#
+# License: MIT License
+
+import numpy as np
+
+
+##############################################################################
+# Optimization toolbox for SEMI - DUAL problems
+##############################################################################
+
+
+def coordinate_grad_semi_dual(b, M, reg, beta, i):
+    r'''
+    Compute the coordinate gradient update for regularized discrete
+        distributions for (i, :)
+
+    The function computes the gradient of the semi dual problem:
+
+    .. math::
+        W_\varepsilon(a, b) = \max_v \sum_i (\sum_j v_j * b_j
+            - reg \log(\sum_j \exp((v_j - M_{i,j})/reg) * b_j)) * a_i
+
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - v is a dual variable in R^J
+    - reg is the regularization term
+    - a and b are source and target weights (sum to 1)
+
+    The algorithm used for solving the problem is the ASGD & SAG algorithms
+    as proposed in [18]_ [alg.1 & alg.2]
+
+
+    Parameters
+    ----------
+
+    b : np.ndarray(nt,),
+        target measure
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float nu,
+        Regularization term > 0
+    beta : np.ndarray(nt,),
+        current value of the dual variable v
+    i : number int,
+        index of the picked row of M (source sample)
+
+    Returns
+    -------
+
+    coordinate gradient : np.ndarray(nt,)
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "ASGD"
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+    ...                                               method, numItermax)
+    >>> print(asgd_pi)
+
+    References
+    ----------
+
+    [Genevay et al., 2016] :
+                    Stochastic Optimization for Large-scale Optimal Transport,
+                     Advances in Neural Information Processing Systems (2016),
+                      arXiv preprint arxiv:1605.08527.
+
+    '''
+    r = M[i, :] - beta
+    # shifting by min(r) leaves the ratio below unchanged but avoids 0/0 = nan
+    exp_beta = np.exp(-(r - np.min(r)) / reg) * b
+    khi = exp_beta / np.sum(exp_beta)
+    return b - khi
+
+
+def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None):
+    r'''
+    Compute the SAG algorithm to solve the regularized discrete measures
+        optimal transport max problem
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+        s.t. \gamma 1 = a
+             \gamma^T 1= b
+             \gamma \geq 0
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term
+        :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+    The algorithm used for solving the problem is the SAG algorithm
+    as proposed in [18]_ [alg.1]
+
+
+    Parameters
+    ----------
+
+    a : np.ndarray(ns,),
+        source measure
+    b : np.ndarray(nt,),
+        target measure
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float number,
+        Regularization term > 0
+    numItermax : int number
+        number of iteration
+    lr : float number
+        learning rate, defaults to 1 / max(a / reg) when None
+
+    Returns
+    -------
+
+    v : np.ndarray(nt,)
+        dual variable
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "SAG"
+    >>> sag_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+    ...                                              method, numItermax)
+    >>> print(sag_pi)
+
+    References
+    ----------
+
+    [Genevay et al., 2016] :
+                    Stochastic Optimization for Large-scale Optimal Transport,
+                     Advances in Neural Information Processing Systems (2016),
+                      arXiv preprint arxiv:1605.08527.
+    '''
+    if lr is None:
+        lr = 1. / np.max(a / reg)
+    n_source, n_target = np.shape(M)
+    cur_beta = np.zeros(n_target)
+    # last gradient seen for every source sample, and the sum of those rows
+    stored_gradient = np.zeros((n_source, n_target))
+    sum_stored_gradient = np.zeros(n_target)
+    for _ in range(numItermax):
+        i = np.random.randint(n_source)
+        cur_coord_grad = a[i] * coordinate_grad_semi_dual(b, M, reg,
+                                                          cur_beta, i)
+        # swap the stale gradient of sample i for the fresh one in the sum
+        sum_stored_gradient += (cur_coord_grad - stored_gradient[i])
+        stored_gradient[i] = cur_coord_grad
+        cur_beta += lr * (1. / n_source) * sum_stored_gradient
+    return cur_beta
+
+
+def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None):
+    r'''
+    Compute the ASGD algorithm to solve the regularized semi continuous
+        measures optimal transport max problem
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+        s.t. \gamma 1 = a
+             \gamma^T 1= b
+             \gamma \geq 0
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term
+        :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+    The algorithm used for solving the problem is the ASGD algorithm
+    as proposed in [18]_ [alg.2]
+
+    Parameters
+    ----------
+
+    a : np.ndarray(ns,),
+        source measure (only used to pick the default learning rate)
+    b : np.ndarray(nt,),
+        target measure
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float number,
+        Regularization term > 0
+    numItermax : int number
+        number of iteration
+    lr : float number
+        learning rate, defaults to 1 / max(a / reg) when None
+
+    Returns
+    -------
+
+    ave_v : np.ndarray(nt,)
+        optimization vector (running average of the iterates)
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "ASGD"
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+    ...                                               method, numItermax)
+    >>> print(asgd_pi)
+
+    References
+    ----------
+
+    [Genevay et al., 2016] :
+                    Stochastic Optimization for Large-scale Optimal Transport,
+                     Advances in Neural Information Processing Systems (2016),
+                      arXiv preprint arxiv:1605.08527.
+    '''
+
+    if lr is None:
+        lr = 1. / np.max(a / reg)
+    n_source, n_target = np.shape(M)
+    cur_beta = np.zeros(n_target)
+    ave_beta = np.zeros(n_target)
+    # NOTE(review): i is drawn uniformly and the gradient is not weighted by
+    # a[i]; this looks right only for a uniform source measure -- confirm.
+    for k in range(1, numItermax + 1):
+        i = np.random.randint(n_source)
+        cur_coord_grad = coordinate_grad_semi_dual(b, M, reg, cur_beta, i)
+        cur_beta += (lr / np.sqrt(k)) * cur_coord_grad  # 1/sqrt(k) decay
+        ave_beta = (1. / k) * cur_beta + (1 - 1. / k) * ave_beta
+    return ave_beta
+
+
+def c_transform_entropic(b, M, reg, beta):
+    r'''
+    The goal is to recover u from the c-transform.
+
+    The function computes the c_transform of a dual variable from the other
+    dual variable:
+
+    .. math::
+        u_i = v^{c,reg}_i = -reg \log \sum_j \exp((v_j - M_{i,j})/reg) b_j
+
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - u, v are dual variables in R^IxR^J
+    - reg is the regularization term
+
+    It is used to recover an optimal u from optimal v solving the semi dual
+    problem, see Proposition 2.1 of [18]_
+
+
+    Parameters
+    ----------
+
+    b : np.ndarray(nt,)
+        target measure
+    M : np.ndarray(ns, nt)
+        cost matrix
+    reg : float
+        regularization term > 0
+    beta : np.ndarray(nt,)
+        dual variable v
+
+    Returns
+    -------
+
+    u : np.ndarray(ns,)
+        dual variable obtained as the c-transform of beta
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "ASGD"
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+    ...                                               method, numItermax)
+    >>> print(asgd_pi)
+
+    References
+    ----------
+
+    [Genevay et al., 2016] :
+                    Stochastic Optimization for Large-scale Optimal Transport,
+                     Advances in Neural Information Processing Systems (2016),
+                      arXiv preprint arxiv:1605.08527.
+    '''
+
+    # Row i of r is M[i, :] - beta. Each row is shifted by its own minimum
+    # (log-sum-exp trick) so that the largest exponent is exactly 0.
+    r = M - beta
+    min_r = np.min(r, axis=1)
+    exp_beta = np.exp(-(r - min_r[:, None]) / reg) * b
+    alpha = min_r - reg * np.log(np.sum(exp_beta, axis=1))
+    return alpha.astype(np.float64, copy=False)
+
+
+def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
+                             log=False):
+    r'''
+    Compute the transportation matrix to solve the regularized discrete
+        measures optimal transport max problem
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+        s.t. \gamma 1 = a
+             \gamma^T 1= b
+             \gamma \geq 0
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term
+        :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+    The algorithm used for solving the problem is the SAG or ASGD algorithms
+    as proposed in [18]_
+
+
+    Parameters
+    ----------
+
+    a : np.ndarray(ns,),
+        source measure
+    b : np.ndarray(nt,),
+        target measure
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float number,
+        Regularization term > 0
+    method : str,
+        used method (SAG or ASGD, case-insensitive)
+    numItermax : int number
+        number of iteration
+    lr : float number
+        learning rate
+    log : bool, optional
+        record log if True
+
+    Returns
+    -------
+
+    pi : np.ndarray(ns, nt)
+        transportation matrix; None is returned (and a message printed)
+        when method is neither SAG nor ASGD
+    log : dict
+        log dictionary return only if log==True in parameters; it stores
+        the dual variables under the keys 'alpha' and 'beta'
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "ASGD"
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+    ...                                               method, numItermax)
+    >>> print(asgd_pi)
+
+    References
+    ----------
+
+    [Genevay et al., 2016] :
+                    Stochastic Optimization for Large-scale Optimal Transport,
+                     Advances in Neural Information Processing Systems (2016),
+                      arXiv preprint arxiv:1605.08527.
+    '''
+
+    # 1) run the requested stochastic solver on the semi-dual variable
+    solver = method.lower()
+    if solver == "sag":
+        opt_beta = sag_entropic_transport(a, b, M, reg, numItermax, lr)
+    elif solver == "asgd":
+        opt_beta = averaged_sgd_entropic_transport(a, b, M, reg, numItermax, lr)
+    else:
+        print("Please, select your method between SAG and ASGD")
+        return None
+
+    # 2) recover the other dual variable, then the primal plan
+    #    pi_ij = a_i * b_j * exp((alpha_i + beta_j - M_ij) / reg)
+    opt_alpha = c_transform_entropic(b, M, reg, opt_beta)
+    pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M) / reg) *
+          a[:, None] * b[None, :])
+
+    # the boolean `log` flag is left untouched; the dict gets its own name
+    if log:
+        potentials = {'alpha': opt_alpha, 'beta': opt_beta}
+        return pi, potentials
+    return pi
+
+
+##############################################################################
+# Optimization toolbox for DUAL problems
+##############################################################################
+
+
+def batch_grad_dual_alpha(M, reg, alpha, beta, batch_size, batch_alpha,
+                          batch_beta):
+    r'''
+    Computes the partial gradient of F_{W_\varepsilon}
+
+    Compute the partial gradient of the dual problem:
+
+    .. math::
+        \forall i in batch_alpha,
+            grad_alpha_i = 1 * batch_size -
+                    sum_{j in batch_beta} exp((alpha_i + beta_j - M_{i,j})/reg)
+
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - alpha, beta are dual variables in R^ixR^J
+    - reg is the regularization term
+    - batch_alpha and batch_beta are list of index
+
+    The algorithm used for solving the dual problem is the SGD algorithm
+    as proposed in [19]_ [alg.1]
+
+    Parameters
+    ----------
+
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float number,
+        Regularization term > 0
+    alpha : np.ndarray(ns,)
+        dual variable
+    beta : np.ndarray(nt,)
+        dual variable
+    batch_size : int number
+        size of the batch (constant term of the gradient)
+    batch_alpha : np.ndarray(bs,)
+        batch of index of alpha
+    batch_beta : np.ndarray(bs,)
+        batch of index of beta
+
+    Returns
+    -------
+
+    grad : np.ndarray(bs,)
+        partial grad F in alpha, one entry per index of batch_alpha
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 20000
+    >>> lr = 0.1
+    >>> batch_size = 3
+    >>> log = True
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+    ...                                                   batch_size,
+    ...                                                   numItermax, lr, log)
+    >>> print(log['alpha'], log['beta'])
+    >>> print(sgd_dual_pi)
+
+    References
+    ----------
+
+    [Seguy et al., 2018] :
+                    International Conference on Learning Representation (2018),
+                      arXiv preprint arxiv:1711.02283.
+    '''
+
+    # kernel[p, q] = exp((alpha_i + beta_j - M_ij) / reg) for the p-th sampled
+    # row i and the q-th sampled column j; summing over q replaces the loop
+    kernel = np.exp((alpha[batch_alpha, None] + beta[None, batch_beta] -
+                     M[np.ix_(batch_alpha, batch_beta)]) / reg)
+    grad_alpha = batch_size - np.sum(kernel, axis=1)
+    return grad_alpha
+
+
+def batch_grad_dual_beta(M, reg, alpha, beta, batch_size, batch_alpha,
+                         batch_beta):
+    r'''
+    Computes the partial gradient of F_{W_\varepsilon}
+
+    Compute the partial gradient of the dual problem:
+
+    .. math::
+        \forall j in batch_beta,
+            grad_beta_j = 1 * batch_size -
+                sum_{i in batch_alpha} exp((alpha_i + beta_j - M_{i,j})/reg)
+
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - alpha, beta are dual variables in R^ixR^J
+    - reg is the regularization term
+    - batch_alpha and batch_beta are list of index
+
+    The algorithm used for solving the dual problem is the SGD algorithm
+    as proposed in [19]_ [alg.1]
+
+    Parameters
+    ----------
+
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float number,
+        Regularization term > 0
+    alpha : np.ndarray(ns,)
+        dual variable
+    beta : np.ndarray(nt,)
+        dual variable
+    batch_size : int number
+        size of the batch (constant term of the gradient)
+    batch_alpha : np.ndarray(bs,)
+        batch of index of alpha
+    batch_beta : np.ndarray(bs,)
+        batch of index of beta
+
+    Returns
+    -------
+
+    grad : np.ndarray(bs,)
+        partial grad F in beta, one entry per index of batch_beta
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 20000
+    >>> lr = 0.1
+    >>> batch_size = 3
+    >>> log = True
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+    ...                                                   batch_size,
+    ...                                                   numItermax, lr, log)
+    >>> print(log['alpha'], log['beta'])
+    >>> print(sgd_dual_pi)
+
+    References
+    ----------
+
+    [Seguy et al., 2018] :
+                    International Conference on Learning Representation (2018),
+                      arXiv preprint arxiv:1711.02283.
+
+    '''
+
+    # kernel[p, q] = exp((alpha_i + beta_j - M_ij) / reg) for the p-th sampled
+    # row i and the q-th sampled column j; summing over p replaces the loop
+    kernel = np.exp((alpha[batch_alpha, None] + beta[None, batch_beta] -
+                     M[np.ix_(batch_alpha, batch_beta)]) / reg)
+    grad_beta = batch_size - np.sum(kernel, axis=0)
+    return grad_beta
+
+
+def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr,
+                                alternate=True):
+    r'''
+    Compute the sgd algorithm to solve the regularized discrete measures
+        optimal transport dual problem
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+        s.t. \gamma 1 = a
+             \gamma^T 1= b
+             \gamma \geq 0
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term
+        :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+
+    Both dual variables are drawn from a standard normal distribution and
+    then updated with mini-batch gradient steps of size lr / sqrt(k) at
+    iteration k, on batch_size rows and batch_size columns of M sampled
+    without replacement.
+
+    Parameters
+    ----------
+
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float number,
+        Regularization term > 0
+    batch_size : int number
+        size of the batch; it cannot exceed ns or nt because indices are
+        sampled without replacement
+    numItermax : int number
+        number of iteration
+    lr : float number
+        learning rate
+    alternate : bool, optional
+        alternating algorithm: if True the gradient in beta is computed
+        after alpha has been updated, otherwise both gradients are
+        computed from the same iterate
+
+    Returns
+    -------
+
+    alpha : np.ndarray(ns,)
+        dual variable
+    beta : np.ndarray(nt,)
+        dual variable
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 20000
+    >>> lr = 0.1
+    >>> batch_size = 3
+    >>> log = True
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+    ...                                                   batch_size,
+    ...                                                   numItermax, lr, log)
+    >>> print(log['alpha'], log['beta'])
+    >>> print(sgd_dual_pi)
+
+    References
+    ----------
+
+    [Seguy et al., 2018] :
+                    International Conference on Learning Representation (2018),
+                      arXiv preprint arxiv:1711.02283.
+    '''
+
+    n_source, n_target = np.shape(M)
+    # both dual variables start from a standard normal draw
+    cur_alpha = np.random.randn(n_source)
+    cur_beta = np.random.randn(n_target)
+    for cur_iter in range(numItermax):
+        # step size decays as lr / sqrt(iteration number)
+        step = lr / np.sqrt(cur_iter + 1)
+        # rows and columns of this mini-batch, sampled without repetition
+        batch_alpha = np.random.choice(n_source, batch_size, replace=False)
+        batch_beta = np.random.choice(n_target, batch_size, replace=False)
+        grad_F_alpha = batch_grad_dual_alpha(M, reg, cur_alpha, cur_beta,
+                                             batch_size, batch_alpha,
+                                             batch_beta)
+        if alternate:
+            # alternating scheme: the gradient in beta sees the new alpha
+            cur_alpha[batch_alpha] += step * grad_F_alpha
+        grad_F_beta = batch_grad_dual_beta(M, reg, cur_alpha, cur_beta,
+                                           batch_size, batch_alpha,
+                                           batch_beta)
+        if not alternate:
+            # simultaneous scheme: both gradients used the same iterate
+            cur_alpha[batch_alpha] += step * grad_F_alpha
+        cur_beta[batch_beta] += step * grad_F_beta
+
+    return cur_alpha, cur_beta
+
+
+def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1,
+                        log=False):
+    r'''
+    Compute the transportation matrix to solve the regularized discrete measures
+        optimal transport dual problem
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+        s.t. \gamma 1 = a
+             \gamma^T 1= b
+             \gamma \geq 0
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term
+        :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+
+    Parameters
+    ----------
+
+    a : np.ndarray(ns,),
+        source measure
+    b : np.ndarray(nt,),
+        target measure
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float number,
+        Regularization term > 0
+    batch_size : int number
+        size of the batch
+    numItermax : int number
+        number of iteration
+    lr : float number
+        learning rate
+    log : bool, optional
+        record log if True
+
+    Returns
+    -------
+
+    pi : np.ndarray(ns, nt)
+        transportation matrix
+    log : dict
+        log dictionary return only if log==True in parameters; it stores
+        the dual variables under the keys 'alpha' and 'beta'
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 20000
+    >>> lr = 0.1
+    >>> batch_size = 3
+    >>> log = True
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+    ...                                                   batch_size,
+    ...                                                   numItermax, lr, log)
+    >>> print(log['alpha'], log['beta'])
+    >>> print(sgd_dual_pi)
+
+    References
+    ----------
+
+    [Seguy et al., 2018] :
+                    International Conference on Learning Representation (2018),
+                      arXiv preprint arxiv:1711.02283.
+    '''
+
+    # SGD on the dual problem yields both dual variables (alternating scheme)
+    opt_alpha, opt_beta = sgd_entropic_regularization(M, reg, batch_size,
+                                                      numItermax, lr)
+    # primal plan: pi_ij = a_i * b_j * exp((alpha_i + beta_j - M_ij) / reg)
+    pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M) / reg) *
+          a[:, None] * b[None, :])
+
+    if log:
+        return pi, {'alpha': opt_alpha, 'beta': opt_beta}
+    return pi
diff --git a/ot/utils.py b/ot/utils.py
index 7dac283..bb21b38 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -77,6 +77,34 @@ def clean_zeros(a, b, M):
     return a2, b2, M2
 
 
+def euclidean_distances(X, Y, squared=False):
+    """Compute the Euclidean distance matrix between the rows of X and of Y.
+
+    Parameters
+    ----------
+    X : {array-like}, shape (n_samples_1, n_features)
+    Y : {array-like}, shape (n_samples_2, n_features)
+    squared : boolean, optional
+        Return squared Euclidean distances.
+    Returns
+    -------
+    distances : {array} of float64, shape (n_samples_1, n_samples_2)
+    """
+    same_data = X is Y  # identity would be lost by the conversions below
+    X = np.asarray(X, dtype=float)  # accept lists and integer arrays
+    Y = X if same_data else np.asarray(Y, dtype=float)
+    XX = np.einsum('ij,ij->i', X, X)[:, np.newaxis]
+    YY = np.einsum('ij,ij->i', Y, Y)[np.newaxis, :]
+    distances = np.dot(X, -2 * Y.T)  # ||x||^2 - 2 <x, y> + ||y||^2, in place
+    distances += XX
+    distances += YY
+    np.maximum(distances, 0, out=distances)
+    if same_data:
+        # rounding can leave tiny non-zero self-distances on the diagonal
+        distances.flat[::distances.shape[0] + 1] = 0.0
+    return distances if squared else np.sqrt(distances, out=distances)
+
+
 def dist(x1, x2=None, metric='sqeuclidean'):
     """Compute distance between samples in x1 and x2 using function scipy.spatial.distance.cdist
 
@@ -104,7 +132,8 @@ def dist(x1, x2=None, metric='sqeuclidean'):
     """
     if x2 is None:
         x2 = x1
-
+    if metric == "sqeuclidean":
+        return euclidean_distances(x1, x2, squared=True)
     return cdist(x1, x2, metric=metric)