diff options
Diffstat (limited to 'ot')
-rw-r--r-- | ot/backend.py | 2 | ||||
-rw-r--r-- | ot/bregman.py | 14 | ||||
-rw-r--r-- | ot/coot.py | 9 | ||||
-rw-r--r-- | ot/da.py | 10 | ||||
-rw-r--r-- | ot/datasets.py | 2 | ||||
-rw-r--r-- | ot/dr.py | 8 | ||||
-rw-r--r-- | ot/gromov/_bregman.py | 6 | ||||
-rw-r--r-- | ot/gromov/_dictionary.py | 10 | ||||
-rw-r--r-- | ot/gromov/_gw.py | 17 | ||||
-rw-r--r-- | ot/gromov/_semirelaxed.py | 15 | ||||
-rw-r--r-- | ot/gromov/_utils.py | 8 | ||||
-rw-r--r-- | ot/lp/__init__.py | 11 | ||||
-rw-r--r-- | ot/lp/cvx.py | 2 | ||||
-rw-r--r-- | ot/lp/solver_1d.py | 12 | ||||
-rw-r--r-- | ot/optim.py | 9 | ||||
-rwxr-xr-x | ot/partial.py | 26 | ||||
-rw-r--r-- | ot/plot.py | 2 | ||||
-rw-r--r-- | ot/regpath.py | 2 | ||||
-rw-r--r-- | ot/sliced.py | 2 | ||||
-rw-r--r-- | ot/unbalanced.py | 9 |
20 files changed, 98 insertions, 78 deletions
diff --git a/ot/backend.py b/ot/backend.py index 0dd6fb8..a82c448 100644 --- a/ot/backend.py +++ b/ot/backend.py @@ -27,7 +27,7 @@ Examples np_config.enable_numpy_behavior() Performance --------- +----------- - CPU: Intel(R) Xeon(R) Gold 6248 CPU @ 2.50GHz - GPU: Tesla V100-SXM2-32GB diff --git a/ot/bregman.py b/ot/bregman.py index 20bef7e..4503ffc 100644 --- a/ot/bregman.py +++ b/ot/bregman.py @@ -150,7 +150,7 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, stopThr=1e-9, ot.bregman.sinkhorn_knopp : Classic Sinkhorn :ref:`[2] <references-sinkhorn>` ot.bregman.sinkhorn_stabilized: Stabilized sinkhorn :ref:`[9] <references-sinkhorn>` :ref:`[10] <references-sinkhorn>` - ot.bregman.sinkhorn_epsilon_scaling: Sinkhorn with epslilon scaling + ot.bregman.sinkhorn_epsilon_scaling: Sinkhorn with epsilon scaling :ref:`[9] <references-sinkhorn>` :ref:`[10] <references-sinkhorn>` """ @@ -384,6 +384,7 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, \gamma^T \mathbf{1} &= \mathbf{b} \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix @@ -572,6 +573,7 @@ def sinkhorn_log(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, \gamma^T \mathbf{1} &= \mathbf{b} \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix @@ -784,6 +786,7 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, \gamma^T \mathbf{1} &= \mathbf{b} \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix @@ -950,6 +953,7 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, \gamma^T \mathbf{1} &= \mathbf{b} \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix @@ -2657,7 +2661,7 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000, ---------- .. [4] S. Nakhostin, N. Courty, R. Flamary, D. Tuia, T. 
Corpetti, - Supervised planetary unmixing with optimal transport, Whorkshop + Supervised planetary unmixing with optimal transport, Workshop on Hyperspectral Image and Signal Processing : Evolution in Remote Sensing (WHISPERS), 2016. """ @@ -2908,6 +2912,7 @@ def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', \gamma^T \mathbf{1} &= \mathbf{b} \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`n_samples_a`, `n_samples_b`) metric cost matrix @@ -3104,6 +3109,7 @@ def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', \gamma^T \mathbf{1} &= \mathbf{b} \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`n_samples_a`, `n_samples_b`) metric cost matrix @@ -3257,7 +3263,6 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli sinkhorn divergence :math:`S`: .. math:: - W &= \min_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F + \mathrm{reg} \cdot\Omega(\gamma) @@ -3287,6 +3292,7 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli \gamma_b^T \mathbf{1} &= \mathbf{b} \gamma_b &\geq 0 + where : - :math:`\mathbf{M}` (resp. :math:`\mathbf{M_a}`, :math:`\mathbf{M_b}`) @@ -3352,7 +3358,7 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli ---------- .. [23] Aude Genevay, Gabriel Peyré, Marco Cuturi, Learning Generative Models with Sinkhorn Divergences, Proceedings of the Twenty-First - International Conference on Artficial Intelligence and Statistics, + International Conference on Artificial Intelligence and Statistics, (AISTATS) 21, 2018 ''' X_s, X_t = list_to_array(X_s, X_t) @@ -74,7 +74,7 @@ def co_optimal_transport(X, Y, wx_samp=None, wx_feat=None, wy_samp=None, wy_feat Sinkhorn solver. If epsilon is scalar, then the same epsilon is applied to both regularization of sample and feature couplings. 
alpha : scalar or indexable object of length 2, float or int, optional (default = 0) - Coeffficient parameter of linear terms with respect to the sample and feature couplings. + Coefficient parameter of linear terms with respect to the sample and feature couplings. If alpha is scalar, then the same alpha is applied to both linear terms. M_samp : (n_sample_x, n_sample_y), float, optional (default = None) Sample matrix with respect to the linear term on sample coupling. @@ -295,7 +295,7 @@ def co_optimal_transport2(X, Y, wx_samp=None, wx_feat=None, wy_samp=None, wy_fea + \varepsilon_1 \mathbf{KL}(\mathbf{P} | \mathbf{w}_{xs} \mathbf{w}_{ys}^T) + \varepsilon_2 \mathbf{KL}(\mathbf{Q} | \mathbf{w}_{xf} \mathbf{w}_{yf}^T) - Where : + where : - :math:`\mathbf{X}`: Data matrix in the source space - :math:`\mathbf{Y}`: Data matrix in the target space @@ -333,7 +333,7 @@ def co_optimal_transport2(X, Y, wx_samp=None, wx_feat=None, wy_samp=None, wy_fea Sinkhorn solver. If epsilon is scalar, then the same epsilon is applied to both regularization of sample and feature couplings. alpha : scalar or indexable object of length 2, float or int, optional (default = 0) - Coeffficient parameter of linear terms with respect to the sample and feature couplings. + Coefficient parameter of linear terms with respect to the sample and feature couplings. If alpha is scalar, then the same alpha is applied to both linear terms. M_samp : (n_sample_x, n_sample_y), float, optional (default = None) Sample matrix with respect to the linear term on sample coupling. @@ -345,7 +345,6 @@ def co_optimal_transport2(X, Y, wx_samp=None, wx_feat=None, wy_samp=None, wy_fea tuples of 2 vectors of size (n_sample_x, n_sample_y) and (n_feature_x, n_feature_y). Initialization of sample and feature dual vectors if using Sinkhorn algorithm. Zero vectors by default. - - "pi_sample" and "pi_feature" whose values are matrices of size (n_sample_x, n_sample_y) and (n_feature_x, n_feature_y). 
Initialization of sample and feature couplings. @@ -382,7 +381,7 @@ def co_optimal_transport2(X, Y, wx_samp=None, wx_feat=None, wy_samp=None, wy_fea float CO-Optimal Transport distance. dict - Contains logged informations from :any:`co_optimal_transport` solver. + Contains logged information from :any:`co_optimal_transport` solver. Only returned if `log` parameter is True References @@ -28,7 +28,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, numInnerItermax=200, stopInnerThr=1e-9, verbose=False, log=False): r""" - Solve the entropic regularization optimal transport problem with nonconvex + Solve the entropic regularization optimal transport problem with non-convex group lasso regularization The function solves the following optimization problem: @@ -172,13 +172,13 @@ def sinkhorn_l1l2_gl(a, labels_a, b, M, reg, eta=0.1, numItermax=10, - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - :math:`\Omega_g` is the group lasso regulaization term + - :math:`\Omega_g` is the group lasso regularization term :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^2` where :math:`\mathcal{I}_c` are the index of samples from class `c` in the source domain. - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1) - The algorithm used for solving the problem is the generalised conditional + The algorithm used for solving the problem is the generalized conditional gradient as proposed in :ref:`[5, 7] <references-sinkhorn-l1l2-gl>`. @@ -296,7 +296,7 @@ def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False, material of :ref:`[8] <references-joint-OT-mapping-linear>`) using the bias optional argument. 
The algorithm used for solving the problem is the block coordinate - descent that alternates between updates of :math:`\mathbf{G}` (using conditionnal gradient) + descent that alternates between updates of :math:`\mathbf{G}` (using conditional gradient) and the update of :math:`\mathbf{L}` using a classical least square solver. @@ -494,7 +494,7 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian', material of :ref:`[8] <references-joint-OT-mapping-kernel>`) using the bias optional argument. The algorithm used for solving the problem is the block coordinate - descent that alternates between updates of :math:`\mathbf{G}` (using conditionnal gradient) + descent that alternates between updates of :math:`\mathbf{G}` (using conditional gradient) and the update of :math:`\mathbf{L}` using a classical kernel least square solver. diff --git a/ot/datasets.py b/ot/datasets.py index a839074..3d633f4 100644 --- a/ot/datasets.py +++ b/ot/datasets.py @@ -22,7 +22,7 @@ def make_1D_gauss(n, m, s): m : float mean value of the gaussian distribution s : float - standard deviaton of the gaussian distribution + standard deviation of the gaussian distribution Returns ------- @@ -5,7 +5,7 @@ Dimension reduction with OT .. warning:: Note that by default the module is not imported in :mod:`ot`. In order to - use it you need to explicitely import :mod:`ot.dr` + use it you need to explicitly import :mod:`ot.dr` """ @@ -83,7 +83,7 @@ def fda(X, y, p=2, reg=1e-16): y : ndarray, shape (n,) Labels for training samples. p : int, optional - Size of dimensionnality reduction. + Size of dimensionality reduction. reg : float, optional Regularization term >0 (ridge regularization) @@ -164,7 +164,7 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, sinkhorn_method='sinkhorn', maxiter y : ndarray, shape (n,) Labels for training samples. p : int, optional - Size of dimensionnality reduction. + Size of dimensionality reduction. 
reg : float, optional Regularization term >0 (entropic regularization) solver : None | str, optional @@ -175,7 +175,7 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, sinkhorn_method='sinkhorn', maxiter P0 : ndarray, shape (d, p) Initial starting point for projection. normalize : bool, optional - Normalise the Wasserstaiun distance by the average distance on P0 (default : False) + Normalize the Wasserstein distance by the average distance on P0 (default : False) verbose : int, optional Print information along iterations. diff --git a/ot/gromov/_bregman.py b/ot/gromov/_bregman.py index b0cccfb..aa25f1f 100644 --- a/ot/gromov/_bregman.py +++ b/ot/gromov/_bregman.py @@ -69,7 +69,7 @@ def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon, symmetric=None, symmetric : bool, optional Either C1 and C2 are to be assumed symmetric or not. If let to its default None value, a symmetry test will be conducted. - Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric). + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). G0: array-like, shape (ns,nt), optional If None the initial transport plan of the solver is pq^T. Otherwise G0 must satisfy marginal constraints and will be used as initial transport of the solver. 
@@ -152,7 +152,7 @@ def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon, symmetric=None, def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon, symmetric=None, G0=None, max_iter=1000, tol=1e-9, verbose=False, log=False): r""" - Returns the entropic gromov-wasserstein discrepancy between the two measured similarity matrices :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` + Returns the entropic Gromov-Wasserstein discrepancy between the two measured similarity matrices :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` The function solves the following optimization problem: @@ -194,7 +194,7 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon, symmetric=None symmetric : bool, optional Either C1 and C2 are to be assumed symmetric or not. If let to its default None value, a symmetry test will be conducted. - Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric). + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). G0: array-like, shape (ns,nt), optional If None the initial transport plan of the solver is pq^T. Otherwise G0 must satisfy marginal constraints and will be used as initial transport of the solver. 
diff --git a/ot/gromov/_dictionary.py b/ot/gromov/_dictionary.py index 5b32671..0d618d1 100644 --- a/ot/gromov/_dictionary.py +++ b/ot/gromov/_dictionary.py @@ -148,7 +148,7 @@ def gromov_wasserstein_dictionary_learning(Cs, D, nt, reg=0., ps=None, q=None, e Ts = [None] * batch_size for batch_idx, C_idx in enumerate(batch): - # BCD solver for Gromov-Wassersteisn linear unmixing used independently on each structure of the sampled batch + # BCD solver for Gromov-Wasserstein linear unmixing used independently on each structure of the sampled batch unmixings[batch_idx], Cs_embedded[batch_idx], Ts[batch_idx], current_loss = gromov_wasserstein_linear_unmixing( Cs[C_idx], Cdict, reg=reg, p=ps[C_idx], q=q, tol_outer=tol_outer, tol_inner=tol_inner, max_iter_outer=max_iter_outer, max_iter_inner=max_iter_inner, symmetric=symmetric, **kwargs @@ -252,7 +252,7 @@ def gromov_wasserstein_linear_unmixing(C, Cdict, reg=0., p=None, q=None, tol_out Returns ------- w: array-like, shape (D,) - gromov-wasserstein linear unmixing of :math:`(\mathbf{C},\mathbf{p})` onto the span of the dictionary. + Gromov-Wasserstein linear unmixing of :math:`(\mathbf{C},\mathbf{p})` onto the span of the dictionary. Cembedded: array-like, shape (nt,nt) embedded structure of :math:`(\mathbf{C},\mathbf{p})` onto the dictionary, :math:`\sum_d w_d\mathbf{C_{dict}[d]}`. T: array-like (ns, nt) @@ -559,7 +559,7 @@ def fused_gromov_wasserstein_dictionary_learning(Cs, Ys, D, nt, alpha, reg=0., p Feature matrices composing the dictionary. The dictionary leading to the best loss over an epoch is saved and returned. log: dict - If use_log is True, contains loss evolutions by batches and epoches. + If use_log is True, contains loss evolutions by batches and epochs. References ------- .. [38] C. Vincent-Cuaz, T. Vayer, R. Flamary, M. Corneli, N. 
Courty, Online @@ -634,7 +634,7 @@ def fused_gromov_wasserstein_dictionary_learning(Cs, Ys, D, nt, alpha, reg=0., p Ts = [None] * batch_size for batch_idx, C_idx in enumerate(batch): - # BCD solver for Gromov-Wassersteisn linear unmixing used independently on each structure of the sampled batch + # BCD solver for Gromov-Wasserstein linear unmixing used independently on each structure of the sampled batch unmixings[batch_idx], Cs_embedded[batch_idx], Ys_embedded[batch_idx], Ts[batch_idx], current_loss = fused_gromov_wasserstein_linear_unmixing( Cs[C_idx], Ys[C_idx], Cdict, Ydict, alpha, reg=reg, p=ps[C_idx], q=q, tol_outer=tol_outer, tol_inner=tol_inner, max_iter_outer=max_iter_outer, max_iter_inner=max_iter_inner, symmetric=symmetric, **kwargs @@ -736,7 +736,7 @@ def fused_gromov_wasserstein_linear_unmixing(C, Y, Cdict, Ydict, alpha, reg=0., Returns ------- w: array-like, shape (D,) - fused gromov-wasserstein linear unmixing of (C,Y,p) onto the span of the dictionary. + fused Gromov-Wasserstein linear unmixing of (C,Y,p) onto the span of the dictionary. Cembedded: array-like, shape (nt,nt) embedded structure of :math:`(\mathbf{C},\mathbf{Y}, \mathbf{p})` onto the dictionary, :math:`\sum_d w_d\mathbf{C_{dict}[d]}`. 
Yembedded: array-like, shape (nt,d) diff --git a/ot/gromov/_gw.py b/ot/gromov/_gw.py index bc4719d..cdfa9a3 100644 --- a/ot/gromov/_gw.py +++ b/ot/gromov/_gw.py @@ -26,7 +26,7 @@ from ._utils import update_square_loss, update_kl_loss def gromov_wasserstein(C1, C2, p, q, loss_fun='square_loss', symmetric=None, log=False, armijo=False, G0=None, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs): r""" - Returns the gromov-wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` + Returns the Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` The function solves the following optimization problem: @@ -39,6 +39,7 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun='square_loss', symmetric=None, log \mathbf{\gamma}^T \mathbf{1} &= \mathbf{q} \mathbf{\gamma} &\geq 0 + Where : - :math:`\mathbf{C_1}`: Metric cost matrix in the source space @@ -68,7 +69,7 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun='square_loss', symmetric=None, log symmetric : bool, optional Either C1 and C2 are to be assumed symmetric or not. If let to its default None value, a symmetry test will be conducted. - Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric). + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). 
verbose : bool, optional Print information along iterations log : bool, optional @@ -170,7 +171,7 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun='square_loss', symmetric=None, log def gromov_wasserstein2(C1, C2, p, q, loss_fun='square_loss', symmetric=None, log=False, armijo=False, G0=None, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs): r""" - Returns the gromov-wasserstein discrepancy between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` + Returns the Gromov-Wasserstein discrepancy between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` The function solves the following optimization problem: @@ -183,6 +184,7 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun='square_loss', symmetric=None, lo \mathbf{\gamma}^T \mathbf{1} &= \mathbf{q} \mathbf{\gamma} &\geq 0 + Where : - :math:`\mathbf{C_1}`: Metric cost matrix in the source space @@ -216,7 +218,7 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun='square_loss', symmetric=None, lo symmetric : bool, optional Either C1 and C2 are to be assumed symmetric or not. If let to its default None value, a symmetry test will be conducted. - Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric). + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). verbose : bool, optional Print information along iterations log : bool, optional @@ -241,7 +243,7 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun='square_loss', symmetric=None, lo gw_dist : float Gromov-Wasserstein distance log : dict - convergence information and Coupling marix + convergence information and Coupling matrix References ---------- @@ -310,6 +312,7 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', symmetric= which can lead to copy overhead on GPU arrays. .. note:: All computations in the conjugate gradient solver are done with numpy to limit memory overhead. 
+ The algorithm used for solving the problem is conditional gradient as discussed in :ref:`[24] <references-fused-gromov-wasserstein>` Parameters @@ -329,7 +332,7 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', symmetric= symmetric : bool, optional Either C1 and C2 are to be assumed symmetric or not. If let to its default None value, a symmetry test will be conducted. - Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric). + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). alpha : float, optional Trade-off parameter (0 < alpha < 1) armijo : bool, optional @@ -503,7 +506,7 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', symmetric Returns ------- fgw-distance : float - Fused gromov wasserstein distance for the given parameters. + Fused Gromov-Wasserstein distance for the given parameters. log : dict Log dictionary return only if log==True in parameters. diff --git a/ot/gromov/_semirelaxed.py b/ot/gromov/_semirelaxed.py index 638bb1c..cb2bf28 100644 --- a/ot/gromov/_semirelaxed.py +++ b/ot/gromov/_semirelaxed.py @@ -21,7 +21,7 @@ from ._utils import init_matrix_semirelaxed, gwloss, gwggrad def semirelaxed_gromov_wasserstein(C1, C2, p, loss_fun='square_loss', symmetric=None, log=False, G0=None, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs): r""" - Returns the semi-relaxed gromov-wasserstein divergence transport from :math:`(\mathbf{C_1}, \mathbf{p})` to :math:`\mathbf{C_2}` + Returns the semi-relaxed Gromov-Wasserstein divergence transport from :math:`(\mathbf{C_1}, \mathbf{p})` to :math:`\mathbf{C_2}` The function solves the following optimization problem: @@ -32,6 +32,7 @@ def semirelaxed_gromov_wasserstein(C1, C2, p, loss_fun='square_loss', symmetric= s.t. 
\ \mathbf{\gamma} \mathbf{1} &= \mathbf{p} \mathbf{\gamma} &\geq 0 + Where : - :math:`\mathbf{C_1}`: Metric cost matrix in the source space @@ -58,7 +59,7 @@ def semirelaxed_gromov_wasserstein(C1, C2, p, loss_fun='square_loss', symmetric= symmetric : bool, optional Either C1 and C2 are to be assumed symmetric or not. If let to its default None value, a symmetry test will be conducted. - Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric). + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). verbose : bool, optional Print information along iterations log : bool, optional @@ -156,6 +157,7 @@ def semirelaxed_gromov_wasserstein2(C1, C2, p, loss_fun='square_loss', symmetric s.t. \ \mathbf{\gamma} \mathbf{1} &= \mathbf{p} \mathbf{\gamma} &\geq 0 + Where : - :math:`\mathbf{C_1}`: Metric cost matrix in the source space @@ -166,6 +168,7 @@ def semirelaxed_gromov_wasserstein2(C1, C2, p, loss_fun='square_loss', symmetric Note that when using backends, this loss function is differentiable wrt the matrices (C1, C2) but not yet for the weights p. + .. note:: This function is backend-compatible and will work on arrays from all compatible backends. However all the steps in the conditional gradient are not differentiable. @@ -184,7 +187,7 @@ def semirelaxed_gromov_wasserstein2(C1, C2, p, loss_fun='square_loss', symmetric symmetric : bool, optional Either C1 and C2 are to be assumed symmetric or not. If let to its default None value, a symmetry test will be conducted. - Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric). + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). verbose : bool, optional Print information along iterations log : bool, optional @@ -278,7 +281,7 @@ def semirelaxed_fused_gromov_wasserstein(M, C1, C2, p, loss_fun='square_loss', s symmetric : bool, optional Either C1 and C2 are to be assumed symmetric or not. 
If let to its default None value, a symmetry test will be conducted. - Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric). + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). alpha : float, optional Trade-off parameter (0 < alpha < 1) G0: array-like, shape (ns,nt), optional @@ -415,7 +418,7 @@ def semirelaxed_fused_gromov_wasserstein2(M, C1, C2, p, loss_fun='square_loss', symmetric : bool, optional Either C1 and C2 are to be assumed symmetric or not. If let to its default None value, a symmetry test will be conducted. - Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric). + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). alpha : float, optional Trade-off parameter (0 < alpha < 1) G0: array-like, shape (ns,nt), optional @@ -435,7 +438,7 @@ def semirelaxed_fused_gromov_wasserstein2(M, C1, C2, p, loss_fun='square_loss', Returns ------- srfgw-divergence : float - Semi-relaxed Fused gromov wasserstein divergence for the given parameters. + Semi-relaxed Fused Gromov-Wasserstein divergence for the given parameters. log : dict Log dictionary return only if log==True in parameters. diff --git a/ot/gromov/_utils.py b/ot/gromov/_utils.py index e842250..ef8cd88 100644 --- a/ot/gromov/_utils.py +++ b/ot/gromov/_utils.py @@ -20,7 +20,7 @@ def init_matrix(C1, C2, p, q, loss_fun='square_loss', nx=None): r"""Return loss matrices and tensors for Gromov-Wasserstein fast computation Returns the value of :math:`\mathcal{L}(\mathbf{C_1}, \mathbf{C_2}) \otimes \mathbf{T}` with the - selected loss function as the loss function of Gromow-Wasserstein discrepancy. + selected loss function as the loss function of Gromov-Wasserstein discrepancy. 
The matrices are computed as described in Proposition 1 in :ref:`[12] <references-init-matrix>` @@ -195,7 +195,7 @@ def gwloss(constC, hC1, hC2, T, nx=None): Returns ------- loss : float - Gromov Wasserstein loss + Gromov-Wasserstein loss .. _references-gwloss: @@ -235,7 +235,7 @@ def gwggrad(constC, hC1, hC2, T, nx=None): Returns ------- grad : array-like, shape (`ns`, `nt`) - Gromov Wasserstein gradient + Gromov-Wasserstein gradient .. _references-gwggrad: @@ -328,7 +328,7 @@ def init_matrix_semirelaxed(C1, C2, p, loss_fun='square_loss', nx=None): r"""Return loss matrices and tensors for semi-relaxed Gromov-Wasserstein fast computation Returns the value of :math:`\mathcal{L}(\mathbf{C_1}, \mathbf{C_2}) \otimes \mathbf{T}` with the - selected loss function as the loss function of semi-relaxed Gromow-Wasserstein discrepancy. + selected loss function as the loss function of semi-relaxed Gromov-Wasserstein discrepancy. The matrices are computed as described in Proposition 1 in :ref:`[12] <references-init-matrix>` and adapted to the semi-relaxed problem where the second marginal is not a constant anymore. diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 2ff02ab..4952a21 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -253,7 +253,7 @@ def emd(a, b, M, numItermax=100000, log=False, center_dual=True, numThreads=1): Otherwise returns only the optimal transportation matrix. center_dual: boolean, optional (default=True) If True, centers the dual potential using function - :ref:`center_ot_dual`. + :py:func:`ot.lp.center_ot_dual`. numThreads: int or "max", optional (default=1, i.e. OpenMP is not used) If compiled with OpenMP, chooses the number of threads to parallelize. "max" selects the highest number possible. @@ -418,7 +418,7 @@ def emd2(a, b, M, processes=1, If True, returns the optimal transportation matrix in the log. center_dual: boolean, optional (default=True) If True, centers the dual potential using function - :ref:`center_ot_dual`. 
+ :py:func:`ot.lp.center_ot_dual`. numThreads: int or "max", optional (default=1, i.e. OpenMP is not used) If compiled with OpenMP, chooses the number of threads to parallelize. "max" selects the highest number possible. @@ -631,6 +631,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None .. _references-free-support-barycenter: + References ---------- .. [20] Cuturi, Marco, and Arnaud Doucet. "Fast computation of Wasserstein barycenters." International Conference on Machine Learning. 2014. @@ -688,7 +689,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None def generalized_free_support_barycenter(X_list, a_list, P_list, n_samples_bary, Y_init=None, b=None, weights=None, numItermax=100, stopThr=1e-7, verbose=False, log=None, numThreads=1, eps=0): r""" - Solves the free support generalised Wasserstein barycenter problem: finding a barycenter (a discrete measure with + Solves the free support generalized Wasserstein barycenter problem: finding a barycenter (a discrete measure with a fixed amount of points of uniform weights) whose respective projections fit the input measures. 
More formally: @@ -776,7 +777,7 @@ def generalized_free_support_barycenter(X_list, a_list, P_list, n_samples_bary, Y_init = nx.randn(n_samples_bary, d, type_as=X_list[0]) if b is None: - b = nx.ones(n_samples_bary, type_as=X_list[0]) / n_samples_bary # not optimised + b = nx.ones(n_samples_bary, type_as=X_list[0]) / n_samples_bary # not optimized out = free_support_barycenter(Z_list, a_list, Y_init, b, numItermax=numItermax, stopThr=stopThr, verbose=verbose, log=log, numThreads=numThreads) @@ -786,7 +787,7 @@ def generalized_free_support_barycenter(X_list, a_list, P_list, n_samples_bary, else: Y = out log_dict = None - Y = Y @ B.T # return to the Generalised WB formulation + Y = Y @ B.T # return to the Generalized WB formulation if log: return Y, log_dict diff --git a/ot/lp/cvx.py b/ot/lp/cvx.py index 361ad0f..3f7eb36 100644 --- a/ot/lp/cvx.py +++ b/ot/lp/cvx.py @@ -52,7 +52,7 @@ def barycenter(A, M, weights=None, verbose=False, log=False, solver='interior-po reg : float Regularization term >0 weights : np.ndarray (n,) - Weights of each histogram a_i on the simplex (barycentric coodinates) + Weights of each histogram a_i on the simplex (barycentric coordinates) verbose : bool, optional Print information along iterations log : bool, optional diff --git a/ot/lp/solver_1d.py b/ot/lp/solver_1d.py index 840801a..8d841ec 100644 --- a/ot/lp/solver_1d.py +++ b/ot/lp/solver_1d.py @@ -37,7 +37,7 @@ def quantile_function(qs, cws, xs): n = xs.shape[0] if nx.__name__ == 'torch': # this is to ensure the best performance for torch searchsorted - # and avoid a warninng related to non-contiguous arrays + # and avoid a warning related to non-contiguous arrays cws = cws.T.contiguous() qs = qs.T.contiguous() else: @@ -145,6 +145,7 @@ def emd_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True, s.t. \gamma 1 = a, \gamma^T 1= b, \gamma\geq 0 + where : - d is the metric @@ -283,6 +284,7 @@ def emd2_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True, s.t. 
\gamma 1 = a, \gamma^T 1= b, \gamma\geq 0 + where : - d is the metric @@ -464,7 +466,7 @@ def derivative_cost_on_circle(theta, u_values, v_values, u_cdf, v_cdf, p=2): if nx.__name__ == 'torch': # this is to ensure the best performance for torch searchsorted - # and avoid a warninng related to non-contiguous arrays + # and avoid a warning related to non-contiguous arrays u_cdf = u_cdf.contiguous() v_cdf_theta = v_cdf_theta.contiguous() @@ -478,7 +480,7 @@ def derivative_cost_on_circle(theta, u_values, v_values, u_cdf, v_cdf, p=2): if nx.__name__ == 'torch': # this is to ensure the best performance for torch searchsorted - # and avoid a warninng related to non-contiguous arrays + # and avoid a warning related to non-contiguous arrays u_cdfm = u_cdfm.contiguous() v_cdf_theta = v_cdf_theta.contiguous() @@ -665,8 +667,8 @@ def binary_search_circle(u_values, v_values, u_weights=None, v_weights=None, p=1 if u_values.shape[1] != v_values.shape[1]: raise ValueError( - "u and v must have the same number of batchs {} and {} respectively given".format(u_values.shape[1], - v_values.shape[1])) + "u and v must have the same number of batches {} and {} respectively given".format(u_values.shape[1], + v_values.shape[1])) u_values = u_values % 1 v_values = v_values % 1 diff --git a/ot/optim.py b/ot/optim.py index b15c77b..9e65e81 100644 --- a/ot/optim.py +++ b/ot/optim.py @@ -138,6 +138,7 @@ def generic_conditional_gradient(a, b, M, f, df, reg1, reg2, lp_solver, line_sea \gamma^T \mathbf{1} &= \mathbf{b} (optional constraint) \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix @@ -157,6 +158,7 @@ def generic_conditional_gradient(a, b, M, f, df, reg1, reg2, lp_solver, line_sea \gamma^T \mathbf{1} &= \mathbf{b} \gamma &\geq 0 + where : - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` @@ -224,7 +226,7 @@ def generic_conditional_gradient(a, b, M, f, df, reg1, reg2, lp_solver, line_sea 
See Also -------- - ot.lp.emd : Unregularized optimal ransport + ot.lp.emd : Unregularized optimal transport ot.bregman.sinkhorn : Entropic regularized optimal transport """ a, b, M, G0 = list_to_array(a, b, M, G0) @@ -325,6 +327,7 @@ def cg(a, b, M, reg, f, df, G0=None, line_search=line_search_armijo, \gamma^T \mathbf{1} &= \mathbf{b} \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix @@ -380,7 +383,7 @@ def cg(a, b, M, reg, f, df, G0=None, line_search=line_search_armijo, See Also -------- - ot.lp.emd : Unregularized optimal ransport + ot.lp.emd : Unregularized optimal transport ot.bregman.sinkhorn : Entropic regularized optimal transport """ @@ -407,6 +410,7 @@ def semirelaxed_cg(a, b, M, reg, f, df, G0=None, line_search=line_search_armijo, s.t. \ \gamma \mathbf{1} &= \mathbf{a} \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix @@ -492,6 +496,7 @@ def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10, \gamma^T \mathbf{1} &= \mathbf{b} \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix diff --git a/ot/partial.py b/ot/partial.py index bf4119d..43f3362 100755 --- a/ot/partial.py +++ b/ot/partial.py @@ -516,7 +516,7 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None, nb_dummies : int, optional Number of dummy points to add (avoid instabilities in the EMD solver) G0 : ndarray, shape (ns, nt), optional - Initialisation of the transportation matrix + Initialization of the transportation matrix thres : float, optional quantile of the gradient matrix to populate the cost matrix when 0 (default: 1) @@ -686,7 +686,7 @@ def partial_gromov_wasserstein2(C1, C2, p, q, m=None, nb_dummies=1, G0=None, C1 : ndarray, shape (ns, ns) Metric cost matrix in the source space C2 : ndarray, shape (nt, nt) - Metric costfr matrix in the target space + Metric cost matrix in the target space p : ndarray, shape (ns,) Distribution in the source space q : ndarray, 
shape (nt,) @@ -697,7 +697,7 @@ def partial_gromov_wasserstein2(C1, C2, p, q, m=None, nb_dummies=1, G0=None, nb_dummies : int, optional Number of dummy points to add (avoid instabilities in the EMD solver) G0 : ndarray, shape (ns, nt), optional - Initialisation of the transportation matrix + Initialization of the transportation matrix thres : float, optional quantile of the gradient matrix to populate the cost matrix when 0 (default: 1) @@ -958,15 +958,15 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None, - `m` is the amount of mass to be transported The formulation of the GW problem has been proposed in - :ref:`[12] <references-entropic-partial-gromov-wassertein>` and the - partial GW in :ref:`[29] <references-entropic-partial-gromov-wassertein>` + :ref:`[12] <references-entropic-partial-gromov-wasserstein>` and the + partial GW in :ref:`[29] <references-entropic-partial-gromov-wasserstein>` Parameters ---------- C1 : ndarray, shape (ns, ns) Metric cost matrix in the source space C2 : ndarray, shape (nt, nt) - Metric costfr matrix in the target space + Metric cost matrix in the target space p : ndarray, shape (ns,) Distribution in the source space q : ndarray, shape (nt,) @@ -977,7 +977,7 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None, Amount of mass to be transported (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`) G0 : ndarray, shape (ns, nt), optional - Initialisation of the transportation matrix + Initialization of the transportation matrix numItermax : int, optional Max number of iterations tol : float, optional @@ -1016,7 +1016,7 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None, log dictionary returned only if `log` is `True` - .. _references-entropic-partial-gromov-wassertein: + .. _references-entropic-partial-gromov-wasserstein: References ---------- .. 
[12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon, @@ -1107,8 +1107,8 @@ def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None, - `m` is the amount of mass to be transported The formulation of the GW problem has been proposed in - :ref:`[12] <references-entropic-partial-gromov-wassertein2>` and the - partial GW in :ref:`[29] <references-entropic-partial-gromov-wassertein2>` + :ref:`[12] <references-entropic-partial-gromov-wasserstein2>` and the + partial GW in :ref:`[29] <references-entropic-partial-gromov-wasserstein2>` Parameters @@ -1116,7 +1116,7 @@ def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None, C1 : ndarray, shape (ns, ns) Metric cost matrix in the source space C2 : ndarray, shape (nt, nt) - Metric costfr matrix in the target space + Metric cost matrix in the target space p : ndarray, shape (ns,) Distribution in the source space q : ndarray, shape (nt,) @@ -1127,7 +1127,7 @@ def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None, Amount of mass to be transported (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`) G0 : ndarray, shape (ns, nt), optional - Initialisation of the transportation matrix + Initialization of the transportation matrix numItermax : int, optional Max number of iterations tol : float, optional @@ -1159,7 +1159,7 @@ def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None, 1.87 - .. _references-entropic-partial-gromov-wassertein2: + .. _references-entropic-partial-gromov-wasserstein2: References ---------- .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon, @@ -3,7 +3,7 @@ Functions for plotting OT matrices .. warning:: Note that by default the module is not import in :mod:`ot`. 
In order to - use it you need to explicitely import :mod:`ot.plot` + use it you need to explicitly import :mod:`ot.plot` """ diff --git a/ot/regpath.py b/ot/regpath.py index e745288..8a9b6d8 100644 --- a/ot/regpath.py +++ b/ot/regpath.py @@ -399,7 +399,7 @@ def compute_next_removal(phi, delta, current_gamma): def complement_schur(M_current, b, d, id_pop): r""" This function computes the inverse of the design matrix in the \ - regularization path using the Schur complement. Two cases may arise: + regularization path using the Schur complement. Two cases may arise: Case 1: one variable is added to the active set diff --git a/ot/sliced.py b/ot/sliced.py index fa2141e..3a1644d 100644 --- a/ot/sliced.py +++ b/ot/sliced.py @@ -173,7 +173,7 @@ def max_sliced_wasserstein_distance(X_s, X_t, a=None, b=None, n_projections=50, where : - - :math:`\theta_\# \mu` stands for the pushforwars of the projection :math:`\mathbb{R}^d \ni X \mapsto \langle \theta, X \rangle` + - :math:`\theta_\# \mu` stands for the pushforwards of the projection :math:`\mathbb{R}^d \ni X \mapsto \langle \theta, X \rangle` Parameters diff --git a/ot/unbalanced.py b/ot/unbalanced.py index a71a0dd..9584d77 100644 --- a/ot/unbalanced.py +++ b/ot/unbalanced.py @@ -121,7 +121,7 @@ def sinkhorn_unbalanced(a, b, M, reg, reg_m, method='sinkhorn', numItermax=1000, ot.unbalanced.sinkhorn_stabilized_unbalanced: Unbalanced Stabilized sinkhorn :ref:`[9, 10] <references-sinkhorn-unbalanced>` ot.unbalanced.sinkhorn_reg_scaling_unbalanced: - Unbalanced Sinkhorn with epslilon scaling :ref:`[9, 10] <references-sinkhorn-unbalanced>` + Unbalanced Sinkhorn with epsilon scaling :ref:`[9, 10] <references-sinkhorn-unbalanced>` """ @@ -163,6 +163,7 @@ def sinkhorn_unbalanced2(a, b, M, reg, reg_m, method='sinkhorn', s.t. 
\gamma\geq 0 + where : - :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix @@ -240,7 +241,7 @@ def sinkhorn_unbalanced2(a, b, M, reg, reg_m, method='sinkhorn', -------- ot.unbalanced.sinkhorn_knopp : Unbalanced Classic Sinkhorn :ref:`[10] <references-sinkhorn-unbalanced2>` ot.unbalanced.sinkhorn_stabilized: Unbalanced Stabilized sinkhorn :ref:`[9, 10] <references-sinkhorn-unbalanced2>` - ot.unbalanced.sinkhorn_reg_scaling: Unbalanced Sinkhorn with epslilon scaling :ref:`[9, 10] <references-sinkhorn-unbalanced2>` + ot.unbalanced.sinkhorn_reg_scaling: Unbalanced Sinkhorn with epsilon scaling :ref:`[9, 10] <references-sinkhorn-unbalanced2>` """ b = list_to_array(b) @@ -492,7 +493,7 @@ def sinkhorn_stabilized_unbalanced(a, b, M, reg, reg_m, tau=1e5, numItermax=1000 reg_m: float Marginal relaxation term > 0 tau : float - thershold for max value in u or v for log scaling + threshold for max value in u or v for log scaling numItermax : int, optional Max number of iterations stopThr : float, optional @@ -699,7 +700,7 @@ def barycenter_unbalanced_stabilized(A, M, reg, reg_m, weights=None, tau=1e3, tau : float Stabilization threshold for log domain absorption. weights : array-like (n_hists,) optional - Weight of each distribution (barycentric coodinates) + Weight of each distribution (barycentric coordinates) If None, uniform weights are used. numItermax : int, optional Max number of iterations |