From 3fff90eb437dce30fd83012f4c0e24f3fca041b2 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Fri, 14 Jan 2022 17:47:27 +0100
Subject: [WIP] Set dev version and add minigallery to quick start guide (#334)

* change version and add minigallery in quickstart guide

* remove ot.gpu from documentation because it is deprecated and bacckends should be used

* start 0.8.2dev and description in releases.md

* typo for gallery sinkhorn2

* test better doc update for files in .githib folder
---
 docs/source/all.rst                           |   1 -
 docs/source/auto_examples/images/bak.png      | Bin 304669 -> 0 bytes
 docs/source/auto_examples/images/sinkhorn.png | Bin 37204 -> 0 bytes
 docs/source/conf.py                           |   1 -
 docs/source/quickstart.rst                    | 219 ++++++++++----------------
 5 files changed, 86 insertions(+), 135 deletions(-)
 delete mode 100644 docs/source/auto_examples/images/bak.png
 delete mode 100644 docs/source/auto_examples/images/sinkhorn.png

(limited to 'docs/source')

diff --git a/docs/source/all.rst b/docs/source/all.rst
index 6a07599..7f85a91 100644
--- a/docs/source/all.rst
+++ b/docs/source/all.rst
@@ -20,7 +20,6 @@ API and modules
    gromov
    optim
    da
-   gpu
    dr
    utils
    datasets
diff --git a/docs/source/auto_examples/images/bak.png b/docs/source/auto_examples/images/bak.png
deleted file mode 100644
index 25e7e8e..0000000
Binary files a/docs/source/auto_examples/images/bak.png and /dev/null differ
diff --git a/docs/source/auto_examples/images/sinkhorn.png b/docs/source/auto_examples/images/sinkhorn.png
deleted file mode 100644
index e003e13..0000000
Binary files a/docs/source/auto_examples/images/sinkhorn.png and /dev/null differ
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 849e97c..163851f 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -74,7 +74,6 @@ extensions = [
 
 autosummary_generate = True
 
-
 napoleon_numpy_docstring = True
 
 # Add any paths that contain templates here, relative to this directory.
diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index 232df7b..e74b019 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -207,13 +207,12 @@ The method implemented for solving the OT problem is the network simplex. It is
 implemented in C from [1]_. It has a complexity of :math:`O(n^3)` but the
 solver is quite efficient and uses sparsity of the solution.
 
-.. hint::
 
-    Examples of use for :any:`ot.emd` are available in :
 
-    - :any:`auto_examples/plot_OT_2D_samples`
-    - :any:`auto_examples/plot_OT_1D`
-    - :any:`auto_examples/plot_OT_L1_vs_L2`
+.. minigallery:: ot.emd
+    :add-heading: Examples of use for :any:`ot.emd`
+    :heading-level: "
+
 
 
 Computing Wasserstein distance
@@ -255,11 +254,9 @@ the :math:`W_1` Wasserstein distance can be done directly with :any:`ot.emd2`
 when providing :code:`M = ot.dist(xs, xt, metric='euclidean')` to use the Euclidean
 distance.
 
-.. hint::
-
-    An example of use for :any:`ot.emd2` is available in :
-
-    - :any:`auto_examples/plot_compute_emd`
+.. minigallery:: ot.emd2
+    :add-heading: Examples of use for :any:`ot.emd2`
+    :heading-level: "
 
 
 Special cases
@@ -416,17 +413,18 @@ of stochastic solvers for entropic regularized OT [18]_ [19]_.  Those pure Pytho
 implementations are not optimized for speed but provide a robust implementation
 of algorithms in [18]_ [19]_.
 
-.. hint::
-    Examples of use for :any:`ot.sinkhorn` are available in :
 
-    - :any:`auto_examples/plot_OT_2D_samples`
-    - :any:`auto_examples/plot_OT_1D`
-    - :any:`auto_examples/plot_OT_1D_smooth`
-    - :any:`auto_examples/plot_stochastic`
+.. minigallery:: ot.sinkhorn
+    :add-heading: Examples of use for :any:`ot.sinkhorn`
+    :heading-level: "
 
+.. minigallery:: ot.sinkhorn2
+    :add-heading: Examples of use for :any:`ot.sinkhorn2`
+    :heading-level: "
 
-Other regularization
-^^^^^^^^^^^^^^^^^^^^
+
+Other regularizations
+^^^^^^^^^^^^^^^^^^^^^
 
 While entropic OT is the most common and favored in practice, there exists other
 kinds of regularizations. We provide in POT two specific solvers for other
@@ -451,12 +449,9 @@ functions :any:`ot.smooth.smooth_ot_dual` or
 :any:`ot.smooth.smooth_ot_semi_dual` with parameter :code:`reg_type='l2'` to
 choose the quadratic regularization.
 
-.. hint::
-    Examples of quadratic regularization are available in :
-
-    - :any:`auto_examples/plot_OT_1D_smooth`
-    - :any:`auto_examples/plot_optim_OTreg`
-
+.. minigallery:: ot.smooth.smooth_ot_dual ot.smooth.smooth_ot_semi_dual ot.optim.cg
+    :add-heading: Examples of use of quadratic regularization
+    :heading-level: "
 
 
 Group Lasso regularization
@@ -480,11 +475,9 @@ be solved using an efficient majoration minimization approach with
 convex group lasso and we provide a solver using generalized conditional
 gradient algorithm [7]_ in function :any:`ot.da.sinkhorn_l1l2_gl`.
 
-.. hint::
-    Examples of group Lasso regularization are available in:
-
-    - :any:`auto_examples/domain-adaptation/plot_otda_classes`
-    - :any:`auto_examples/domain-adaptation/plot_otda_d2`
+.. minigallery::  ot.da.SinkhornLpl1Transport  ot.da.SinkhornL1l2Transport ot.da.sinkhorn_l1l2_gl ot.da.sinkhorn_lpl1_mm
+    :add-heading: Examples of group Lasso regularization
+    :heading-level: "
 
 
 Generic solvers
@@ -520,10 +513,9 @@ generalized conditional gradient [7]_ implemented in :any:`ot.optim.gcg`  that
 does not linearize the entropic term but
 relies on :any:`ot.sinkhorn` for its iterations.
 
-.. hint::
-    An example of generic solvers are available in :
-
-    - :any:`auto_examples/plot_optim_OTreg`
+.. minigallery:: ot.optim.cg ot.optim.gcg
+    :add-heading: Examples of the generic solvers
+    :heading-level: "
 
 
 Wasserstein Barycenters
@@ -581,19 +573,15 @@ the matrix vector production in the Bregman projections by convolution
 operators. We provide an implementation of this algorithm in function
 :any:`ot.bregman.convolutional_barycenter2d`.
 
-.. hint::
-    Examples of Wasserstein (:meth:`ot.lp.barycenter`) and regularized Wasserstein
-    barycenter (:any:`ot.bregman.barycenter`) computation are available in :
 
-    - :any:`auto_examples/barycenters/plot_barycenter_1D`
-    - :any:`auto_examples/barycenters/plot_barycenter_lp_vs_entropic`
 
-    An example of convolutional barycenter
-    (:any:`ot.bregman.convolutional_barycenter2d`) computation
-    for 2D images is available
-    in :
+.. minigallery:: ot.lp.barycenter ot.bregman.barycenter ot.barycenter
+    :add-heading: Examples of Wasserstein and regularized Wasserstein barycenters
+    :heading-level: "
 
-    - :any:`auto_examples/barycenters/plot_convolutional_barycenter`
+.. minigallery:: ot.bregman.convolutional_barycenter2d
+    :add-heading:  An example of convolutional barycenter (:any:`ot.bregman.convolutional_barycenter2d`) computation
+    :heading-level: "
 
 
@@ -613,13 +601,9 @@ We provide a solver based on [20]_ in
 return a locally optimal support :math:`\{x_i\}` for uniform or given weights
 :math:`a`.
 
- .. hint::
-
-    An example of the free support barycenter estimation is available
-    in :
-
-    - :any:`auto_examples/barycenters/plot_free_support_barycenter`
-
+.. minigallery:: ot.lp.free_support_barycenter
+    :add-heading: Examples of free support barycenter estimation
+    :heading-level: "
 
 
@@ -656,12 +640,10 @@ method proposed in [8]_ that estimates a continuous mapping approximating the
 barycentric mapping is provided in :any:`ot.da.joint_OT_mapping_linear` for
 linear mapping and :any:`ot.da.joint_OT_mapping_kernel` for non-linear mapping.
 
- .. hint::
-
-    An example of the linear Monge mapping estimation is available
-    in :
+.. minigallery:: ot.da.joint_OT_mapping_linear ot.da.joint_OT_mapping_linear ot.da.OT_mapping_linear
+    :add-heading: Examples of Monge mapping estimation
+    :heading-level: "
 
-    - :any:`auto_examples/domain-adaptation/plot_otda_linear_mapping`
 
 Domain adaptation classes
 ^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -704,14 +686,11 @@ A list of the provided implementation is given in the following note.
       [14]_
     * :any:`ot.da.MappingTransport`: Nonlinear mapping estimation [8]_
 
-.. hint::
 
-    Examples of the use of OTDA classes are available in:
+.. minigallery:: ot.da.SinkhornTransport ot.da.LinearTransport 
+    :add-heading: Examples of the use of OTDA classes
+    :heading-level: "
 
-    - :any:`auto_examples/domain-adaptation/plot_otda_color_images`
-    - :any:`auto_examples/domain-adaptation/plot_otda_mapping`
-    - :any:`auto_examples/domain-adaptation/plot_otda_mapping_colors_images`
-    - :any:`auto_examples/domain-adaptation/plot_otda_semi_supervised`
 
 Other applications
 ------------------
@@ -746,11 +725,10 @@ respectively. Note that we also provide the Fisher discriminant estimator in
     :code:`autograd`, :any:`ot.dr` is not imported by default. If you want to
     use it you have to specifically import it with :code:`import ot.dr` .
 
-.. hint::
+.. minigallery:: ot.dr.wda
+    :add-heading: Examples of the use of WDA
+    :heading-level: "
 
-    An example of the use of WDA is available in :
-
-    - :any:`auto_examples/others/plot_WDA`
 
 
 Unbalanced optimal transport
@@ -787,11 +765,9 @@ linear term.
       the log stabilized version of the algorithm [10]_.
 
 
-.. hint::
-
-    Examples of the use of :any:`ot.sinkhorn_unbalanced` are available in :
-
-    - :any:`auto_examples/unbalanced-partial/plot_UOT_1D`
+.. minigallery:: ot.sinkhorn_unbalanced ot.sinkhorn_unbalanced2  ot.unbalanced.sinkhorn_unbalanced
+    :add-heading: Examples of Unbalanced OT
+    :heading-level: "
 
 
 Unbalanced Barycenters
@@ -819,11 +795,10 @@ implemented the main function :any:`ot.barycenter_unbalanced`.
       the log stabilized version of the algorithm [10]_.
 
 
-.. hint::
+.. minigallery:: ot.barycenter_unbalanced  ot.unbalanced.barycenter_unbalanced
+    :add-heading: Examples of Unbalanced OT barycenters
+    :heading-level: "
 
-      Examples of the use of :any:`ot.barycenter_unbalanced` are available in :
-
-      - :any:`auto_examples/unbalanced-partial/plot_UOT_barycenter_1D`
 
 
 Partial optimal transport
@@ -865,11 +840,10 @@ is computed in :any:`ot.partial.partial_gromov_wasserstein` and in
 regularization of the problem.
 
 
-.. hint::
-
-    Examples of the use of :any:`ot.partial` are available in:
+.. minigallery:: ot.partial.partial_wasserstein ot.partial.partial_gromov_wasserstein
+    :add-heading: Examples of Partial OT
+    :heading-level: "
 
-    - :any:`auto_examples/unbalanced-partial/plot_partial_wass_and_gromov`
 
 
@@ -898,6 +872,12 @@ There also exists an entropic regularized variant of GW that has been proposed i
 [12]_ and we provide an implementation of their algorithm in
 :any:`ot.gromov.entropic_gromov_wasserstein`.
 
+
+.. minigallery:: ot.gromov.gromov_wasserstein ot.gromov.entropic_gromov_wasserstein  ot.gromov.fused_gromov_wasserstein ot.gromov.gromov_wasserstein2    
+    :add-heading: Examples of computation of GW, regularized G and FGW
+    :heading-level: "
+
+
 Note that similarly to Wasserstein distance GW allows for the definition of GW
 barycenters that can be expressed as
 
@@ -919,59 +899,15 @@ graphs for instance and also provide computable barycenters.
 The implementations of FGW and FGW barycenter is provided in functions
 :any:`ot.gromov.fused_gromov_wasserstein` and :any:`ot.gromov.fgw_barycenters`.
 
-.. hint::
-
-    Examples of computation of GW, regularized G and FGW are available in:
 
-    - :any:`auto_examples/gromov/plot_gromov`
-    - :any:`auto_examples/gromov/plot_fgw`
+.. minigallery:: ot.gromov.gromov_barycenters ot.gromov.fgw_barycenters   
+    :add-heading: Examples of GW, regularized G and FGW barycenters
+    :heading-level: "
 
-    Examples of GW, regularized GW and FGW barycenters are available in:
 
-    - :any:`auto_examples/gromov/plot_gromov_barycenter`
-    - :any:`auto_examples/gromov/plot_barycenter_fgw`
 
-
-GPU acceleration
-^^^^^^^^^^^^^^^^
-
-.. warning::
-
-    The :any:`ot.gpu` has been deprecated since the release 0.8 of POT and
-    should not be used. The GPU implementation (in Pytorch for instance) can be
-    used with the novel backends using the compatible functions from POT.
-
-
-We provide several implementation of our OT solvers in :any:`ot.gpu`. Those
-implementations use the :code:`cupy` toolbox that obviously need to be installed.
-
-
-.. note::
-
-    Several implementations of POT functions (mainly those relying on linear
-    algebra) have been implemented in :any:`ot.gpu`. Here is a short list on the
-    main entries:
-
-    -  :meth:`ot.gpu.dist`: computation of distance matrix
-    -  :meth:`ot.gpu.sinkhorn`: computation of sinkhorn
-    -  :meth:`ot.gpu.sinkhorn_lpl1_mm`: computation of sinkhorn + group lasso
-
-Note that while the :any:`ot.gpu` module has been designed to be compatible with
-POT, calling its function with :any:`numpy`  arrays will incur a large overhead due to
-the memory copy of the array on GPU prior to computation and conversion of the
-array after computation. To avoid this overhead, we provide functions
-:meth:`ot.gpu.to_gpu` and :meth:`ot.gpu.to_np` that perform the conversion
-explicitly.
-
-.. warning::
-
-    Note that due to the hard dependency on :code:`cupy`, :any:`ot.gpu` is not
-    imported by default. If you want to
-    use it you have to specifically import it with :code:`import ot.gpu` .
-
-
-Solving OT with Multiple backends
----------------------------------
+Solving OT with Multiple backends on CPU/GPU
+--------------------------------------------
 
 .. _backends_section:
 
@@ -1002,7 +938,21 @@ the function will be the same type as the inputs and on the same device. When
 possible all computations are done on the same device and also when possible the
 output will be differentiable with respect to the input of the function.
 
+GPU acceleration
+^^^^^^^^^^^^^^^^
+
+The backends provide automatic computations/compatibility on GPU for most of the
+POT functions.
+Note that all solvers relying on the exact OT solver en C++ will need to solve the
+problem on CPU which can incur some memory copy overhead and be far from optimal
+when all other computations are done on GPU. They will still work on array on
+GPU since the copy is done automatically.
 
+Some of the functions that rely on the exact C++ solver are:
+
+- :any:`ot.emd`, :any:`ot.emd2`
+- :any:`ot.gromov_wasserstein`, :any:`ot.gromov_wasserstein2`
+- :any:`ot.optim.cg`
 
 List of compatible Backends
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -1010,18 +960,21 @@ List of compatible Backends
 - `Numpy <https://numpy.org/>`_ (all functions and solvers)
 - `Pytorch <https://pytorch.org/>`_ (all outputs differentiable w.r.t. inputs)
 - `Jax <https://github.com/google/jax>`_ (Some functions are differentiable some require a wrapper)
+- `Tensorflow <https://www.tensorflow.org/>`_ (all outputs differentiable w.r.t. inputs)
+- `Cupy <https://cupy.dev/>`_ (no differentiation, GPU only)
+
 
-List of compatible functions
+List of compatible modules
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 This list will get longer for new releases and will hopefully disappear when POT
 become fully implemented with the backend.
 
-- :any:`ot.emd`
-- :any:`ot.emd2`
-- :any:`ot.sinkhorn`
-- :any:`ot.sinkhorn2`
-- :any:`ot.dist`
+- :any:`ot.bregman`
+- :any:`ot.gromov` (some functions use CPU only solvers with copy overhead)
+- :any:`ot.optim` (some functions use CPU only solvers with copy overhead)
+- :any:`ot.sliced`
+- :any:`ot.utils` (partial)
 
 
 FAQ
-- 
cgit v1.2.3


From 5861209f27fe8e022eca2ed2c8d0bb1da4a1146b Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Mon, 17 Jan 2022 13:45:58 +0100
Subject: [MRG] Default pygment color for doc (#335)

* back to default pygment

* add images back

* move static images and make it work
---
 docs/source/_static/images/bak.png      | Bin 0 -> 304669 bytes
 docs/source/_static/images/sinkhorn.png | Bin 0 -> 37204 bytes
 docs/source/conf.py                     |   2 +-
 examples/plot_Intro_OT.py               |   4 ++--
 4 files changed, 3 insertions(+), 3 deletions(-)
 create mode 100644 docs/source/_static/images/bak.png
 create mode 100644 docs/source/_static/images/sinkhorn.png

(limited to 'docs/source')

diff --git a/docs/source/_static/images/bak.png b/docs/source/_static/images/bak.png
new file mode 100644
index 0000000..25e7e8e
Binary files /dev/null and b/docs/source/_static/images/bak.png differ
diff --git a/docs/source/_static/images/sinkhorn.png b/docs/source/_static/images/sinkhorn.png
new file mode 100644
index 0000000..e003e13
Binary files /dev/null and b/docs/source/_static/images/sinkhorn.png differ
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 163851f..d1b8426 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -141,7 +141,7 @@ exclude_patterns = []
 #show_authors = False
 
 # The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = 'default'
 
 # A list of ignored prefixes for module index sorting.
 #modindex_common_prefix = []
diff --git a/examples/plot_Intro_OT.py b/examples/plot_Intro_OT.py
index f282950..219aa51 100644
--- a/examples/plot_Intro_OT.py
+++ b/examples/plot_Intro_OT.py
@@ -58,7 +58,7 @@ help(ot.dist)
 # number of Bakeries to Cafés in a City (in this case Manhattan). We did a
 # quick google map search in Manhattan for bakeries and Cafés:
 #
-# .. image:: images/bak.png
+# .. image:: ../_static/images/bak.png
 #     :align: center
 #     :alt: bakery-cafe-manhattan
 #     :width: 600px
@@ -233,7 +233,7 @@ print('Wasserstein loss (EMD) = {0:.2f}'.format(W))
 # The Sinkhorn algorithm is very simple to code. You can implement it directly
 # using the following pseudo-code
 #
-# .. image:: images/sinkhorn.png
+# .. image:: ../_static/images/sinkhorn.png
 #     :align: center
 #     :alt: Sinkhorn algorithm
 #     :width: 440px
-- 
cgit v1.2.3


From a5e0f0d40d5046a6639924347ef97e2ac80ad0c9 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Wed, 2 Feb 2022 11:53:12 +0100
Subject: [MRG] Add weak OT solver (#341)

* add info in release file

* update tests

* pep8

* add weak OT example

* update plot in doc

* correction ewample with empirical sinkhorn

* better thumbnail

* comment from review

* update documenation
---
 README.md                            |   3 +
 RELEASES.md                          |   8 ++-
 docs/source/all.rst                  |   1 +
 examples/others/plot_WeakOT_VS_OT.py |  98 +++++++++++++++++++++++++++
 examples/plot_OT_2D_samples.py       |   5 +-
 ot/__init__.py                       |   5 +-
 ot/gromov.py                         |  16 +++++
 ot/lp/__init__.py                    |   9 ++-
 ot/lp/cvx.py                         |   1 -
 ot/utils.py                          |  12 +++-
 ot/weak.py                           | 124 +++++++++++++++++++++++++++++++++++
 test/test_bregman.py                 |  13 ++--
 test/test_ot.py                      |   2 +-
 test/test_utils.py                   |  18 ++++-
 test/test_weak.py                    |  54 +++++++++++++++
 15 files changed, 343 insertions(+), 26 deletions(-)
 create mode 100644 examples/others/plot_WeakOT_VS_OT.py
 create mode 100644 ot/weak.py
 create mode 100644 test/test_weak.py

(limited to 'docs/source')

diff --git a/README.md b/README.md
index 17fbe81..a7627df 100644
--- a/README.md
+++ b/README.md
@@ -25,6 +25,7 @@ POT provides the following generic OT solvers (links to examples):
 * Sinkhorn divergence [23] and entropic regularization OT from empirical data.
 * Debiased Sinkhorn barycenters [Sinkhorn divergence barycenter](https://pythonot.github.io/auto_examples/barycenters/plot_debiased_barycenter.html) [37]
 * [Smooth optimal transport solvers](https://pythonot.github.io/auto_examples/plot_OT_1D_smooth.html) (dual and semi-dual) for KL and squared L2 regularizations [17].
+* Weak OT solver between empirical distributions [39]
 * Non regularized [Wasserstein barycenters [16] ](https://pythonot.github.io/auto_examples/barycenters/plot_barycenter_lp_vs_entropic.html)) with LP solver (only small scale).
 * [Gromov-Wasserstein distances](https://pythonot.github.io/auto_examples/gromov/plot_gromov.html) and [GW barycenters](https://pythonot.github.io/auto_examples/gromov/plot_gromov_barycenter.html)  (exact [13] and regularized [12]), differentiable using gradients from
  * [Fused-Gromov-Wasserstein distances solver](https://pythonot.github.io/auto_examples/gromov/plot_fgw.html#sphx-glr-auto-examples-plot-fgw-py) and [FGW barycenters](https://pythonot.github.io/auto_examples/gromov/plot_barycenter_fgw.html) [24]
@@ -301,3 +302,5 @@ Conference on Machine Learning, PMLR 119:4692-4701, 2020
 
 [38] C. Vincent-Cuaz, T. Vayer, R. Flamary, M. Corneli, N. Courty, [Online Graph 
 Dictionary Learning](https://arxiv.org/pdf/2102.06555.pdf), International Conference on Machine Learning (ICML), 2021.
+
+[39] Gozlan, N., Roberto, C., Samson, P. M., & Tetali, P. (2017). [Kantorovich duality for general transport costs and applications](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.712.1825&rep=rep1&type=pdf). Journal of Functional Analysis, 273(11), 3327-3405.
\ No newline at end of file
diff --git a/RELEASES.md b/RELEASES.md
index 94c853b..4d05582 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -5,10 +5,12 @@
 
 #### New features
 
-- Better list of related examples in quick start guide with `minigallery` (PR #334)
+- Better list of related examples in quick start guide with `minigallery` (PR #334).
 - Add optional log-domain Sinkhorn implementation in WDA to support smaller values
-  of the regularization parameter (PR #336)
-- Backend implementation for `ot.lp.free_support_barycenter` (PR #340)
+  of the regularization parameter (PR #336).
+- Backend implementation for `ot.lp.free_support_barycenter` (PR #340).
+- Add weak OT solver + example  (PR #341).
+
 
 #### Closed issues
 
diff --git a/docs/source/all.rst b/docs/source/all.rst
index 7f85a91..76d2ff5 100644
--- a/docs/source/all.rst
+++ b/docs/source/all.rst
@@ -28,6 +28,7 @@ API and modules
    unbalanced
    partial
    sliced
+   weak
 
 .. autosummary::
    :toctree: ../modules/generated/
diff --git a/examples/others/plot_WeakOT_VS_OT.py b/examples/others/plot_WeakOT_VS_OT.py
new file mode 100644
index 0000000..a29c875
--- /dev/null
+++ b/examples/others/plot_WeakOT_VS_OT.py
@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+"""
+====================================================
+Weak Optimal Transport VS exact Optimal Transport
+====================================================
+
+Illustration of 2D optimal transport between distributions that are weighted
+sum of diracs. The OT matrix is plotted with the samples.
+
+"""
+
+# Author: Remi Flamary <remi.flamary@polytechnique.edu>
+#
+# License: MIT License
+
+# sphinx_gallery_thumbnail_number = 4
+
+import numpy as np
+import matplotlib.pylab as pl
+import ot
+import ot.plot
+
+##############################################################################
+# Generate data an plot it
+# ------------------------
+
+#%% parameters and data generation
+
+n = 50  # nb samples
+
+mu_s = np.array([0, 0])
+cov_s = np.array([[1, 0], [0, 1]])
+
+mu_t = np.array([4, 4])
+cov_t = np.array([[1, -.8], [-.8, 1]])
+
+xs = ot.datasets.make_2D_samples_gauss(n, mu_s, cov_s)
+xt = ot.datasets.make_2D_samples_gauss(n, mu_t, cov_t)
+
+a, b = ot.unif(n), ot.unif(n)  # uniform distribution on samples
+
+# loss matrix
+M = ot.dist(xs, xt)
+M /= M.max()
+
+#%% plot samples
+
+pl.figure(1)
+pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
+pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')
+pl.legend(loc=0)
+pl.title('Source and target distributions')
+
+pl.figure(2)
+pl.imshow(M, interpolation='nearest')
+pl.title('Cost matrix M')
+
+
+##############################################################################
+# Compute Weak OT and exact OT solutions
+# --------------------------------------
+
+#%% EMD
+
+G0 = ot.emd(a, b, M)
+
+#%% Weak OT
+
+Gweak = ot.weak_optimal_transport(xs, xt, a, b)
+
+
+##############################################################################
+# Plot weak OT and exact OT solutions
+# --------------------------------------
+
+pl.figure(3, (8, 5))
+
+pl.subplot(1, 2, 1)
+pl.imshow(G0, interpolation='nearest')
+pl.title('OT matrix')
+
+pl.subplot(1, 2, 2)
+pl.imshow(Gweak, interpolation='nearest')
+pl.title('Weak OT matrix')
+
+pl.figure(4, (8, 5))
+
+pl.subplot(1, 2, 1)
+ot.plot.plot2D_samples_mat(xs, xt, G0, c=[.5, .5, 1])
+pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
+pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')
+pl.title('OT matrix with samples')
+
+pl.subplot(1, 2, 2)
+ot.plot.plot2D_samples_mat(xs, xt, Gweak, c=[.5, .5, 1])
+pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
+pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')
+pl.title('Weak OT matrix with samples')
diff --git a/examples/plot_OT_2D_samples.py b/examples/plot_OT_2D_samples.py
index af1bc12..c3a7cd8 100644
--- a/examples/plot_OT_2D_samples.py
+++ b/examples/plot_OT_2D_samples.py
@@ -42,7 +42,6 @@ a, b = np.ones((n,)) / n, np.ones((n,)) / n  # uniform distribution on samples
 
 # loss matrix
 M = ot.dist(xs, xt)
-M /= M.max()
 
 ##############################################################################
 # Plot data
@@ -87,7 +86,7 @@ pl.title('OT matrix with samples')
 #%% sinkhorn
 
 # reg term
-lambd = 1e-3
+lambd = 1e-1
 
 Gs = ot.sinkhorn(a, b, M, lambd)
 
@@ -112,7 +111,7 @@ pl.show()
 #%% sinkhorn
 
 # reg term
-lambd = 1e-3
+lambd = 1e-1
 
 Ges = ot.bregman.empirical_sinkhorn(xs, xt, lambd)
 
diff --git a/ot/__init__.py b/ot/__init__.py
index 1ea7403..7253318 100644
--- a/ot/__init__.py
+++ b/ot/__init__.py
@@ -36,6 +36,7 @@ from . import unbalanced
 from . import partial
 from . import backend
 from . import regpath
+from . import weak
 
 # OT functions
 from .lp import emd, emd2, emd_1d, emd2_1d, wasserstein_1d
@@ -46,7 +47,7 @@ from .da import sinkhorn_lpl1_mm
 from .sliced import sliced_wasserstein_distance, max_sliced_wasserstein_distance
 from .gromov import (gromov_wasserstein, gromov_wasserstein2,
                         gromov_barycenters, fused_gromov_wasserstein, fused_gromov_wasserstein2)
-
+from .weak import weak_optimal_transport
 # utils functions
 from .utils import dist, unif, tic, toc, toq
 
@@ -59,5 +60,5 @@ __all__ = ['emd', 'emd2', 'emd_1d', 'sinkhorn', 'sinkhorn2', 'utils',
            'sinkhorn_unbalanced', 'barycenter_unbalanced',
            'sinkhorn_unbalanced2', 'sliced_wasserstein_distance',
            'gromov_wasserstein', 'gromov_wasserstein2', 'gromov_barycenters', 'fused_gromov_wasserstein', 'fused_gromov_wasserstein2',
-            'max_sliced_wasserstein_distance',
+            'max_sliced_wasserstein_distance', 'weak_optimal_transport',
            'smooth', 'stochastic', 'unbalanced', 'partial', 'regpath']
diff --git a/ot/gromov.py b/ot/gromov.py
index 6544260..b7e7949 100644
--- a/ot/gromov.py
+++ b/ot/gromov.py
@@ -338,6 +338,10 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun='square_loss', log=False, armijo=F
     - :math:`\mathbf{q}`: distribution in the target space
     - `L`: loss function to account for the misfit between the similarity matrices
 
+    .. note:: This function is backend-compatible and will work on arrays
+        from all compatible backends. But the algorithm uses the C++ CPU backend
+        which can lead to copy overhead on GPU arrays.
+
     Parameters
     ----------
     C1 : array-like, shape (ns, ns)
@@ -436,6 +440,10 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun='square_loss', log=False, armijo=
     Note that when using backends, this loss function is differentiable wrt the
     marices and weights for quadratic loss using the gradients from [38]_.
 
+    .. note:: This function is backend-compatible and will work on arrays
+        from all compatible backends. But the algorithm uses the C++ CPU backend
+        which can lead to copy overhead on GPU arrays.
+
     Parameters
     ----------
     C1 : array-like, shape (ns, ns)
@@ -545,6 +553,10 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5,
     - :math:`\mathbf{p}` and :math:`\mathbf{q}` are source and target weights (sum to 1)
     - `L` is a loss function to account for the misfit between the similarity matrices
 
+    .. note:: This function is backend-compatible and will work on arrays
+        from all compatible backends. But the algorithm uses the C++ CPU backend
+        which can lead to copy overhead on GPU arrays.
+
     The algorithm used for solving the problem is conditional gradient as discussed in :ref:`[24] <references-fused-gromov-wasserstein>`
 
     Parameters
@@ -645,6 +657,10 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5
     The algorithm used for solving the problem is conditional gradient as
     discussed in :ref:`[24] <references-fused-gromov-wasserstein2>`
 
+    .. note:: This function is backend-compatible and will work on arrays
+        from all compatible backends. But the algorithm uses the C++ CPU backend
+        which can lead to copy overhead on GPU arrays.
+
     Note that when using backends, this loss function is differentiable wrt the
     marices and weights for quadratic loss using the gradients from [38]_.
 
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 2ff7c1f..d9b6fa9 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -26,6 +26,8 @@ from ..utils import dist, list_to_array
 from ..utils import parmap
 from ..backend import get_backend
 
+
+
 __all__ = ['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx', ' emd_1d_sorted',
            'emd_1d', 'emd2_1d', 'wasserstein_1d']
 
@@ -220,7 +222,8 @@ def emd(a, b, M, numItermax=100000, log=False, center_dual=True, numThreads=1):
         format
 
     .. note:: This function is backend-compatible and will work on arrays
-        from all compatible backends.
+        from all compatible backends. But the algorithm uses the C++ CPU backend
+        which can lead to copy overhead on GPU arrays.
 
     Uses the algorithm proposed in :ref:`[1] <references-emd>`.
 
@@ -358,7 +361,8 @@ def emd2(a, b, M, processes=1,
     - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights
 
     .. note:: This function is backend-compatible and will work on arrays
-        from all compatible backends.
+        from all compatible backends. But the algorithm uses the C++ CPU backend
+        which can lead to copy overhead on GPU arrays.
 
     Uses the algorithm proposed in :ref:`[1] <references-emd2>`.
 
@@ -622,3 +626,4 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
         return X, log_dict
     else:
         return X
+
diff --git a/ot/lp/cvx.py b/ot/lp/cvx.py
index 869d450..fbf3c0e 100644
--- a/ot/lp/cvx.py
+++ b/ot/lp/cvx.py
@@ -11,7 +11,6 @@ import numpy as np
 import scipy as sp
 import scipy.sparse as sps
 
-
 try:
     import cvxopt
     from cvxopt import solvers, matrix, spmatrix
diff --git a/ot/utils.py b/ot/utils.py
index e6c93c8..725ca00 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -116,7 +116,7 @@ def proj_simplex(v, z=1):
         return w
 
 
-def unif(n):
+def unif(n, type_as=None):
     r"""
     Return a uniform histogram of length `n` (simplex).
 
@@ -124,13 +124,19 @@ def unif(n):
     ----------
     n : int
         number of bins in the histogram
+    type_as : array_like
+        array of the same type of the expected output (numpy/pytorch/jax)
 
     Returns
     -------
-    h : np.array (`n`,)
+    h : array_like (`n`,)
         histogram of length `n` such that :math:`\forall i, \mathbf{h}_i = \frac{1}{n}`
     """
-    return np.ones((n,)) / n
+    if type_as is None:
+        return np.ones((n,)) / n
+    else:
+        nx = get_backend(type_as)
+        return nx.ones((n,)) / n
 
 
 def clean_zeros(a, b, M):
diff --git a/ot/weak.py b/ot/weak.py
new file mode 100644
index 0000000..f7d5b23
--- /dev/null
+++ b/ot/weak.py
@@ -0,0 +1,124 @@
+"""
+Weak optimal ransport solvers
+"""
+
+# Author: Remi Flamary <remi.flamary@polytehnique.edu>
+#
+# License: MIT License
+
+from .backend import get_backend
+from .optim import cg
+import numpy as np
+
+__all__ = ['weak_optimal_transport']
+
+
+def weak_optimal_transport(Xa, Xb, a=None, b=None, verbose=False, log=False, G0=None, **kwargs):
+    r"""Solves the weak optimal transport problem between two empirical distributions
+
+
+    .. math::
+        \gamma = \mathop{\arg \min}_\gamma \quad \|X_a-diag(1/a)\gammaX_b\|_F^2
+
+        s.t. \ \gamma \mathbf{1} = \mathbf{a}
+
+             \gamma^T \mathbf{1} = \mathbf{b}
+
+             \gamma \geq 0
+
+    where :
+
+    - :math:`X_a` :math:`X_b`  are the sample matrices.
+    - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights
+
+
+    .. note:: This function is backend-compatible and will work on arrays
+        from all compatible backends. But the algorithm uses the C++ CPU backend
+        which can lead to copy overhead on GPU arrays.
+
+    Uses the conditional gradient algorithm to solve the problem proposed
+    in :ref:`[39] <references-weak>`.
+
+    Parameters
+    ----------
+    Xa : (ns,d) array-like, float
+        Source samples
+    Xb : (nt,d) array-like, float
+        Target samples
+    a : (ns,) array-like, float
+        Source histogram (uniform weight if empty list)
+    b : (nt,) array-like, float
+        Target histogram (uniform weight if empty list))
+    numItermax : int, optional
+        Max number of iterations
+    numItermaxEmd : int, optional
+        Max number of iterations for emd
+    stopThr : float, optional
+        Stop threshold on the relative variation (>0)
+    stopThr2 : float, optional
+        Stop threshold on the absolute variation (>0)
+    verbose : bool, optional
+        Print information along iterations
+    log : bool, optional
+        record log if True
+
+
+    Returns
+    -------
+    gamma: array-like, shape (ns, nt)
+        Optimal transportation matrix for the given
+        parameters
+    log: dict, optional
+        If input log is true, a dictionary containing the
+        cost and dual variables and exit status
+
+
+    .. _references-weak:
+    References
+    ----------
+    .. [39] Gozlan, N., Roberto, C., Samson, P. M., & Tetali, P. (2017).
+            Kantorovich duality for general transport costs and applications.
+            Journal of Functional Analysis, 273(11), 3327-3405.
+
+    See Also
+    --------
+    ot.bregman.sinkhorn : Entropic regularized OT
+    ot.optim.cg : General regularized OT
+    """
+
+    nx = get_backend(Xa, Xb)
+
+    Xa2 = nx.to_numpy(Xa)
+    Xb2 = nx.to_numpy(Xb)
+
+    if a is None:
+        a2 = np.ones((Xa.shape[0])) / Xa.shape[0]
+    else:
+        a2 = nx.to_numpy(a)
+    if b is None:
+        b2 = np.ones((Xb.shape[0])) / Xb.shape[0]
+    else:
+        b2 = nx.to_numpy(b)
+
+    # init uniform
+    if G0 is None:
+        T0 = a2[:, None] * b2[None, :]
+    else:
+        T0 = nx.to_numpy(G0)
+
+    # weak OT loss
+    def f(T):
+        return np.dot(a2, np.sum((Xa2 - np.dot(T, Xb2) / a2[:, None])**2, 1))
+
+    # weak OT gradient
+    def df(T):
+        return -2 * np.dot(Xa2 - np.dot(T, Xb2) / a2[:, None], Xb2.T)
+
+    # solve with conditional gradient and return solution
+    if log:
+        res, log = cg(a2, b2, 0, 1, f, df, T0, log=log, verbose=verbose, **kwargs)
+        log['u'] = nx.from_numpy(log['u'], type_as=Xa)
+        log['v'] = nx.from_numpy(log['v'], type_as=Xb)
+        return nx.from_numpy(res, type_as=Xa), log
+    else:
+        return nx.from_numpy(cg(a2, b2, 0, 1, f, df, T0, log=log, verbose=verbose, **kwargs), type_as=Xa)
diff --git a/test/test_bregman.py b/test/test_bregman.py
index 6e90aa4..1419f9b 100644
--- a/test/test_bregman.py
+++ b/test/test_bregman.py
@@ -60,7 +60,7 @@ def test_convergence_warning(method):
             ot.sinkhorn2(a1, a2, M, 1, method=method, stopThr=0, numItermax=1)
 
 
-def test_not_impemented_method():
+def test_not_implemented_method():
     # test sinkhorn
     w = 10
     n = w ** 2
@@ -635,7 +635,7 @@ def test_wasserstein_bary_2d(nx, method):
         with pytest.raises(NotImplementedError):
             ot.bregman.convolutional_barycenter2d(A_nx, reg, method=method)
     else:
-        bary_wass_np = ot.bregman.convolutional_barycenter2d(A, reg, method=method)
+        bary_wass_np, log_np = ot.bregman.convolutional_barycenter2d(A, reg, method=method, verbose=True, log=True)
         bary_wass = nx.to_numpy(ot.bregman.convolutional_barycenter2d(A_nx, reg, method=method))
 
         np.testing.assert_allclose(1, np.sum(bary_wass), rtol=1e-3)
@@ -667,7 +667,7 @@ def test_wasserstein_bary_2d_debiased(nx, method):
         with pytest.raises(NotImplementedError):
             ot.bregman.convolutional_barycenter2d_debiased(A_nx, reg, method=method)
     else:
-        bary_wass_np = ot.bregman.convolutional_barycenter2d_debiased(A, reg, method=method)
+        bary_wass_np, log_np = ot.bregman.convolutional_barycenter2d_debiased(A, reg, method=method, verbose=True, log=True)
         bary_wass = nx.to_numpy(ot.bregman.convolutional_barycenter2d_debiased(A_nx, reg, method=method))
 
         np.testing.assert_allclose(1, np.sum(bary_wass), rtol=1e-3)
@@ -940,14 +940,11 @@ def test_screenkhorn(nx):
     bb = nx.from_numpy(b)
     M_nx = nx.from_numpy(M, type_as=ab)
 
-    # np sinkhorn
-    G_sink_np = ot.sinkhorn(a, b, M, 1e-03)
     # sinkhorn
-    G_sink = nx.to_numpy(ot.sinkhorn(ab, bb, M_nx, 1e-03))
+    G_sink = nx.to_numpy(ot.sinkhorn(ab, bb, M_nx, 1e-1))
     # screenkhorn
-    G_screen = nx.to_numpy(ot.bregman.screenkhorn(ab, bb, M_nx, 1e-03, uniform=True, verbose=True))
+    G_screen = nx.to_numpy(ot.bregman.screenkhorn(ab, bb, M_nx, 1e-1, uniform=True, verbose=True))
     # check marginals
-    np.testing.assert_allclose(G_sink_np, G_sink)
     np.testing.assert_allclose(G_sink.sum(0), G_screen.sum(0), atol=1e-02)
     np.testing.assert_allclose(G_sink.sum(1), G_screen.sum(1), atol=1e-02)
 
diff --git a/test/test_ot.py b/test/test_ot.py
index e8e2d97..3e2d845 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -232,7 +232,7 @@ def test_emd2_multi():
     # Gaussian distributions
     a = gauss(n, m=20, s=5)  # m= mean, s= std
 
-    ls = np.arange(20, 500, 20)
+    ls = np.arange(20, 500, 100)
     nb = len(ls)
     b = np.zeros((n, nb))
     for i in range(nb):
diff --git a/test/test_utils.py b/test/test_utils.py
index 8b23c22..5ad167b 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -62,12 +62,12 @@ def test_tic_toc():
     import time
 
     ot.tic()
-    time.sleep(0.5)
+    time.sleep(0.1)
     t = ot.toc()
     t2 = ot.toq()
 
     # test timing
-    np.testing.assert_allclose(0.5, t, rtol=1e-1, atol=1e-1)
+    np.testing.assert_allclose(0.1, t, rtol=1e-1, atol=1e-1)
 
     # test toc vs toq
     np.testing.assert_allclose(t, t2, rtol=1e-1, atol=1e-1)
@@ -94,10 +94,22 @@ def test_unif():
     np.testing.assert_allclose(1, np.sum(u))
 
 
-def test_dist():
+def test_unif_backend(nx):
 
     n = 100
 
+    for tp in nx.__type_list__:
+        print(nx.dtype_device(tp))
+
+        u = ot.unif(n, type_as=tp)
+
+        np.testing.assert_allclose(1, np.sum(nx.to_numpy(u)), atol=1e-6)
+
+
+def test_dist():
+
+    n = 10
+
     rng = np.random.RandomState(0)
     x = rng.randn(n, 2)
 
diff --git a/test/test_weak.py b/test/test_weak.py
new file mode 100644
index 0000000..c4c3278
--- /dev/null
+++ b/test/test_weak.py
@@ -0,0 +1,54 @@
+"""Tests for main module ot.weak """
+
+# Author: Remi Flamary <remi.flamary@unice.fr>
+#
+# License: MIT License
+
+import ot
+import numpy as np
+
+
+def test_weak_ot():
+    # test weak ot solver and identity stationary point
+    n = 50
+    rng = np.random.RandomState(0)
+
+    xs = rng.randn(n, 2)
+    xt = rng.randn(n, 2)
+    u = ot.utils.unif(n)
+
+    G, log = ot.weak_optimal_transport(xs, xt, u, u, log=True)
+
+    # check constraints
+    np.testing.assert_allclose(u, G.sum(1))
+    np.testing.assert_allclose(u, G.sum(0))
+
+    # chaeck that identity is recovered
+    G = ot.weak_optimal_transport(xs, xs, G0=np.eye(n) / n)
+
+    # check G is identity
+    np.testing.assert_allclose(G, np.eye(n) / n)
+
+    # check constraints
+    np.testing.assert_allclose(u, G.sum(1))
+    np.testing.assert_allclose(u, G.sum(0))
+
+
+def test_weak_ot_bakends(nx):
+    # test weak ot solver for different backends
+    n = 50
+    rng = np.random.RandomState(0)
+
+    xs = rng.randn(n, 2)
+    xt = rng.randn(n, 2)
+    u = ot.utils.unif(n)
+
+    G = ot.weak_optimal_transport(xs, xt, u, u)
+
+    xs2 = nx.from_numpy(xs)
+    xt2 = nx.from_numpy(xt)
+    u2 = nx.from_numpy(u)
+
+    G2 = ot.weak_optimal_transport(xs2, xt2, u2, u2)
+
+    np.testing.assert_allclose(nx.to_numpy(G2), G)
-- 
cgit v1.2.3


From 3302cd48cdcc5d4832997bae921952cc3917fb59 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Wed, 23 Feb 2022 09:53:13 +0100
Subject: [MRG] Build POT against oldest-supported-numpy (local PR) (#349)

* Configure setup to compile against oldest supported numpy version using the meta-package: https://pypi.org/project/oldest-supported-numpy/ -

* Set minimum Python requirement to `>=3.7` in setup.py since !328 removed Python 3.6 support

* Fix typo in pyproject.toml -

* Update setup.py

* Update setup.py and

* build wheels

* remove install dependencies for wheels building and build wheels

* Apply suggestions from code review

Co-authored-by: David M. Ghiurco <9147386+davidghiurco@users.noreply.github.com>

* correct timing test add info in release file and build wheels

* pep8 and

Co-authored-by: David Ghiurco <9147386+davidghiurco@users.noreply.github.com>
---
 .github/workflows/build_wheels.yml        | 8 +-------
 .github/workflows/build_wheels_weekly.yml | 2 --
 RELEASES.md                               | 1 +
 docs/source/quickstart.rst                | 2 +-
 pyproject.toml                            | 2 +-
 requirements.txt                          | 1 -
 setup.py                                  | 5 +++--
 test/test_utils.py                        | 4 +++-
 8 files changed, 10 insertions(+), 15 deletions(-)

(limited to 'docs/source')

diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml
index c746eb8..475058c 100644
--- a/.github/workflows/build_wheels.yml
+++ b/.github/workflows/build_wheels.yml
@@ -27,8 +27,6 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install -r requirements.txt
-        pip install -U "cython"
 
     - name: Install cibuildwheel
       run: |
@@ -37,7 +35,6 @@ jobs:
     - name: Build wheels
       env:
         CIBW_SKIP: "pp*-win* pp*-macosx* cp2* pp* cp36*" # remove pypy on mac and win (wrong version)
-        CIBW_BEFORE_BUILD: "pip install numpy cython"
       run: |
         python -m cibuildwheel --output-dir wheelhouse
 
@@ -65,8 +62,6 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install -r requirements.txt
-        pip install -U "cython"
 
     - name: Install cibuildwheel
       run: |
@@ -80,8 +75,7 @@ jobs:
 
     - name: Build wheels
       env:
-        CIBW_SKIP: "pp*-win* pp*-macosx* cp2* pp* cp*musl* cp36*" # remove pypy on mac and win (wrong version)
-        CIBW_BEFORE_BUILD: "pip install numpy cython"
+        CIBW_SKIP: "pp*-win* pp*-macosx* cp2* pp* cp*musl*" # remove pypy on mac and win (wrong version)
         CIBW_ARCHS_LINUX: auto aarch64 # force aarch64 with QEMU
         CIBW_ARCHS_MACOS: x86_64 universal2 arm64
       run: |
diff --git a/.github/workflows/build_wheels_weekly.yml b/.github/workflows/build_wheels_weekly.yml
index dbf342f..b9154c5 100644
--- a/.github/workflows/build_wheels_weekly.yml
+++ b/.github/workflows/build_wheels_weekly.yml
@@ -26,8 +26,6 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install -r requirements.txt
-        pip install -U "cython"
 
     - name: Install cibuildwheel
       run: |
diff --git a/RELEASES.md b/RELEASES.md
index 925920a..92b7ba5 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -16,6 +16,7 @@
 
 - Bug in instantiating an `autograd` function (`ValFunction`, Issue #337, PR
   #338)
+- Make POT ABI compatible with old and new numpy (Issue #346, PR #349)
 
 ## 0.8.1.0
 *December 2021*
diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index e74b019..09a362b 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -1002,7 +1002,7 @@ FAQ
 
 2. **pip install POT fails with error : ImportError: No module named Cython.Build**
 
-    As discussed shortly in the README file. POT requires to have :code:`numpy`
+    As discussed shortly in the README file. POT<0.8 requires to have :code:`numpy`
     and :code:`cython` installed to build. This corner case is not yet handled
     by :code:`pip` and for now you need to install both library prior to
     installing POT.
diff --git a/pyproject.toml b/pyproject.toml
index 93ebab3..3789206 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,3 +1,3 @@
 [build-system]
-requires = ["setuptools", "wheel", "numpy>=1.20", "cython>=0.23"]
+requires = ["setuptools", "wheel", "oldest-supported-numpy", "cython>=0.23"]
 build-backend = "setuptools.build_meta"
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index f9934ce..7cbb29a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,6 +1,5 @@
 numpy>=1.20
 scipy>=1.3
-cython
 matplotlib
 autograd
 pymanopt==0.2.4; python_version <'3'
diff --git a/setup.py b/setup.py
index d46ae1c..c03191a 100644
--- a/setup.py
+++ b/setup.py
@@ -68,8 +68,9 @@ setup(
     license='MIT',
     scripts=[],
     data_files=[],
-    setup_requires=["numpy>=1.20", "cython>=0.23"],
-    install_requires=["numpy>=1.20", "scipy>=1.0"],
+    setup_requires=["oldest-supported-numpy", "cython>=0.23"],
+    install_requires=["numpy>=1.16", "scipy>=1.0"],
+    python_requires=">=3.6",
     classifiers=[
         'Development Status :: 5 - Production/Stable',
         'Intended Audience :: Developers',
diff --git a/test/test_utils.py b/test/test_utils.py
index 5ad167b..3cfd295 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -67,7 +67,9 @@ def test_tic_toc():
     t2 = ot.toq()
 
     # test timing
-    np.testing.assert_allclose(0.1, t, rtol=1e-1, atol=1e-1)
+    # np.testing.assert_allclose(0.1, t, rtol=1e-1, atol=1e-1)
+    # very slow macos github action equality not possible
+    assert t > 0.09
 
     # test toc vs toq
     np.testing.assert_allclose(t, t2, rtol=1e-1, atol=1e-1)
-- 
cgit v1.2.3


From 9b9d2221d257f40ea3eb58b279b30d69162d62bb Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Fri, 18 Mar 2022 08:00:19 +0100
Subject: [MRG] Add logo to POT (#357)

* add logo code and logo to doc

* update release file
---
 RELEASES.md                              |   1 +
 docs/source/_static/images/logo.png      | Bin 0 -> 5038 bytes
 docs/source/_static/images/logo.svg      | 200 +++++++++++++++++++++++++++++++
 docs/source/_static/images/logo_dark.png | Bin 0 -> 3437 bytes
 docs/source/_static/images/logo_dark.svg | 187 +++++++++++++++++++++++++++++
 docs/source/conf.py                      |   4 +-
 docs/source/index.rst                    |   5 +
 examples/others/plot_logo.py             | 112 +++++++++++++++++
 8 files changed, 508 insertions(+), 1 deletion(-)
 create mode 100644 docs/source/_static/images/logo.png
 create mode 100644 docs/source/_static/images/logo.svg
 create mode 100644 docs/source/_static/images/logo_dark.png
 create mode 100644 docs/source/_static/images/logo_dark.svg
 create mode 100644 examples/others/plot_logo.py

(limited to 'docs/source')

diff --git a/RELEASES.md b/RELEASES.md
index 18562e7..0f1f231 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -5,6 +5,7 @@
 
 #### New features
 
+- A brand new logo for POT (PR #357)
 - Better list of related examples in quick start guide with `minigallery` (PR #334).
 - Add optional log-domain Sinkhorn implementation in WDA to support smaller values
   of the regularization parameter (PR #336).
diff --git a/docs/source/_static/images/logo.png b/docs/source/_static/images/logo.png
new file mode 100644
index 0000000..7be5df7
Binary files /dev/null and b/docs/source/_static/images/logo.png differ
diff --git a/docs/source/_static/images/logo.svg b/docs/source/_static/images/logo.svg
new file mode 100644
index 0000000..0bf2cb7
--- /dev/null
+++ b/docs/source/_static/images/logo.svg
@@ -0,0 +1,200 @@
+<?xml version="1.0" encoding="utf-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Created with matplotlib (https://matplotlib.org/) -->
+<svg height="75.384pt" version="1.1" viewBox="0 0 209.7 75.384" width="209.7pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+ <metadata>
+  <rdf:RDF xmlns:cc="http://creativecommons.org/ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+   <cc:Work>
+    <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
+    <dc:date>2022-03-17T17:25:30.736761</dc:date>
+    <dc:format>image/svg+xml</dc:format>
+    <dc:creator>
+     <cc:Agent>
+      <dc:title>Matplotlib v3.3.3, https://matplotlib.org/</dc:title>
+     </cc:Agent>
+    </dc:creator>
+   </cc:Work>
+  </rdf:RDF>
+ </metadata>
+ <defs>
+  <style type="text/css">*{stroke-linecap:butt;stroke-linejoin:round;}</style>
+ </defs>
+ <g id="figure_1">
+  <g id="patch_1">
+   <path d="M 0 75.384 
+L 209.7 75.384 
+L 209.7 0 
+L 0 0 
+z
+" style="fill:#ffffff;"/>
+  </g>
+  <g id="axes_1">
+   <g id="line2d_1">
+    <path clip-path="url(#pafc522611b)" d="M 16.077273 11.885975 
+L 47.044503 11.885975 
+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+   </g>
+   <g id="line2d_2">
+    <path clip-path="url(#pafc522611b)" d="M 16.077273 22.208385 
+L 57.366913 22.208385 
+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+   </g>
+   <g id="line2d_3">
+    <path clip-path="url(#pafc522611b)" d="M 16.077273 32.530795 
+L 57.366913 32.530795 
+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+   </g>
+   <g id="line2d_4">
+    <path clip-path="url(#pafc522611b)" d="M 16.077273 42.853205 
+L 47.044503 42.853205 
+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+   </g>
+   <g id="line2d_5">
+    <path clip-path="url(#pafc522611b)" d="M 16.077273 53.175615 
+L 26.399683 53.175615 
+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+   </g>
+   <g id="line2d_6">
+    <path clip-path="url(#pafc522611b)" d="M 16.077273 63.498025 
+L 26.399683 63.498025 
+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+   </g>
+   <g id="line2d_7">
+    <path clip-path="url(#pafc522611b)" d="M 95.353383 11.885975 
+L 107.740275 11.885975 
+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+   </g>
+   <g id="line2d_8">
+    <path clip-path="url(#pafc522611b)" d="M 82.96649 22.208385 
+L 120.127167 22.208385 
+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+   </g>
+   <g id="line2d_9">
+    <path clip-path="url(#pafc522611b)" d="M 76.773044 32.530795 
+L 126.320613 32.530795 
+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+   </g>
+   <g id="line2d_10">
+    <path clip-path="url(#pafc522611b)" d="M 76.773044 42.853205 
+L 126.320613 42.853205 
+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+   </g>
+   <g id="line2d_11">
+    <path clip-path="url(#pafc522611b)" d="M 82.96649 53.175615 
+L 120.127167 53.175615 
+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+   </g>
+   <g id="line2d_12">
+    <path clip-path="url(#pafc522611b)" d="M 95.353383 63.498025 
+L 107.740275 63.498025 
+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+   </g>
+   <g id="line2d_13">
+    <path clip-path="url(#pafc522611b)" d="M 142.010677 11.885975 
+L 193.622727 11.885975 
+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+   </g>
+   <g id="line2d_14">
+    <path clip-path="url(#pafc522611b)" d="M 142.010677 22.208385 
+L 193.622727 22.208385 
+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+   </g>
+   <g id="line2d_15">
+    <path clip-path="url(#pafc522611b)" d="M 162.655497 32.530795 
+L 172.977907 32.530795 
+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+   </g>
+   <g id="line2d_16">
+    <path clip-path="url(#pafc522611b)" d="M 162.655497 42.853205 
+L 172.977907 42.853205 
+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+   </g>
+   <g id="line2d_17">
+    <path clip-path="url(#pafc522611b)" d="M 162.655497 53.175615 
+L 172.977907 53.175615 
+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+   </g>
+   <g id="line2d_18">
+    <path clip-path="url(#pafc522611b)" d="M 162.655497 63.498025 
+L 172.977907 63.498025 
+" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+   </g>
+   <g id="line2d_19">
+    <defs>
+     <path d="M 0 3 
+C 0.795609 3 1.55874 2.683901 2.12132 2.12132 
+C 2.683901 1.55874 3 0.795609 3 0 
+C 3 -0.795609 2.683901 -1.55874 2.12132 -2.12132 
+C 1.55874 -2.683901 0.795609 -3 0 -3 
+C -0.795609 -3 -1.55874 -2.683901 -2.12132 -2.12132 
+C -2.683901 -1.55874 -3 -0.795609 -3 0 
+C -3 0.795609 -2.683901 1.55874 -2.12132 2.12132 
+C -1.55874 2.683901 -0.795609 3 0 3 
+z
+" id="m45a57d22c6" style="stroke:#000000;"/>
+    </defs>
+    <g clip-path="url(#pafc522611b)">
+     <use style="fill:#d62728;stroke:#000000;" x="16.077273" xlink:href="#m45a57d22c6" y="11.885975"/>
+     <use style="fill:#d62728;stroke:#000000;" x="16.077273" xlink:href="#m45a57d22c6" y="22.208385"/>
+     <use style="fill:#d62728;stroke:#000000;" x="16.077273" xlink:href="#m45a57d22c6" y="32.530795"/>
+     <use style="fill:#d62728;stroke:#000000;" x="16.077273" xlink:href="#m45a57d22c6" y="42.853205"/>
+     <use style="fill:#d62728;stroke:#000000;" x="16.077273" xlink:href="#m45a57d22c6" y="53.175615"/>
+     <use style="fill:#d62728;stroke:#000000;" x="16.077273" xlink:href="#m45a57d22c6" y="63.498025"/>
+     <use style="fill:#d62728;stroke:#000000;" x="95.353383" xlink:href="#m45a57d22c6" y="11.885975"/>
+     <use style="fill:#d62728;stroke:#000000;" x="82.96649" xlink:href="#m45a57d22c6" y="22.208385"/>
+     <use style="fill:#d62728;stroke:#000000;" x="76.773044" xlink:href="#m45a57d22c6" y="32.530795"/>
+     <use style="fill:#d62728;stroke:#000000;" x="76.773044" xlink:href="#m45a57d22c6" y="42.853205"/>
+     <use style="fill:#d62728;stroke:#000000;" x="82.96649" xlink:href="#m45a57d22c6" y="53.175615"/>
+     <use style="fill:#d62728;stroke:#000000;" x="95.353383" xlink:href="#m45a57d22c6" y="63.498025"/>
+     <use style="fill:#d62728;stroke:#000000;" x="142.010677" xlink:href="#m45a57d22c6" y="11.885975"/>
+     <use style="fill:#d62728;stroke:#000000;" x="142.010677" xlink:href="#m45a57d22c6" y="22.208385"/>
+     <use style="fill:#d62728;stroke:#000000;" x="162.655497" xlink:href="#m45a57d22c6" y="32.530795"/>
+     <use style="fill:#d62728;stroke:#000000;" x="162.655497" xlink:href="#m45a57d22c6" y="42.853205"/>
+     <use style="fill:#d62728;stroke:#000000;" x="162.655497" xlink:href="#m45a57d22c6" y="53.175615"/>
+     <use style="fill:#d62728;stroke:#000000;" x="162.655497" xlink:href="#m45a57d22c6" y="63.498025"/>
+    </g>
+   </g>
+   <g id="line2d_20">
+    <defs>
+     <path d="M 0 3 
+C 0.795609 3 1.55874 2.683901 2.12132 2.12132 
+C 2.683901 1.55874 3 0.795609 3 0 
+C 3 -0.795609 2.683901 -1.55874 2.12132 -2.12132 
+C 1.55874 -2.683901 0.795609 -3 0 -3 
+C -0.795609 -3 -1.55874 -2.683901 -2.12132 -2.12132 
+C -2.683901 -1.55874 -3 -0.795609 -3 0 
+C -3 0.795609 -2.683901 1.55874 -2.12132 2.12132 
+C -1.55874 2.683901 -0.795609 3 0 3 
+z
+" id="mb6ec6fa556" style="stroke:#000000;"/>
+    </defs>
+    <g clip-path="url(#pafc522611b)">
+     <use style="fill:#0000ff;stroke:#000000;" x="47.044503" xlink:href="#mb6ec6fa556" y="11.885975"/>
+     <use style="fill:#0000ff;stroke:#000000;" x="57.366913" xlink:href="#mb6ec6fa556" y="32.530795"/>
+     <use style="fill:#0000ff;stroke:#000000;" x="57.366913" xlink:href="#mb6ec6fa556" y="22.208385"/>
+     <use style="fill:#0000ff;stroke:#000000;" x="47.044503" xlink:href="#mb6ec6fa556" y="42.853205"/>
+     <use style="fill:#0000ff;stroke:#000000;" x="26.399683" xlink:href="#mb6ec6fa556" y="53.175615"/>
+     <use style="fill:#0000ff;stroke:#000000;" x="26.399683" xlink:href="#mb6ec6fa556" y="63.498025"/>
+     <use style="fill:#0000ff;stroke:#000000;" x="107.740275" xlink:href="#mb6ec6fa556" y="11.885975"/>
+     <use style="fill:#0000ff;stroke:#000000;" x="120.127167" xlink:href="#mb6ec6fa556" y="22.208385"/>
+     <use style="fill:#0000ff;stroke:#000000;" x="126.320613" xlink:href="#mb6ec6fa556" y="32.530795"/>
+     <use style="fill:#0000ff;stroke:#000000;" x="126.320613" xlink:href="#mb6ec6fa556" y="42.853205"/>
+     <use style="fill:#0000ff;stroke:#000000;" x="120.127167" xlink:href="#mb6ec6fa556" y="53.175615"/>
+     <use style="fill:#0000ff;stroke:#000000;" x="107.740275" xlink:href="#mb6ec6fa556" y="63.498025"/>
+     <use style="fill:#0000ff;stroke:#000000;" x="193.622727" xlink:href="#mb6ec6fa556" y="11.885975"/>
+     <use style="fill:#0000ff;stroke:#000000;" x="193.622727" xlink:href="#mb6ec6fa556" y="22.208385"/>
+     <use style="fill:#0000ff;stroke:#000000;" x="172.977907" xlink:href="#mb6ec6fa556" y="32.530795"/>
+     <use style="fill:#0000ff;stroke:#000000;" x="172.977907" xlink:href="#mb6ec6fa556" y="42.853205"/>
+     <use style="fill:#0000ff;stroke:#000000;" x="172.977907" xlink:href="#mb6ec6fa556" y="53.175615"/>
+     <use style="fill:#0000ff;stroke:#000000;" x="172.977907" xlink:href="#mb6ec6fa556" y="63.498025"/>
+    </g>
+   </g>
+  </g>
+ </g>
+ <defs>
+  <clipPath id="pafc522611b">
+   <rect height="60.984" width="195.3" x="7.2" y="7.2"/>
+  </clipPath>
+ </defs>
+</svg>
diff --git a/docs/source/_static/images/logo_dark.png b/docs/source/_static/images/logo_dark.png
new file mode 100644
index 0000000..f484188
Binary files /dev/null and b/docs/source/_static/images/logo_dark.png differ
diff --git a/docs/source/_static/images/logo_dark.svg b/docs/source/_static/images/logo_dark.svg
new file mode 100644
index 0000000..56ce2d9
--- /dev/null
+++ b/docs/source/_static/images/logo_dark.svg
@@ -0,0 +1,187 @@
+<?xml version="1.0" encoding="utf-8" standalone="no"?>
+<!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
+  "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
+<!-- Created with matplotlib (https://matplotlib.org/) -->
+<svg height="75.384pt" version="1.1" viewBox="0 0 209.7 75.384" width="209.7pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+ <metadata>
+  <rdf:RDF xmlns:cc="http://creativecommons.org/ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+   <cc:Work>
+    <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
+    <dc:date>2022-03-17T17:25:30.847142</dc:date>
+    <dc:format>image/svg+xml</dc:format>
+    <dc:creator>
+     <cc:Agent>
+      <dc:title>Matplotlib v3.3.3, https://matplotlib.org/</dc:title>
+     </cc:Agent>
+    </dc:creator>
+   </cc:Work>
+  </rdf:RDF>
+ </metadata>
+ <defs>
+  <style type="text/css">*{stroke-linecap:butt;stroke-linejoin:round;}</style>
+ </defs>
+ <g id="figure_1">
+  <g id="patch_1">
+   <path d="M 0 75.384 
+L 209.7 75.384 
+L 209.7 0 
+L 0 0 
+z
+" style="fill:none;"/>
+  </g>
+  <g id="axes_1">
+   <g id="line2d_1">
+    <path clip-path="url(#pa995e487cb)" d="M 16.077273 11.885975 
+L 47.044503 11.885975 
+" style="fill:none;stroke:#ffffff;stroke-linecap:square;stroke-opacity:0.8;stroke-width:3;"/>
+   </g>
+   <g id="line2d_2">
+    <path clip-path="url(#pa995e487cb)" d="M 16.077273 22.208385 
+L 57.366913 22.208385 
+" style="fill:none;stroke:#ffffff;stroke-linecap:square;stroke-opacity:0.8;stroke-width:3;"/>
+   </g>
+   <g id="line2d_3">
+    <path clip-path="url(#pa995e487cb)" d="M 16.077273 32.530795 
+L 57.366913 32.530795 
+" style="fill:none;stroke:#ffffff;stroke-linecap:square;stroke-opacity:0.8;stroke-width:3;"/>
+   </g>
+   <g id="line2d_4">
+    <path clip-path="url(#pa995e487cb)" d="M 16.077273 42.853205 
+L 47.044503 42.853205 
+" style="fill:none;stroke:#ffffff;stroke-linecap:square;stroke-opacity:0.8;stroke-width:3;"/>
+   </g>
+   <g id="line2d_5">
+    <path clip-path="url(#pa995e487cb)" d="M 16.077273 53.175615 
+L 26.399683 53.175615 
+" style="fill:none;stroke:#ffffff;stroke-linecap:square;stroke-opacity:0.8;stroke-width:3;"/>
+   </g>
+   <g id="line2d_6">
+    <path clip-path="url(#pa995e487cb)" d="M 16.077273 63.498025 
+L 26.399683 63.498025 
+" style="fill:none;stroke:#ffffff;stroke-linecap:square;stroke-opacity:0.8;stroke-width:3;"/>
+   </g>
+   <g id="line2d_7">
+    <path clip-path="url(#pa995e487cb)" d="M 95.353383 11.885975 
+L 107.740275 11.885975 
+" style="fill:none;stroke:#ffffff;stroke-linecap:square;stroke-opacity:0.8;stroke-width:3;"/>
+   </g>
+   <g id="line2d_8">
+    <path clip-path="url(#pa995e487cb)" d="M 82.96649 22.208385 
+L 120.127167 22.208385 
+" style="fill:none;stroke:#ffffff;stroke-linecap:square;stroke-opacity:0.8;stroke-width:3;"/>
+   </g>
+   <g id="line2d_9">
+    <path clip-path="url(#pa995e487cb)" d="M 76.773044 32.530795 
+L 126.320613 32.530795 
+" style="fill:none;stroke:#ffffff;stroke-linecap:square;stroke-opacity:0.8;stroke-width:3;"/>
+   </g>
+   <g id="line2d_10">
+    <path clip-path="url(#pa995e487cb)" d="M 76.773044 42.853205 
+L 126.320613 42.853205 
+" style="fill:none;stroke:#ffffff;stroke-linecap:square;stroke-opacity:0.8;stroke-width:3;"/>
+   </g>
+   <g id="line2d_11">
+    <path clip-path="url(#pa995e487cb)" d="M 82.96649 53.175615 
+L 120.127167 53.175615 
+" style="fill:none;stroke:#ffffff;stroke-linecap:square;stroke-opacity:0.8;stroke-width:3;"/>
+   </g>
+   <g id="line2d_12">
+    <path clip-path="url(#pa995e487cb)" d="M 95.353383 63.498025 
+L 107.740275 63.498025 
+" style="fill:none;stroke:#ffffff;stroke-linecap:square;stroke-opacity:0.8;stroke-width:3;"/>
+   </g>
+   <g id="line2d_13">
+    <path clip-path="url(#pa995e487cb)" d="M 142.010677 11.885975 
+L 193.622727 11.885975 
+" style="fill:none;stroke:#ffffff;stroke-linecap:square;stroke-opacity:0.8;stroke-width:3;"/>
+   </g>
+   <g id="line2d_14">
+    <path clip-path="url(#pa995e487cb)" d="M 142.010677 22.208385 
+L 193.622727 22.208385 
+" style="fill:none;stroke:#ffffff;stroke-linecap:square;stroke-opacity:0.8;stroke-width:3;"/>
+   </g>
+   <g id="line2d_15">
+    <path clip-path="url(#pa995e487cb)" d="M 162.655497 32.530795 
+L 172.977907 32.530795 
+" style="fill:none;stroke:#ffffff;stroke-linecap:square;stroke-opacity:0.8;stroke-width:3;"/>
+   </g>
+   <g id="line2d_16">
+    <path clip-path="url(#pa995e487cb)" d="M 162.655497 42.853205 
+L 172.977907 42.853205 
+" style="fill:none;stroke:#ffffff;stroke-linecap:square;stroke-opacity:0.8;stroke-width:3;"/>
+   </g>
+   <g id="line2d_17">
+    <path clip-path="url(#pa995e487cb)" d="M 162.655497 53.175615 
+L 172.977907 53.175615 
+" style="fill:none;stroke:#ffffff;stroke-linecap:square;stroke-opacity:0.8;stroke-width:3;"/>
+   </g>
+   <g id="line2d_18">
+    <path clip-path="url(#pa995e487cb)" d="M 162.655497 63.498025 
+L 172.977907 63.498025 
+" style="fill:none;stroke:#ffffff;stroke-linecap:square;stroke-opacity:0.8;stroke-width:3;"/>
+   </g>
+   <g id="line2d_19">
+    <defs>
+     <path d="M 0 3 
+C 0.795609 3 1.55874 2.683901 2.12132 2.12132 
+C 2.683901 1.55874 3 0.795609 3 0 
+C 3 -0.795609 2.683901 -1.55874 2.12132 -2.12132 
+C 1.55874 -2.683901 0.795609 -3 0 -3 
+C -0.795609 -3 -1.55874 -2.683901 -2.12132 -2.12132 
+C -2.683901 -1.55874 -3 -0.795609 -3 0 
+C -3 0.795609 -2.683901 1.55874 -2.12132 2.12132 
+C -1.55874 2.683901 -0.795609 3 0 3 
+z
+" id="m5a2277d5a1" style="stroke:#ffffff;"/>
+    </defs>
+    <g clip-path="url(#pa995e487cb)">
+     <use style="fill:#ffffff;stroke:#ffffff;" x="16.077273" xlink:href="#m5a2277d5a1" y="11.885975"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="16.077273" xlink:href="#m5a2277d5a1" y="22.208385"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="16.077273" xlink:href="#m5a2277d5a1" y="32.530795"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="16.077273" xlink:href="#m5a2277d5a1" y="42.853205"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="16.077273" xlink:href="#m5a2277d5a1" y="53.175615"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="16.077273" xlink:href="#m5a2277d5a1" y="63.498025"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="95.353383" xlink:href="#m5a2277d5a1" y="11.885975"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="82.96649" xlink:href="#m5a2277d5a1" y="22.208385"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="76.773044" xlink:href="#m5a2277d5a1" y="32.530795"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="76.773044" xlink:href="#m5a2277d5a1" y="42.853205"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="82.96649" xlink:href="#m5a2277d5a1" y="53.175615"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="95.353383" xlink:href="#m5a2277d5a1" y="63.498025"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="142.010677" xlink:href="#m5a2277d5a1" y="11.885975"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="142.010677" xlink:href="#m5a2277d5a1" y="22.208385"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="162.655497" xlink:href="#m5a2277d5a1" y="32.530795"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="162.655497" xlink:href="#m5a2277d5a1" y="42.853205"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="162.655497" xlink:href="#m5a2277d5a1" y="53.175615"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="162.655497" xlink:href="#m5a2277d5a1" y="63.498025"/>
+    </g>
+   </g>
+   <g id="line2d_20">
+    <g clip-path="url(#pa995e487cb)">
+     <use style="fill:#ffffff;stroke:#ffffff;" x="47.044503" xlink:href="#m5a2277d5a1" y="11.885975"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="57.366913" xlink:href="#m5a2277d5a1" y="32.530795"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="57.366913" xlink:href="#m5a2277d5a1" y="22.208385"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="47.044503" xlink:href="#m5a2277d5a1" y="42.853205"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="26.399683" xlink:href="#m5a2277d5a1" y="53.175615"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="26.399683" xlink:href="#m5a2277d5a1" y="63.498025"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="107.740275" xlink:href="#m5a2277d5a1" y="11.885975"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="120.127167" xlink:href="#m5a2277d5a1" y="22.208385"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="126.320613" xlink:href="#m5a2277d5a1" y="32.530795"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="126.320613" xlink:href="#m5a2277d5a1" y="42.853205"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="120.127167" xlink:href="#m5a2277d5a1" y="53.175615"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="107.740275" xlink:href="#m5a2277d5a1" y="63.498025"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="193.622727" xlink:href="#m5a2277d5a1" y="11.885975"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="193.622727" xlink:href="#m5a2277d5a1" y="22.208385"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="172.977907" xlink:href="#m5a2277d5a1" y="32.530795"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="172.977907" xlink:href="#m5a2277d5a1" y="42.853205"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="172.977907" xlink:href="#m5a2277d5a1" y="53.175615"/>
+     <use style="fill:#ffffff;stroke:#ffffff;" x="172.977907" xlink:href="#m5a2277d5a1" y="63.498025"/>
+    </g>
+   </g>
+  </g>
+ </g>
+ <defs>
+  <clipPath id="pa995e487cb">
+   <rect height="60.984" width="195.3" x="7.2" y="7.2"/>
+  </clipPath>
+ </defs>
+</svg>
diff --git a/docs/source/conf.py b/docs/source/conf.py
index d1b8426..60d0bb7 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -162,6 +162,7 @@ html_theme = 'sphinx_rtd_theme'
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
 # documentation.
+
 html_theme_options = {}
 
 # Add any paths that contain custom themes here, relative to this directory.
@@ -176,7 +177,7 @@ html_theme_options = {}
 
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
-#html_logo = None
+html_logo = '_static/images/logo_dark.svg'
 
 # The name of an image file (relative to this directory) to use as a favicon of
 # the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
@@ -188,6 +189,7 @@ html_theme_options = {}
 # so a file named "default.css" will overwrite the builtin "default.css".
 html_static_path = ['_static']
 
+
 # Add any extra paths that contain custom files (such as robots.txt or
 # .htaccess) here, relative to this directory. These files are copied
 # directly to the root of the documentation.
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 8de31ae..7ff7d22 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -6,6 +6,10 @@
 POT: Python Optimal Transport
 =============================
 
+.. image:: _static/images/logo.svg
+  :width: 400
+  :alt: POT Logo
+
 Contents
 --------
 
@@ -20,6 +24,7 @@ Contents
    .github/CONTRIBUTING
    .github/CODE_OF_CONDUCT
 
+
 .. include:: ../../README.md
    :parser: myst_parser.sphinx_
 
diff --git a/examples/others/plot_logo.py b/examples/others/plot_logo.py
new file mode 100644
index 0000000..afddcad
--- /dev/null
+++ b/examples/others/plot_logo.py
@@ -0,0 +1,112 @@
+
+# -*- coding: utf-8 -*-
+r"""
+=======================
+Logo of the POT toolbox
+=======================
+
+In this example we plot the logo of the POT toolbox.
+
+A specificity of this logo is that it is done 100% in Python and generated using
+matplotlib using the EMD solver from POT.
+
+"""
+
+# Author: Remi Flamary <remi.flamary@polytechnique.edu>
+#
+# License: MIT License
+
+# sphinx_gallery_thumbnail_number = 1
+
+# %%
+import numpy as np
+import matplotlib.pyplot as pl
+import ot
+
+# %%
+# Data for logo
+# -------------
+
+
+# Letter P
+p1 = np.array([[0, 6.], [0, 5], [0, 4], [0, 3], [0, 2], [0, 1], ])
+p2 = np.array([[1.5, 6], [2, 4], [2, 5], [1.5, 3], [0.5, 2], [.5, 1], ])
+
+# Letter O
+o1 = np.array([[0, 6.], [-1, 5], [-1.5, 4], [-1.5, 3], [-1, 2], [0, 1], ])
+o2 = np.array([[1, 6.], [2, 5], [2.5, 4], [2.5, 3], [2, 2], [1, 1], ])
+
+# scaling and translation for letter O
+o1[:, 0] += 6.4
+o2[:, 0] += 6.4
+o1[:, 0] *= 0.6
+o2[:, 0] *= 0.6
+
+# letter T
+t1 = np.array([[-1, 6.], [-1, 5], [0, 4], [0, 3], [0, 2], [0, 1], ])
+t2 = np.array([[1.5, 6.], [1.5, 5], [0.5, 4], [0.5, 3], [0.5, 2], [0.5, 1], ])
+
+# translatin the T
+t1[:, 0] += 7.1
+t2[:, 0] += 7.1
+
+# Cocatenate all letters
+x1 = np.concatenate((p1, o1, t1), axis=0)
+x2 = np.concatenate((p2, o2, t2), axis=0)
+
+# Horizontal and vertical scaling
+sx = 1.0
+sy = .5
+x1[:, 0] *= sx
+x1[:, 1] *= sy
+x2[:, 0] *= sx
+x2[:, 1] *= sy
+
+# %%
+# Plot the logo (clear background)
+# --------------------------------
+
+# Solve OT problem between the points
+M = ot.dist(x1, x2, metric='euclidean')
+T = ot.emd([], [], M)
+
+pl.figure(1, (3.5, 1.1))
+pl.clf()
+# plot the OT plan
+for i in range(M.shape[0]):
+    for j in range(M.shape[1]):
+        if T[i, j] > 1e-8:
+            pl.plot([x1[i, 0], x2[j, 0]], [x1[i, 1], x2[j, 1]], color='k', alpha=0.6, linewidth=3, zorder=1)
+# plot the samples
+pl.plot(x1[:, 0], x1[:, 1], 'o', markerfacecolor='C3', markeredgecolor='k')
+pl.plot(x2[:, 0], x2[:, 1], 'o', markerfacecolor='b', markeredgecolor='k')
+
+
+pl.axis('equal')
+pl.axis('off')
+
+# Save logo file
+# pl.savefig('logo.svg', dpi=150, bbox_inches='tight')
+# pl.savefig('logo.png', dpi=150, bbox_inches='tight')
+
+# %%
+# Plot the logo (dark background)
+# --------------------------------
+
+pl.figure(2, (3.5, 1.1), facecolor='darkgray')
+pl.clf()
+# plot the OT plan
+for i in range(M.shape[0]):
+    for j in range(M.shape[1]):
+        if T[i, j] > 1e-8:
+            pl.plot([x1[i, 0], x2[j, 0]], [x1[i, 1], x2[j, 1]], color='w', alpha=0.8, linewidth=3, zorder=1)
+# plot the samples
+pl.plot(x1[:, 0], x1[:, 1], 'o', markerfacecolor='w', markeredgecolor='w')
+pl.plot(x2[:, 0], x2[:, 1], 'o', markerfacecolor='w', markeredgecolor='w')
+
+pl.axis('equal')
+pl.axis('off')
+
+# Save logo file
+# pl.savefig('logo_dark.svg', dpi=150, transparent=True, bbox_inches='tight')
+# pl.savefig('logo_dark.png', dpi=150, transparent=True, bbox_inches='tight')
-- 
cgit v1.2.3


From 82452e0f5f6dae05c7a1cc384e7a1fb62ae7e0d5 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Thu, 24 Mar 2022 14:13:25 +0100
Subject: [MRG] Add factored coupling (#358)

* add gfactored ot

* pep8 and add doc

* add exmaple for factotred OT

* final number of PR

* correct test on backends

* remove useless loss

* better tests
---
 README.md                                 |   4 +-
 RELEASES.md                               |   1 +
 docs/source/all.rst                       |   1 +
 examples/others/plot_factored_coupling.py |  86 ++++++++++++++++++
 ot/__init__.py                            |   5 ++
 ot/factored.py                            | 145 ++++++++++++++++++++++++++++++
 ot/plot.py                                |   7 +-
 test/test_factored.py                     |  56 ++++++++++++
 8 files changed, 303 insertions(+), 2 deletions(-)
 create mode 100644 examples/others/plot_factored_coupling.py
 create mode 100644 ot/factored.py
 create mode 100644 test/test_factored.py

(limited to 'docs/source')

diff --git a/README.md b/README.md
index c6bfd9c..ec5d221 100644
--- a/README.md
+++ b/README.md
@@ -305,4 +305,6 @@ Conference on Machine Learning, PMLR 119:4692-4701, 2020
 [38] C. Vincent-Cuaz, T. Vayer, R. Flamary, M. Corneli, N. Courty, [Online Graph 
 Dictionary Learning](https://arxiv.org/pdf/2102.06555.pdf), International Conference on Machine Learning (ICML), 2021.
 
-[39] Gozlan, N., Roberto, C., Samson, P. M., & Tetali, P. (2017). [Kantorovich duality for general transport costs and applications](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.712.1825&rep=rep1&type=pdf). Journal of Functional Analysis, 273(11), 3327-3405.
\ No newline at end of file
+[39] Gozlan, N., Roberto, C., Samson, P. M., & Tetali, P. (2017). [Kantorovich duality for general transport costs and applications](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.712.1825&rep=rep1&type=pdf). Journal of Functional Analysis, 273(11), 3327-3405.
+
+[40] Forrow, A., Hütter, J. C., Nitzan, M., Rigollet, P., Schiebinger, G., & Weed, J. (2019, April). [Statistical optimal transport via factored couplings](http://proceedings.mlr.press/v89/forrow19a/forrow19a.pdf). In The 22nd International Conference on Artificial Intelligence and Statistics (pp. 2454-2465). PMLR.
\ No newline at end of file
diff --git a/RELEASES.md b/RELEASES.md
index 86b401a..c2bd0d1 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -5,6 +5,7 @@
 
 #### New features
 
+- Implementation of factored OT with emd and sinkhorn (PR #358).
 - A brand new logo for POT (PR #357)
 - Better list of related examples in quick start guide with `minigallery` (PR #334).
 - Add optional log-domain Sinkhorn implementation in WDA to support smaller values
diff --git a/docs/source/all.rst b/docs/source/all.rst
index 76d2ff5..3f7d029 100644
--- a/docs/source/all.rst
+++ b/docs/source/all.rst
@@ -29,6 +29,7 @@ API and modules
    partial
    sliced
    weak
+   factored
 
 .. autosummary::
    :toctree: ../modules/generated/
diff --git a/examples/others/plot_factored_coupling.py b/examples/others/plot_factored_coupling.py
new file mode 100644
index 0000000..b5b1c9f
--- /dev/null
+++ b/examples/others/plot_factored_coupling.py
@@ -0,0 +1,86 @@
+# -*- coding: utf-8 -*-
+"""
+==========================================
+Optimal transport with factored couplings
+==========================================
+
+Illustration of the factored coupling OT between 2D empirical distributions
+
+"""
+
+# Author: Remi Flamary <remi.flamary@polytechnique.edu>
+#
+# License: MIT License
+
+# sphinx_gallery_thumbnail_number = 2
+
+import numpy as np
+import matplotlib.pylab as pl
+import ot
+import ot.plot
+
+# %%
+# Generate data an plot it
+# ------------------------
+
+# parameters and data generation
+
+np.random.seed(42)
+
+n = 100  # nb samples
+
+xs = np.random.rand(n, 2) - .5
+
+xs = xs + np.sign(xs)
+
+xt = np.random.rand(n, 2) - .5
+
+a, b = ot.unif(n), ot.unif(n)  # uniform distribution on samples
+
+#%% plot samples
+
+pl.figure(1)
+pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
+pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')
+pl.legend(loc=0)
+pl.title('Source and target distributions')
+
+
+# %%
+# Compute Factore OT and exact OT solutions
+# --------------------------------------
+
+#%% EMD
+M = ot.dist(xs, xt)
+G0 = ot.emd(a, b, M)
+
+#%% factored OT OT
+
+Ga, Gb, xb = ot.factored_optimal_transport(xs, xt, a, b, r=4)
+
+
+# %%
+# Plot factored OT and exact OT solutions
+# --------------------------------------
+
+pl.figure(2, (14, 4))
+
+pl.subplot(1, 3, 1)
+ot.plot.plot2D_samples_mat(xs, xt, G0, c=[.2, .2, .2], alpha=0.1)
+pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
+pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')
+pl.title('Exact OT with samples')
+
+pl.subplot(1, 3, 2)
+ot.plot.plot2D_samples_mat(xs, xb, Ga, c=[.6, .6, .9], alpha=0.5)
+ot.plot.plot2D_samples_mat(xb, xt, Gb, c=[.9, .6, .6], alpha=0.5)
+pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
+pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')
+pl.plot(xb[:, 0], xb[:, 1], 'og', label='Template samples')
+pl.title('Factored OT with template samples')
+
+pl.subplot(1, 3, 3)
+ot.plot.plot2D_samples_mat(xs, xt, Ga.dot(Gb), c=[.2, .2, .2], alpha=0.1)
+pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
+pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')
+pl.title('Factored OT low rank OT plan')
diff --git a/ot/__init__.py b/ot/__init__.py
index bda7a35..c5e1967 100644
--- a/ot/__init__.py
+++ b/ot/__init__.py
@@ -33,6 +33,7 @@ from . import partial
 from . import backend
 from . import regpath
 from . import weak
+from . import factored
 
 # OT functions
 from .lp import emd, emd2, emd_1d, emd2_1d, wasserstein_1d
@@ -44,6 +45,9 @@ from .sliced import sliced_wasserstein_distance, max_sliced_wasserstein_distance
 from .gromov import (gromov_wasserstein, gromov_wasserstein2,
                         gromov_barycenters, fused_gromov_wasserstein, fused_gromov_wasserstein2)
 from .weak import weak_optimal_transport
+from .factored import factored_optimal_transport
+
+
 # utils functions
 from .utils import dist, unif, tic, toc, toq
 
@@ -57,4 +61,5 @@ __all__ = ['emd', 'emd2', 'emd_1d', 'sinkhorn', 'sinkhorn2', 'utils',
            'sinkhorn_unbalanced2', 'sliced_wasserstein_distance',
            'gromov_wasserstein', 'gromov_wasserstein2', 'gromov_barycenters', 'fused_gromov_wasserstein', 'fused_gromov_wasserstein2',
             'max_sliced_wasserstein_distance', 'weak_optimal_transport',
+            'factored_optimal_transport', 
            'smooth', 'stochastic', 'unbalanced', 'partial', 'regpath']
diff --git a/ot/factored.py b/ot/factored.py
new file mode 100644
index 0000000..abc2445
--- /dev/null
+++ b/ot/factored.py
@@ -0,0 +1,145 @@
+"""
+Factored OT solvers (low rank, cost or OT plan)
+"""
+
+# Author: Remi Flamary <remi.flamary@polytehnique.edu>
+#
+# License: MIT License
+
+from .backend import get_backend
+from .utils import dist
+from .lp import emd
+from .bregman import sinkhorn
+
+__all__ = ['factored_optimal_transport']
+
+
+def factored_optimal_transport(Xa, Xb, a=None, b=None, reg=0.0, r=100, X0=None, stopThr=1e-7, numItermax=100, verbose=False, log=False, **kwargs):
+    r"""Solves factored OT problem and return OT plans and intermediate distribution
+
+    This function solve the following OT problem [40]_
+
+    .. math::
+        \mathop{\arg \min}_\mu \quad  W_2^2(\mu_a,\mu)+ W_2^2(\mu,\mu_b)
+
+    where :
+
+    - :math:`\mu_a` and :math:`\mu_b`  are empirical distributions.
+    - :math:`\mu` is an empirical distribution with r samples
+
+    And returns the two OT plans between
+
+    .. note:: This function is backend-compatible and will work on arrays
+        from all compatible backends. But the algorithm uses the C++ CPU backend
+        which can lead to copy overhead on GPU arrays.
+
+    Uses the conditional gradient algorithm to solve the problem proposed in
+    :ref:`[39] <references-weak>`.
+
+    Parameters
+    ----------
+    Xa : (ns,d) array-like, float
+        Source samples
+    Xb : (nt,d) array-like, float
+        Target samples
+    a : (ns,) array-like, float
+        Source histogram (uniform weight if empty list)
+    b : (nt,) array-like, float
+        Target histogram (uniform weight if empty list))
+    numItermax : int, optional
+        Max number of iterations
+    stopThr : float, optional
+        Stop threshold on the relative variation (>0)
+    verbose : bool, optional
+        Print information along iterations
+    log : bool, optional
+        record log if True
+
+
+    Returns
+    -------
+    Ga: array-like, shape (ns, r)
+        Optimal transportation matrix between source and the intermediate
+        distribution
+    Gb: array-like, shape (r, nt)
+        Optimal transportation matrix between the intermediate and target
+        distribution
+    X: array-like, shape (r, d)
+        Support of the intermediate distribution
+    log: dict, optional
+        If input log is true, a dictionary containing the cost and dual
+        variables and exit status
+
+
+    .. _references-factored:
+    References
+    ----------
+    .. [40] Forrow, A., Hütter, J. C., Nitzan, M., Rigollet, P., Schiebinger,
+        G., & Weed, J. (2019, April). Statistical optimal transport via factored
+        couplings. In The 22nd International Conference on Artificial
+        Intelligence and Statistics (pp. 2454-2465). PMLR.
+
+    See Also
+    --------
+    ot.bregman.sinkhorn : Entropic regularized OT ot.optim.cg : General
+    regularized OT
+    """
+
+    nx = get_backend(Xa, Xb)
+
+    n_a = Xa.shape[0]
+    n_b = Xb.shape[0]
+    d = Xa.shape[1]
+
+    if a is None:
+        a = nx.ones((n_a), type_as=Xa) / n_a
+    if b is None:
+        b = nx.ones((n_b), type_as=Xb) / n_b
+
+    if X0 is None:
+        X = nx.randn(r, d, type_as=Xa)
+    else:
+        X = X0
+
+    w = nx.ones(r, type_as=Xa) / r
+
+    def solve_ot(X1, X2, w1, w2):
+        M = dist(X1, X2)
+        if reg > 0:
+            G, log = sinkhorn(w1, w2, M, reg, log=True, **kwargs)
+            log['cost'] = nx.sum(G * M)
+            return G, log
+        else:
+            return emd(w1, w2, M, log=True, **kwargs)
+
+    norm_delta = []
+
+    # solve the barycenter
+    for i in range(numItermax):
+
+        old_X = X
+
+        # solve OT with template
+        Ga, loga = solve_ot(Xa, X, a, w)
+        Gb, logb = solve_ot(X, Xb, w, b)
+
+        X = 0.5 * (nx.dot(Ga.T, Xa) + nx.dot(Gb, Xb)) * r
+
+        delta = nx.norm(X - old_X)
+        if delta < stopThr:
+            break
+        if log:
+            norm_delta.append(delta)
+
+    if log:
+        log_dic = {'delta_iter': norm_delta,
+                   'ua': loga['u'],
+                   'va': loga['v'],
+                   'ub': logb['u'],
+                   'vb': logb['v'],
+                   'costa': loga['cost'],
+                   'costb': logb['cost'],
+                   }
+        return Ga, Gb, X, log_dic
+
+    return Ga, Gb, X
diff --git a/ot/plot.py b/ot/plot.py
index 2208c90..8ade2eb 100644
--- a/ot/plot.py
+++ b/ot/plot.py
@@ -85,8 +85,13 @@ def plot2D_samples_mat(xs, xt, G, thr=1e-8, **kwargs):
     if ('color' not in kwargs) and ('c' not in kwargs):
         kwargs['color'] = 'k'
     mx = G.max()
+    if 'alpha' in kwargs:
+        scale = kwargs['alpha']
+        del kwargs['alpha']
+    else:
+        scale = 1
     for i in range(xs.shape[0]):
         for j in range(xt.shape[0]):
             if G[i, j] / mx > thr:
                 pl.plot([xs[i, 0], xt[j, 0]], [xs[i, 1], xt[j, 1]],
-                        alpha=G[i, j] / mx, **kwargs)
+                        alpha=G[i, j] / mx * scale, **kwargs)
diff --git a/test/test_factored.py b/test/test_factored.py
new file mode 100644
index 0000000..fd2fd01
--- /dev/null
+++ b/test/test_factored.py
@@ -0,0 +1,56 @@
+"""Tests for main module ot.weak """
+
+# Author: Remi Flamary <remi.flamary@unice.fr>
+#
+# License: MIT License
+
+import ot
+import numpy as np
+
+
+def test_factored_ot():
+    # test weak ot solver and identity stationary point
+    n = 50
+    rng = np.random.RandomState(0)
+
+    xs = rng.randn(n, 2)
+    xt = rng.randn(n, 2)
+    u = ot.utils.unif(n)
+
+    Ga, Gb, X, log = ot.factored_optimal_transport(xs, xt, u, u, r=10, log=True)
+
+    # check constraints
+    np.testing.assert_allclose(u, Ga.sum(1))
+    np.testing.assert_allclose(u, Gb.sum(0))
+
+    Ga, Gb, X, log = ot.factored_optimal_transport(xs, xt, u, u, reg=1, r=10, log=True)
+
+    # check constraints
+    np.testing.assert_allclose(u, Ga.sum(1))
+    np.testing.assert_allclose(u, Gb.sum(0))
+
+
+def test_factored_ot_backends(nx):
+    # test weak ot solver for different backends
+    n = 50
+    rng = np.random.RandomState(0)
+
+    xs = rng.randn(n, 2)
+    xt = rng.randn(n, 2)
+    u = ot.utils.unif(n)
+
+    xs2 = nx.from_numpy(xs)
+    xt2 = nx.from_numpy(xt)
+    u2 = nx.from_numpy(u)
+
+    Ga2, Gb2, X2 = ot.factored_optimal_transport(xs2, xt2, u2, u2, r=10)
+
+    # check constraints
+    np.testing.assert_allclose(u, nx.to_numpy(Ga2).sum(1))
+    np.testing.assert_allclose(u, nx.to_numpy(Gb2).sum(0))
+
+    Ga2, Gb2, X2 = ot.factored_optimal_transport(xs2, xt2, reg=1, r=10, X0=X2)
+
+    # check constraints
+    np.testing.assert_allclose(u, nx.to_numpy(Ga2).sum(1))
+    np.testing.assert_allclose(u, nx.to_numpy(Gb2).sum(0))
-- 
cgit v1.2.3


From ad02112d4288f3efdd5bc6fc6e45444313bba871 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Tue, 5 Apr 2022 11:57:10 +0200
Subject: [MRG] Update examples in the doc (#359)

* add transparent color logo

* add transparent color logo

* move screenkhorn

* move stochastic and install ffmpeg on circleci

* try something

* add sudo

* install ffmpeg before python

* cleanup examples

* test svg scrapper

* add animation for reg path

* better example OT sivergence

* update ttles and add plots

* update free support

* proper figure indexes

* have less frame sin animation

* update readme and release file

* add tests for python 3.10
---
 .circleci/config.yml                               |   7 +
 .github/workflows/build_tests.yml                  |   6 +-
 README.md                                          |  10 +-
 RELEASES.md                                        |   3 +-
 docs/source/_static/images/logo.png                | Bin 5038 -> 4325 bytes
 docs/source/_static/images/logo.svg                | 174 +++++++++----------
 docs/source/conf.py                                |   6 +
 .../backends/plot_sliced_wass_grad_flow_pytorch.py |   2 +
 examples/backends/plot_wass1d_torch.py             |   8 +-
 .../barycenters/plot_free_support_barycenter.py    |  55 +++---
 examples/others/plot_logo.py                       |   8 +-
 examples/others/plot_screenkhorn_1D.py             |  71 ++++++++
 examples/others/plot_stochastic.py                 | 189 +++++++++++++++++++++
 examples/plot_OT_1D.py                             |  12 +-
 examples/plot_OT_1D_smooth.py                      |   6 +-
 examples/plot_OT_2D_samples.py                     |   2 +-
 examples/plot_OT_L1_vs_L2.py                       |  32 ++--
 examples/plot_compute_emd.py                       |  72 +++++---
 examples/plot_optim_OTreg.py                       |  38 ++++-
 examples/plot_screenkhorn_1D.py                    |  71 --------
 examples/plot_stochastic.py                        | 189 ---------------------
 examples/sliced-wasserstein/README.txt             |   2 +-
 examples/sliced-wasserstein/plot_variance.py       |   8 +-
 examples/unbalanced-partial/plot_UOT_1D.py         |  17 +-
 examples/unbalanced-partial/plot_regpath.py        |  88 +++++++++-
 25 files changed, 628 insertions(+), 448 deletions(-)
 create mode 100644 examples/others/plot_screenkhorn_1D.py
 create mode 100644 examples/others/plot_stochastic.py
 delete mode 100644 examples/plot_screenkhorn_1D.py
 delete mode 100644 examples/plot_stochastic.py

(limited to 'docs/source')

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 39c19fb..77ab45c 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -35,6 +35,12 @@ jobs:
               - data-cache-0
               - pip-cache
 
+        - run:
+            name: Install ffmpeg
+            command: |
+              sudo apt update 
+              sudo apt install ffmpeg
+
         - run:
             name: Get Python running
             command: |
@@ -50,6 +56,7 @@ jobs:
             paths:
               - ~/.cache/pip
 
+
         # Look at what we have and fail early if there is some library conflict
         - run:
             name: Check installation
diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_tests.yml
index 3c99da8..ce725c6 100644
--- a/.github/workflows/build_tests.yml
+++ b/.github/workflows/build_tests.yml
@@ -22,7 +22,7 @@ jobs:
     strategy:
       max-parallel: 4
       matrix:
-        python-version: ["3.7", "3.8", "3.9"]
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
 
     steps:
     - uses: actions/checkout@v1
@@ -93,7 +93,7 @@ jobs:
      strategy:
        max-parallel: 4
        matrix:
-         python-version: ["3.7", "3.8", "3.9"]
+         python-version: ["3.7", "3.8", "3.9", "3.10"]
 
      steps:
      - uses: actions/checkout@v1
@@ -120,7 +120,7 @@ jobs:
     strategy:
       max-parallel: 4
       matrix:
-        python-version: ["3.7", "3.8", "3.9"]
+        python-version: ["3.7", "3.8", "3.9", "3.10"]
 
     steps:
     - uses: actions/checkout@v1
diff --git a/README.md b/README.md
index ec5d221..0c3bd19 100644
--- a/README.md
+++ b/README.md
@@ -29,8 +29,11 @@ POT provides the following generic OT solvers (links to examples):
 * Non regularized [Wasserstein barycenters [16] ](https://pythonot.github.io/auto_examples/barycenters/plot_barycenter_lp_vs_entropic.html)) with LP solver (only small scale).
 * [Gromov-Wasserstein distances](https://pythonot.github.io/auto_examples/gromov/plot_gromov.html) and [GW barycenters](https://pythonot.github.io/auto_examples/gromov/plot_gromov_barycenter.html)  (exact [13] and regularized [12]), differentiable using gradients from
  * [Fused-Gromov-Wasserstein distances solver](https://pythonot.github.io/auto_examples/gromov/plot_fgw.html#sphx-glr-auto-examples-plot-fgw-py) and [FGW barycenters](https://pythonot.github.io/auto_examples/gromov/plot_barycenter_fgw.html) [24]
-* [Stochastic solver](https://pythonot.github.io/auto_examples/plot_stochastic.html) for Large-scale Optimal Transport (semi-dual problem [18] and dual problem [19])
-* [Stochastic solver of Gromov Wasserstein](https://pythonot.github.io/auto_examples/gromov/plot_gromov.html) for large-scale problem with any loss functions [33]
+* [Stochastic
+  solver](https://pythonot.github.io/auto_examples/others/plot_stochastic.html) and
+  [differentiable losses](https://pythonot.github.io/auto_examples/backends/plot_stoch_continuous_ot_pytorch.html) for
+  Large-scale Optimal Transport (semi-dual problem [18] and dual problem [19])
+* [Sampled solver of Gromov Wasserstein](https://pythonot.github.io/auto_examples/gromov/plot_gromov.html) for large-scale problem with any loss functions [33]
 * Non regularized [free support Wasserstein barycenters](https://pythonot.github.io/auto_examples/barycenters/plot_free_support_barycenter.html) [20].
 * [Unbalanced OT](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_UOT_1D.html) with KL relaxation and [barycenter](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_UOT_barycenter_1D.html) [10, 25].
 * [Partial Wasserstein and Gromov-Wasserstein](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_partial_wass_and_gromov.html) (exact [29] and entropic [3]
@@ -119,7 +122,7 @@ Note that for easier access the module is named `ot` instead of `pot`.
 
 ### Dependencies
 
-Some sub-modules require additional dependences which are discussed below
+Some sub-modules require additional dependencies which are discussed below
 
 * **ot.dr** (Wasserstein dimensionality reduction) depends on autograd and pymanopt that can be installed with:
 
@@ -127,7 +130,6 @@ Some sub-modules require additional dependences which are discussed below
 pip install pymanopt autograd
 ```
 
-* **ot.gpu** (GPU accelerated OT) depends on cupy that have to be installed following instructions on [this page](https://docs-cupy.chainer.org/en/stable/install.html). Obviously you will need CUDA installed and a compatible GPU. Note that this module is deprecated since version 0.8 and will be deleted in the future. GPU is now handled automatically through the backends and several solver already can run on GPU using the Pytorch backend.
 
 ## Examples
 
diff --git a/RELEASES.md b/RELEASES.md
index 45336f7..7d458f3 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -5,6 +5,7 @@
 
 #### New features
 
+- Update examples in the gallery (PR #359).
 - Add stochastic loss and OT plan computation for regularized OT and 
   backend examples(PR #360).
 - Implementation of factored OT with emd and sinkhorn (PR #358).
@@ -254,7 +255,7 @@ are coming for the next versions.
 
 #### Closed issues
 
-- Add JMLR paper to teh readme ad Mathieu Blondel to the Acknoledgments (PR
+- Add JMLR paper to the readme and Mathieu Blondel to the Acknoledgments (PR
   #231, #232)
 - Bug in Unbalanced OT example (Issue #127)
 - Clean Cython output when calling setup.py clean (Issue #122)
diff --git a/docs/source/_static/images/logo.png b/docs/source/_static/images/logo.png
index 7be5df7..2dd6f65 100644
Binary files a/docs/source/_static/images/logo.png and b/docs/source/_static/images/logo.png differ
diff --git a/docs/source/_static/images/logo.svg b/docs/source/_static/images/logo.svg
index 0bf2cb7..39fe900 100644
--- a/docs/source/_static/images/logo.svg
+++ b/docs/source/_static/images/logo.svg
@@ -1,24 +1,23 @@
 <?xml version="1.0" encoding="utf-8" standalone="no"?>
 <!DOCTYPE svg PUBLIC "-//W3C//DTD SVG 1.1//EN"
   "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
-<!-- Created with matplotlib (https://matplotlib.org/) -->
-<svg height="75.384pt" version="1.1" viewBox="0 0 209.7 75.384" width="209.7pt" xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink">
+<svg xmlns:xlink="http://www.w3.org/1999/xlink" width="209.7pt" height="75.384pt" viewBox="0 0 209.7 75.384" xmlns="http://www.w3.org/2000/svg" version="1.1">
  <metadata>
-  <rdf:RDF xmlns:cc="http://creativecommons.org/ns#" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+  <rdf:RDF xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:cc="http://creativecommons.org/ns#" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
    <cc:Work>
     <dc:type rdf:resource="http://purl.org/dc/dcmitype/StillImage"/>
-    <dc:date>2022-03-17T17:25:30.736761</dc:date>
+    <dc:date>2022-03-30T17:25:32.476826</dc:date>
     <dc:format>image/svg+xml</dc:format>
     <dc:creator>
      <cc:Agent>
-      <dc:title>Matplotlib v3.3.3, https://matplotlib.org/</dc:title>
+      <dc:title>Matplotlib v3.5.1, https://matplotlib.org/</dc:title>
      </cc:Agent>
     </dc:creator>
    </cc:Work>
   </rdf:RDF>
  </metadata>
  <defs>
-  <style type="text/css">*{stroke-linecap:butt;stroke-linejoin:round;}</style>
+  <style type="text/css">*{stroke-linejoin: round; stroke-linecap: butt}</style>
  </defs>
  <g id="figure_1">
   <g id="patch_1">
@@ -26,103 +25,104 @@
 L 209.7 75.384 
 L 209.7 0 
 L 0 0 
+L 0 75.384 
 z
-" style="fill:#ffffff;"/>
+" style="fill: none"/>
   </g>
   <g id="axes_1">
    <g id="line2d_1">
-    <path clip-path="url(#pafc522611b)" d="M 16.077273 11.885975 
+    <path d="M 16.077273 11.885975 
 L 47.044503 11.885975 
-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/>
    </g>
    <g id="line2d_2">
-    <path clip-path="url(#pafc522611b)" d="M 16.077273 22.208385 
+    <path d="M 16.077273 22.208385 
 L 57.366913 22.208385 
-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/>
    </g>
    <g id="line2d_3">
-    <path clip-path="url(#pafc522611b)" d="M 16.077273 32.530795 
+    <path d="M 16.077273 32.530795 
 L 57.366913 32.530795 
-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/>
    </g>
    <g id="line2d_4">
-    <path clip-path="url(#pafc522611b)" d="M 16.077273 42.853205 
+    <path d="M 16.077273 42.853205 
 L 47.044503 42.853205 
-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/>
    </g>
    <g id="line2d_5">
-    <path clip-path="url(#pafc522611b)" d="M 16.077273 53.175615 
+    <path d="M 16.077273 53.175615 
 L 26.399683 53.175615 
-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/>
    </g>
    <g id="line2d_6">
-    <path clip-path="url(#pafc522611b)" d="M 16.077273 63.498025 
+    <path d="M 16.077273 63.498025 
 L 26.399683 63.498025 
-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/>
    </g>
    <g id="line2d_7">
-    <path clip-path="url(#pafc522611b)" d="M 95.353383 11.885975 
+    <path d="M 95.353383 11.885975 
 L 107.740275 11.885975 
-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/>
    </g>
    <g id="line2d_8">
-    <path clip-path="url(#pafc522611b)" d="M 82.96649 22.208385 
+    <path d="M 82.96649 22.208385 
 L 120.127167 22.208385 
-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/>
    </g>
    <g id="line2d_9">
-    <path clip-path="url(#pafc522611b)" d="M 76.773044 32.530795 
+    <path d="M 76.773044 32.530795 
 L 126.320613 32.530795 
-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/>
    </g>
    <g id="line2d_10">
-    <path clip-path="url(#pafc522611b)" d="M 76.773044 42.853205 
+    <path d="M 76.773044 42.853205 
 L 126.320613 42.853205 
-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/>
    </g>
    <g id="line2d_11">
-    <path clip-path="url(#pafc522611b)" d="M 82.96649 53.175615 
+    <path d="M 82.96649 53.175615 
 L 120.127167 53.175615 
-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/>
    </g>
    <g id="line2d_12">
-    <path clip-path="url(#pafc522611b)" d="M 95.353383 63.498025 
+    <path d="M 95.353383 63.498025 
 L 107.740275 63.498025 
-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/>
    </g>
    <g id="line2d_13">
-    <path clip-path="url(#pafc522611b)" d="M 142.010677 11.885975 
+    <path d="M 142.010677 11.885975 
 L 193.622727 11.885975 
-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/>
    </g>
    <g id="line2d_14">
-    <path clip-path="url(#pafc522611b)" d="M 142.010677 22.208385 
+    <path d="M 142.010677 22.208385 
 L 193.622727 22.208385 
-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/>
    </g>
    <g id="line2d_15">
-    <path clip-path="url(#pafc522611b)" d="M 162.655497 32.530795 
+    <path d="M 162.655497 32.530795 
 L 172.977907 32.530795 
-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/>
    </g>
    <g id="line2d_16">
-    <path clip-path="url(#pafc522611b)" d="M 162.655497 42.853205 
+    <path d="M 162.655497 42.853205 
 L 172.977907 42.853205 
-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/>
    </g>
    <g id="line2d_17">
-    <path clip-path="url(#pafc522611b)" d="M 162.655497 53.175615 
+    <path d="M 162.655497 53.175615 
 L 172.977907 53.175615 
-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/>
    </g>
    <g id="line2d_18">
-    <path clip-path="url(#pafc522611b)" d="M 162.655497 63.498025 
+    <path d="M 162.655497 63.498025 
 L 172.977907 63.498025 
-" style="fill:none;stroke:#000000;stroke-linecap:square;stroke-opacity:0.6;stroke-width:3;"/>
+" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/>
    </g>
    <g id="line2d_19">
     <defs>
-     <path d="M 0 3 
+     <path id="m5ead2df136" d="M 0 3 
 C 0.795609 3 1.55874 2.683901 2.12132 2.12132 
 C 2.683901 1.55874 3 0.795609 3 0 
 C 3 -0.795609 2.683901 -1.55874 2.12132 -2.12132 
@@ -132,32 +132,32 @@ C -2.683901 -1.55874 -3 -0.795609 -3 0
 C -3 0.795609 -2.683901 1.55874 -2.12132 2.12132 
 C -1.55874 2.683901 -0.795609 3 0 3 
 z
-" id="m45a57d22c6" style="stroke:#000000;"/>
+" style="stroke: #000000"/>
     </defs>
-    <g clip-path="url(#pafc522611b)">
-     <use style="fill:#d62728;stroke:#000000;" x="16.077273" xlink:href="#m45a57d22c6" y="11.885975"/>
-     <use style="fill:#d62728;stroke:#000000;" x="16.077273" xlink:href="#m45a57d22c6" y="22.208385"/>
-     <use style="fill:#d62728;stroke:#000000;" x="16.077273" xlink:href="#m45a57d22c6" y="32.530795"/>
-     <use style="fill:#d62728;stroke:#000000;" x="16.077273" xlink:href="#m45a57d22c6" y="42.853205"/>
-     <use style="fill:#d62728;stroke:#000000;" x="16.077273" xlink:href="#m45a57d22c6" y="53.175615"/>
-     <use style="fill:#d62728;stroke:#000000;" x="16.077273" xlink:href="#m45a57d22c6" y="63.498025"/>
-     <use style="fill:#d62728;stroke:#000000;" x="95.353383" xlink:href="#m45a57d22c6" y="11.885975"/>
-     <use style="fill:#d62728;stroke:#000000;" x="82.96649" xlink:href="#m45a57d22c6" y="22.208385"/>
-     <use style="fill:#d62728;stroke:#000000;" x="76.773044" xlink:href="#m45a57d22c6" y="32.530795"/>
-     <use style="fill:#d62728;stroke:#000000;" x="76.773044" xlink:href="#m45a57d22c6" y="42.853205"/>
-     <use style="fill:#d62728;stroke:#000000;" x="82.96649" xlink:href="#m45a57d22c6" y="53.175615"/>
-     <use style="fill:#d62728;stroke:#000000;" x="95.353383" xlink:href="#m45a57d22c6" y="63.498025"/>
-     <use style="fill:#d62728;stroke:#000000;" x="142.010677" xlink:href="#m45a57d22c6" y="11.885975"/>
-     <use style="fill:#d62728;stroke:#000000;" x="142.010677" xlink:href="#m45a57d22c6" y="22.208385"/>
-     <use style="fill:#d62728;stroke:#000000;" x="162.655497" xlink:href="#m45a57d22c6" y="32.530795"/>
-     <use style="fill:#d62728;stroke:#000000;" x="162.655497" xlink:href="#m45a57d22c6" y="42.853205"/>
-     <use style="fill:#d62728;stroke:#000000;" x="162.655497" xlink:href="#m45a57d22c6" y="53.175615"/>
-     <use style="fill:#d62728;stroke:#000000;" x="162.655497" xlink:href="#m45a57d22c6" y="63.498025"/>
+    <g clip-path="url(#p367fff45ba)">
+     <use xlink:href="#m5ead2df136" x="16.077273" y="11.885975" style="fill: #d62728; stroke: #000000"/>
+     <use xlink:href="#m5ead2df136" x="16.077273" y="22.208385" style="fill: #d62728; stroke: #000000"/>
+     <use xlink:href="#m5ead2df136" x="16.077273" y="32.530795" style="fill: #d62728; stroke: #000000"/>
+     <use xlink:href="#m5ead2df136" x="16.077273" y="42.853205" style="fill: #d62728; stroke: #000000"/>
+     <use xlink:href="#m5ead2df136" x="16.077273" y="53.175615" style="fill: #d62728; stroke: #000000"/>
+     <use xlink:href="#m5ead2df136" x="16.077273" y="63.498025" style="fill: #d62728; stroke: #000000"/>
+     <use xlink:href="#m5ead2df136" x="95.353383" y="11.885975" style="fill: #d62728; stroke: #000000"/>
+     <use xlink:href="#m5ead2df136" x="82.96649" y="22.208385" style="fill: #d62728; stroke: #000000"/>
+     <use xlink:href="#m5ead2df136" x="76.773044" y="32.530795" style="fill: #d62728; stroke: #000000"/>
+     <use xlink:href="#m5ead2df136" x="76.773044" y="42.853205" style="fill: #d62728; stroke: #000000"/>
+     <use xlink:href="#m5ead2df136" x="82.96649" y="53.175615" style="fill: #d62728; stroke: #000000"/>
+     <use xlink:href="#m5ead2df136" x="95.353383" y="63.498025" style="fill: #d62728; stroke: #000000"/>
+     <use xlink:href="#m5ead2df136" x="142.010677" y="11.885975" style="fill: #d62728; stroke: #000000"/>
+     <use xlink:href="#m5ead2df136" x="142.010677" y="22.208385" style="fill: #d62728; stroke: #000000"/>
+     <use xlink:href="#m5ead2df136" x="162.655497" y="32.530795" style="fill: #d62728; stroke: #000000"/>
+     <use xlink:href="#m5ead2df136" x="162.655497" y="42.853205" style="fill: #d62728; stroke: #000000"/>
+     <use xlink:href="#m5ead2df136" x="162.655497" y="53.175615" style="fill: #d62728; stroke: #000000"/>
+     <use xlink:href="#m5ead2df136" x="162.655497" y="63.498025" style="fill: #d62728; stroke: #000000"/>
     </g>
    </g>
    <g id="line2d_20">
     <defs>
-     <path d="M 0 3 
+     <path id="m39fe4d1791" d="M 0 3 
 C 0.795609 3 1.55874 2.683901 2.12132 2.12132 
 C 2.683901 1.55874 3 0.795609 3 0 
 C 3 -0.795609 2.683901 -1.55874 2.12132 -2.12132 
@@ -167,34 +167,34 @@ C -2.683901 -1.55874 -3 -0.795609 -3 0
 C -3 0.795609 -2.683901 1.55874 -2.12132 2.12132 
 C -1.55874 2.683901 -0.795609 3 0 3 
 z
-" id="mb6ec6fa556" style="stroke:#000000;"/>
+" style="stroke: #000000"/>
     </defs>
-    <g clip-path="url(#pafc522611b)">
-     <use style="fill:#0000ff;stroke:#000000;" x="47.044503" xlink:href="#mb6ec6fa556" y="11.885975"/>
-     <use style="fill:#0000ff;stroke:#000000;" x="57.366913" xlink:href="#mb6ec6fa556" y="32.530795"/>
-     <use style="fill:#0000ff;stroke:#000000;" x="57.366913" xlink:href="#mb6ec6fa556" y="22.208385"/>
-     <use style="fill:#0000ff;stroke:#000000;" x="47.044503" xlink:href="#mb6ec6fa556" y="42.853205"/>
-     <use style="fill:#0000ff;stroke:#000000;" x="26.399683" xlink:href="#mb6ec6fa556" y="53.175615"/>
-     <use style="fill:#0000ff;stroke:#000000;" x="26.399683" xlink:href="#mb6ec6fa556" y="63.498025"/>
-     <use style="fill:#0000ff;stroke:#000000;" x="107.740275" xlink:href="#mb6ec6fa556" y="11.885975"/>
-     <use style="fill:#0000ff;stroke:#000000;" x="120.127167" xlink:href="#mb6ec6fa556" y="22.208385"/>
-     <use style="fill:#0000ff;stroke:#000000;" x="126.320613" xlink:href="#mb6ec6fa556" y="32.530795"/>
-     <use style="fill:#0000ff;stroke:#000000;" x="126.320613" xlink:href="#mb6ec6fa556" y="42.853205"/>
-     <use style="fill:#0000ff;stroke:#000000;" x="120.127167" xlink:href="#mb6ec6fa556" y="53.175615"/>
-     <use style="fill:#0000ff;stroke:#000000;" x="107.740275" xlink:href="#mb6ec6fa556" y="63.498025"/>
-     <use style="fill:#0000ff;stroke:#000000;" x="193.622727" xlink:href="#mb6ec6fa556" y="11.885975"/>
-     <use style="fill:#0000ff;stroke:#000000;" x="193.622727" xlink:href="#mb6ec6fa556" y="22.208385"/>
-     <use style="fill:#0000ff;stroke:#000000;" x="172.977907" xlink:href="#mb6ec6fa556" y="32.530795"/>
-     <use style="fill:#0000ff;stroke:#000000;" x="172.977907" xlink:href="#mb6ec6fa556" y="42.853205"/>
-     <use style="fill:#0000ff;stroke:#000000;" x="172.977907" xlink:href="#mb6ec6fa556" y="53.175615"/>
-     <use style="fill:#0000ff;stroke:#000000;" x="172.977907" xlink:href="#mb6ec6fa556" y="63.498025"/>
+    <g clip-path="url(#p367fff45ba)">
+     <use xlink:href="#m39fe4d1791" x="47.044503" y="11.885975" style="fill: #0000ff; stroke: #000000"/>
+     <use xlink:href="#m39fe4d1791" x="57.366913" y="32.530795" style="fill: #0000ff; stroke: #000000"/>
+     <use xlink:href="#m39fe4d1791" x="57.366913" y="22.208385" style="fill: #0000ff; stroke: #000000"/>
+     <use xlink:href="#m39fe4d1791" x="47.044503" y="42.853205" style="fill: #0000ff; stroke: #000000"/>
+     <use xlink:href="#m39fe4d1791" x="26.399683" y="53.175615" style="fill: #0000ff; stroke: #000000"/>
+     <use xlink:href="#m39fe4d1791" x="26.399683" y="63.498025" style="fill: #0000ff; stroke: #000000"/>
+     <use xlink:href="#m39fe4d1791" x="107.740275" y="11.885975" style="fill: #0000ff; stroke: #000000"/>
+     <use xlink:href="#m39fe4d1791" x="120.127167" y="22.208385" style="fill: #0000ff; stroke: #000000"/>
+     <use xlink:href="#m39fe4d1791" x="126.320613" y="32.530795" style="fill: #0000ff; stroke: #000000"/>
+     <use xlink:href="#m39fe4d1791" x="126.320613" y="42.853205" style="fill: #0000ff; stroke: #000000"/>
+     <use xlink:href="#m39fe4d1791" x="120.127167" y="53.175615" style="fill: #0000ff; stroke: #000000"/>
+     <use xlink:href="#m39fe4d1791" x="107.740275" y="63.498025" style="fill: #0000ff; stroke: #000000"/>
+     <use xlink:href="#m39fe4d1791" x="193.622727" y="11.885975" style="fill: #0000ff; stroke: #000000"/>
+     <use xlink:href="#m39fe4d1791" x="193.622727" y="22.208385" style="fill: #0000ff; stroke: #000000"/>
+     <use xlink:href="#m39fe4d1791" x="172.977907" y="32.530795" style="fill: #0000ff; stroke: #000000"/>
+     <use xlink:href="#m39fe4d1791" x="172.977907" y="42.853205" style="fill: #0000ff; stroke: #000000"/>
+     <use xlink:href="#m39fe4d1791" x="172.977907" y="53.175615" style="fill: #0000ff; stroke: #000000"/>
+     <use xlink:href="#m39fe4d1791" x="172.977907" y="63.498025" style="fill: #0000ff; stroke: #000000"/>
     </g>
    </g>
   </g>
  </g>
  <defs>
-  <clipPath id="pafc522611b">
-   <rect height="60.984" width="195.3" x="7.2" y="7.2"/>
+  <clipPath id="p367fff45ba">
+   <rect x="7.2" y="7.2" width="195.3" height="60.984"/>
   </clipPath>
  </defs>
 </svg>
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 60d0bb7..9526518 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -17,9 +17,15 @@ import os
 import re
 try:
     import sphinx_gallery
+
 except ImportError:
     print("warning sphinx-gallery not installed")
 
+
+    
+
+
+
 # !!!! allow readthedoc compilation
 try:
     from unittest.mock import MagicMock
diff --git a/examples/backends/plot_sliced_wass_grad_flow_pytorch.py b/examples/backends/plot_sliced_wass_grad_flow_pytorch.py
index 05b9952..cf5d64d 100644
--- a/examples/backends/plot_sliced_wass_grad_flow_pytorch.py
+++ b/examples/backends/plot_sliced_wass_grad_flow_pytorch.py
@@ -27,6 +27,8 @@ Machine Learning (pp. 4104-4113). PMLR.
 #
 # License: MIT License
 
+# sphinx_gallery_thumbnail_number = 4
+
 
 # %%
 # Loading the data
diff --git a/examples/backends/plot_wass1d_torch.py b/examples/backends/plot_wass1d_torch.py
index 0abdd6d..cd8e2fd 100644
--- a/examples/backends/plot_wass1d_torch.py
+++ b/examples/backends/plot_wass1d_torch.py
@@ -1,9 +1,9 @@
 r"""
-=================================
-Wasserstein 1D with PyTorch
-=================================
+=================================================
+Wasserstein 1D (flow and barycenter) with PyTorch
+=================================================
 
-In this small example, we consider the following minization problem:
+In this small example, we consider the following minimization problem:
 
 .. math::
   \mu^* = \min_\mu W(\mu,\nu)
diff --git a/examples/barycenters/plot_free_support_barycenter.py b/examples/barycenters/plot_free_support_barycenter.py
index 2d68a39..226dfeb 100644
--- a/examples/barycenters/plot_free_support_barycenter.py
+++ b/examples/barycenters/plot_free_support_barycenter.py
@@ -9,61 +9,62 @@ sum of diracs.
 
 """
 
-# Author: Vivien Seguy <vivien.seguy@iip.ist.i.kyoto-u.ac.jp>
+# Authors: Vivien Seguy <vivien.seguy@iip.ist.i.kyoto-u.ac.jp>
+#          Rémi Flamary <remi.flamary@polytechnique.edu>
 #
 # License: MIT License
 
+# sphinx_gallery_thumbnail_number = 2
+
 import numpy as np
 import matplotlib.pylab as pl
 import ot
 
 
-##############################################################################
+# %%
 # Generate data
 # -------------
 
-N = 3
+N = 2
 d = 2
-measures_locations = []
-measures_weights = []
-
-for i in range(N):
 
-    n_i = np.random.randint(low=1, high=20)  # nb samples
+I1 = pl.imread('../../data/redcross.png').astype(np.float64)[::4, ::4, 2]
+I2 = pl.imread('../../data/duck.png').astype(np.float64)[::4, ::4, 2]
 
-    mu_i = np.random.normal(0., 4., (d,))  # Gaussian mean
+sz = I2.shape[0]
+XX, YY = np.meshgrid(np.arange(sz), np.arange(sz))
 
-    A_i = np.random.rand(d, d)
-    cov_i = np.dot(A_i, A_i.transpose())  # Gaussian covariance matrix
+x1 = np.stack((XX[I1 == 0], YY[I1 == 0]), 1) * 1.0
+x2 = np.stack((XX[I2 == 0] + 80, -YY[I2 == 0] + 32), 1) * 1.0
+x3 = np.stack((XX[I2 == 0], -YY[I2 == 0] + 32), 1) * 1.0
 
-    x_i = ot.datasets.make_2D_samples_gauss(n_i, mu_i, cov_i)  # Dirac locations
-    b_i = np.random.uniform(0., 1., (n_i,))
-    b_i = b_i / np.sum(b_i)  # Dirac weights
+measures_locations = [x1, x2]
+measures_weights = [ot.unif(x1.shape[0]), ot.unif(x2.shape[0])]
 
-    measures_locations.append(x_i)
-    measures_weights.append(b_i)
+pl.figure(1, (12, 4))
+pl.scatter(x1[:, 0], x1[:, 1], alpha=0.5)
+pl.scatter(x2[:, 0], x2[:, 1], alpha=0.5)
+pl.title('Distributions')
 
 
-##############################################################################
+# %%
 # Compute free support barycenter
 # -------------------------------
 
-k = 10  # number of Diracs of the barycenter
+k = 200  # number of Diracs of the barycenter
 X_init = np.random.normal(0., 1., (k, d))  # initial Dirac locations
 b = np.ones((k,)) / k  # weights of the barycenter (it will not be optimized, only the locations are optimized)
 
 X = ot.lp.free_support_barycenter(measures_locations, measures_weights, X_init, b)
 
-
-##############################################################################
-# Plot data
+# %%
+# Plot the barycenter
 # ---------
 
-pl.figure(1)
-for (x_i, b_i) in zip(measures_locations, measures_weights):
-    color = np.random.randint(low=1, high=10 * N)
-    pl.scatter(x_i[:, 0], x_i[:, 1], s=b_i * 1000, label='input measure')
-pl.scatter(X[:, 0], X[:, 1], s=b * 1000, c='black', marker='^', label='2-Wasserstein barycenter')
+pl.figure(2, (8, 3))
+pl.scatter(x1[:, 0], x1[:, 1], alpha=0.5)
+pl.scatter(x2[:, 0], x2[:, 1], alpha=0.5)
+pl.scatter(X[:, 0], X[:, 1], s=b * 1000, marker='s', label='2-Wasserstein barycenter')
 pl.title('Data measures and their barycenter')
-pl.legend(loc=0)
+pl.legend(loc="lower right")
 pl.show()
diff --git a/examples/others/plot_logo.py b/examples/others/plot_logo.py
index afddcad..9414371 100644
--- a/examples/others/plot_logo.py
+++ b/examples/others/plot_logo.py
@@ -7,8 +7,8 @@ Logo of the POT toolbox
 
 In this example we plot the logo of the POT toolbox.
 
-A specificity of this logo is that it is done 100% in Python and generated using
-matplotlib using the EMD solver from POT.
+This logo is that it is done 100% in Python and generated using
+matplotlib and ploting teh solution of the EMD solver from POT.
 
 """
 
@@ -86,8 +86,8 @@ pl.axis('equal')
 pl.axis('off')
 
 # Save logo file
-# pl.savefig('logo.svg', dpi=150, bbox_inches='tight')
-# pl.savefig('logo.png', dpi=150, bbox_inches='tight')
+# pl.savefig('logo.svg', dpi=150, transparent=True, bbox_inches='tight')
+# pl.savefig('logo.png', dpi=150, transparent=True, bbox_inches='tight')
 
 # %%
 # Plot the logo (dark background)
diff --git a/examples/others/plot_screenkhorn_1D.py b/examples/others/plot_screenkhorn_1D.py
new file mode 100644
index 0000000..2023649
--- /dev/null
+++ b/examples/others/plot_screenkhorn_1D.py
@@ -0,0 +1,71 @@
+# -*- coding: utf-8 -*-
+"""
+========================================
+Screened optimal transport (Screenkhorn)
+========================================
+
+This example illustrates the computation of Screenkhorn [26].
+
+[26] Alaya M. Z., Bérar M., Gasso G., Rakotomamonjy A. (2019).
+Screening Sinkhorn Algorithm for Regularized Optimal Transport,
+Advances in Neural Information Processing Systems 33 (NeurIPS).
+"""
+
+# Author: Mokhtar Z. Alaya <mokhtarzahdi.alaya@gmail.com>
+#
+# License: MIT License
+
+import numpy as np
+import matplotlib.pylab as pl
+import ot.plot
+from ot.datasets import make_1D_gauss as gauss
+from ot.bregman import screenkhorn
+
+##############################################################################
+# Generate data
+# -------------
+
+#%% parameters
+
+n = 100  # nb bins
+
+# bin positions
+x = np.arange(n, dtype=np.float64)
+
+# Gaussian distributions
+a = gauss(n, m=20, s=5)  # m= mean, s= std
+b = gauss(n, m=60, s=10)
+
+# loss matrix
+M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
+M /= M.max()
+
+##############################################################################
+# Plot distributions and loss matrix
+# ----------------------------------
+
+#%% plot the distributions
+
+pl.figure(1, figsize=(6.4, 3))
+pl.plot(x, a, 'b', label='Source distribution')
+pl.plot(x, b, 'r', label='Target distribution')
+pl.legend()
+
+# plot distributions and loss matrix
+
+pl.figure(2, figsize=(5, 5))
+ot.plot.plot1D_mat(a, b, M, 'Cost matrix M')
+
+##############################################################################
+# Solve Screenkhorn
+# -----------------------
+
+# Screenkhorn
+lambd = 2e-03  # entropy parameter
+ns_budget = 30  # budget number of points to be keeped in the source distribution
+nt_budget = 30  # budget number of points to be keeped in the target distribution
+
+G_screen = screenkhorn(a, b, M, lambd, ns_budget, nt_budget, uniform=False, restricted=True, verbose=True)
+pl.figure(4, figsize=(5, 5))
+ot.plot.plot1D_mat(a, b, G_screen, 'OT matrix Screenkhorn')
+pl.show()
diff --git a/examples/others/plot_stochastic.py b/examples/others/plot_stochastic.py
new file mode 100644
index 0000000..3a1ef31
--- /dev/null
+++ b/examples/others/plot_stochastic.py
@@ -0,0 +1,189 @@
+"""
+===================
+Stochastic examples
+===================
+
+This example is designed to show how to use the stochatic optimization
+algorithms for discrete and semi-continuous measures from the POT library.
+
+[18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F.
+Stochastic Optimization for Large-scale Optimal Transport.
+Advances in Neural Information Processing Systems (2016).
+
+[19] Seguy, V., Bhushan Damodaran, B., Flamary, R., Courty, N., Rolet, A. &
+Blondel, M. Large-scale Optimal Transport and Mapping Estimation.
+International Conference on Learning Representation (2018)
+
+"""
+
+# Author: Kilian Fatras <kilian.fatras@gmail.com>
+#
+# License: MIT License
+
+import matplotlib.pylab as pl
+import numpy as np
+import ot
+import ot.plot
+
+
+#############################################################################
+# Compute the Transportation Matrix for the Semi-Dual Problem
+# -----------------------------------------------------------
+#
+# Discrete case
+# `````````````
+#
+# Sample two discrete measures for the discrete case and compute their cost
+# matrix c.
+
+n_source = 7
+n_target = 4
+reg = 1
+numItermax = 1000
+
+a = ot.utils.unif(n_source)
+b = ot.utils.unif(n_target)
+
+rng = np.random.RandomState(0)
+X_source = rng.randn(n_source, 2)
+Y_target = rng.randn(n_target, 2)
+M = ot.dist(X_source, Y_target)
+
+#############################################################################
+# Call the "SAG" method to find the transportation matrix in the discrete case
+
+method = "SAG"
+sag_pi = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method,
+                                                numItermax)
+print(sag_pi)
+
+#############################################################################
+# Semi-Continuous Case
+# ````````````````````
+#
+# Sample one general measure a, one discrete measures b for the semicontinous
+# case, the points where source and target measures are defined and compute the
+# cost matrix.
+
+n_source = 7
+n_target = 4
+reg = 1
+numItermax = 1000
+log = True
+
+a = ot.utils.unif(n_source)
+b = ot.utils.unif(n_target)
+
+rng = np.random.RandomState(0)
+X_source = rng.randn(n_source, 2)
+Y_target = rng.randn(n_target, 2)
+M = ot.dist(X_source, Y_target)
+
+#############################################################################
+# Call the "ASGD" method to find the transportation matrix in the semicontinous
+# case.
+
+method = "ASGD"
+asgd_pi, log_asgd = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method,
+                                                           numItermax, log=log)
+print(log_asgd['alpha'], log_asgd['beta'])
+print(asgd_pi)
+
+#############################################################################
+# Compare the results with the Sinkhorn algorithm
+
+sinkhorn_pi = ot.sinkhorn(a, b, M, reg)
+print(sinkhorn_pi)
+
+
+##############################################################################
+# Plot Transportation Matrices
+# ````````````````````````````
+#
+# For SAG
+
+pl.figure(4, figsize=(5, 5))
+ot.plot.plot1D_mat(a, b, sag_pi, 'semi-dual : OT matrix SAG')
+pl.show()
+
+
+##############################################################################
+# For ASGD
+
+pl.figure(4, figsize=(5, 5))
+ot.plot.plot1D_mat(a, b, asgd_pi, 'semi-dual : OT matrix ASGD')
+pl.show()
+
+
+##############################################################################
+# For Sinkhorn
+
+pl.figure(4, figsize=(5, 5))
+ot.plot.plot1D_mat(a, b, sinkhorn_pi, 'OT matrix Sinkhorn')
+pl.show()
+
+
+#############################################################################
+# Compute the Transportation Matrix for the Dual Problem
+# ------------------------------------------------------
+#
+# Semi-continuous case
+# ````````````````````
+#
+# Sample one general measure a, one discrete measures b for the semi-continuous
+# case and compute the cost matrix c.
+
+n_source = 7
+n_target = 4
+reg = 1
+numItermax = 100000
+lr = 0.1
+batch_size = 3
+log = True
+
+a = ot.utils.unif(n_source)
+b = ot.utils.unif(n_target)
+
+rng = np.random.RandomState(0)
+X_source = rng.randn(n_source, 2)
+Y_target = rng.randn(n_target, 2)
+M = ot.dist(X_source, Y_target)
+
+#############################################################################
+#
+# Call the "SGD" dual method to find the transportation matrix in the
+# semi-continuous case
+
+sgd_dual_pi, log_sgd = ot.stochastic.solve_dual_entropic(a, b, M, reg,
+                                                         batch_size, numItermax,
+                                                         lr, log=log)
+print(log_sgd['alpha'], log_sgd['beta'])
+print(sgd_dual_pi)
+
+#############################################################################
+#
+# Compare the results with the Sinkhorn algorithm
+# ```````````````````````````````````````````````
+#
+# Call the Sinkhorn algorithm from POT
+
+sinkhorn_pi = ot.sinkhorn(a, b, M, reg)
+print(sinkhorn_pi)
+
+##############################################################################
+# Plot Transportation Matrices
+# ````````````````````````````
+#
+# For SGD
+
+pl.figure(4, figsize=(5, 5))
+ot.plot.plot1D_mat(a, b, sgd_dual_pi, 'dual : OT matrix SGD')
+pl.show()
+
+
+##############################################################################
+# For Sinkhorn
+
+pl.figure(4, figsize=(5, 5))
+ot.plot.plot1D_mat(a, b, sinkhorn_pi, 'OT matrix Sinkhorn')
+pl.show()
diff --git a/examples/plot_OT_1D.py b/examples/plot_OT_1D.py
index 15ead96..62f0b7d 100644
--- a/examples/plot_OT_1D.py
+++ b/examples/plot_OT_1D.py
@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 """
-====================
-1D optimal transport
-====================
+======================================
+Optimal Transport for 1D distributions
+======================================
 
 This example illustrates the computation of EMD and Sinkhorn transport plans
 and their visualization.
@@ -64,7 +64,11 @@ ot.plot.plot1D_mat(a, b, M, 'Cost matrix M')
 
 #%% EMD
 
-G0 = ot.emd(a, b, M)
+# use fast 1D solver
+G0 = ot.emd_1d(x, x, a, b)
+
+# Equivalent to
+# G0 = ot.emd(a, b, M)
 
 pl.figure(3, figsize=(5, 5))
 ot.plot.plot1D_mat(a, b, G0, 'OT matrix G0')
diff --git a/examples/plot_OT_1D_smooth.py b/examples/plot_OT_1D_smooth.py
index b07f99f..5415e4f 100644
--- a/examples/plot_OT_1D_smooth.py
+++ b/examples/plot_OT_1D_smooth.py
@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 """
-===========================
-1D smooth optimal transport
-===========================
+================================
+Smooth optimal transport example
+================================
 
 This example illustrates the computation of EMD, Sinkhorn and smooth OT plans
 and their visualization.
diff --git a/examples/plot_OT_2D_samples.py b/examples/plot_OT_2D_samples.py
index c3a7cd8..1d82fb8 100644
--- a/examples/plot_OT_2D_samples.py
+++ b/examples/plot_OT_2D_samples.py
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 """
 ====================================================
-2D Optimal transport between empirical distributions
+Optimal Transport between 2D empirical distributions
 ====================================================
 
 Illustration of 2D optimal transport between discributions that are weighted
diff --git a/examples/plot_OT_L1_vs_L2.py b/examples/plot_OT_L1_vs_L2.py
index cb94574..cce51f8 100644
--- a/examples/plot_OT_L1_vs_L2.py
+++ b/examples/plot_OT_L1_vs_L2.py
@@ -1,10 +1,10 @@
 # -*- coding: utf-8 -*-
 """
-==========================================
-2D Optimal transport for different metrics
-==========================================
+================================================
+Optimal Transport with different gournd metrics
+================================================
 
-2D OT on empirical distributio  with different gound metric.
+2D OT on empirical distributio  with different ground metric.
 
 Stole the figure idea from Fig. 1 and 2 in
 https://arxiv.org/pdf/1706.07650.pdf
@@ -23,7 +23,7 @@ import matplotlib.pylab as pl
 import ot
 import ot.plot
 
-##############################################################################
+# %%
 # Dataset 1 : uniform sampling
 # ----------------------------
 
@@ -46,7 +46,7 @@ M2 = ot.dist(xs, xt, metric='sqeuclidean')
 M2 /= M2.max()
 
 # loss matrix
-Mp = np.sqrt(ot.dist(xs, xt, metric='euclidean'))
+Mp = ot.dist(xs, xt, metric='cityblock')
 Mp /= Mp.max()
 
 # Data
@@ -71,7 +71,7 @@ pl.title('Squared Euclidean cost')
 
 pl.subplot(1, 3, 3)
 pl.imshow(Mp, interpolation='nearest')
-pl.title('Sqrt Euclidean cost')
+pl.title('L1 (cityblock cost')
 pl.tight_layout()
 
 ##############################################################################
@@ -109,22 +109,22 @@ pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
 pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')
 pl.axis('equal')
 # pl.legend(loc=0)
-pl.title('OT sqrt Euclidean')
+pl.title('OT L1 (cityblock)')
 pl.tight_layout()
 
 pl.show()
 
 
-##############################################################################
+# %%
 # Dataset 2 : Partial circle
 # --------------------------
 
-n = 50  # nb samples
+n = 20  # nb samples
 xtot = np.zeros((n + 1, 2))
 xtot[:, 0] = np.cos(
-    (np.arange(n + 1) + 1.0) * 0.9 / (n + 2) * 2 * np.pi)
+    (np.arange(n + 1) + 1.0) * 0.8 / (n + 2) * 2 * np.pi)
 xtot[:, 1] = np.sin(
-    (np.arange(n + 1) + 1.0) * 0.9 / (n + 2) * 2 * np.pi)
+    (np.arange(n + 1) + 1.0) * 0.8 / (n + 2) * 2 * np.pi)
 
 xs = xtot[:n, :]
 xt = xtot[1:, :]
@@ -140,7 +140,7 @@ M2 = ot.dist(xs, xt, metric='sqeuclidean')
 M2 /= M2.max()
 
 # loss matrix
-Mp = np.sqrt(ot.dist(xs, xt, metric='euclidean'))
+Mp = ot.dist(xs, xt, metric='cityblock')
 Mp /= Mp.max()
 
 
@@ -166,13 +166,13 @@ pl.title('Squared Euclidean cost')
 
 pl.subplot(1, 3, 3)
 pl.imshow(Mp, interpolation='nearest')
-pl.title('Sqrt Euclidean cost')
+pl.title('L1 (cityblock) cost')
 pl.tight_layout()
 
 ##############################################################################
 # Dataset 2 : Plot  OT Matrices
 # -----------------------------
-
+#
 
 #%% EMD
 G1 = ot.emd(a, b, M1)
@@ -204,7 +204,7 @@ pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
 pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')
 pl.axis('equal')
 # pl.legend(loc=0)
-pl.title('OT sqrt Euclidean')
+pl.title('OT L1 (cityblock)')
 pl.tight_layout()
 
 pl.show()
diff --git a/examples/plot_compute_emd.py b/examples/plot_compute_emd.py
index 527a847..36cc7da 100644
--- a/examples/plot_compute_emd.py
+++ b/examples/plot_compute_emd.py
@@ -1,10 +1,10 @@
 # -*- coding: utf-8 -*-
 """
-=================
-Plot multiple EMD
-=================
+==================
+OT distances in 1D
+==================
 
-Shows how to compute multiple EMD and Sinkhorn with two different
+Shows how to compute multiple Wassersein and Sinkhorn with two different
 ground metrics and plot their values for different distributions.
 
 
@@ -14,7 +14,7 @@ ground metrics and plot their values for different distributions.
 #
 # License: MIT License
 
-# sphinx_gallery_thumbnail_number = 3
+# sphinx_gallery_thumbnail_number = 2
 
 import numpy as np
 import matplotlib.pylab as pl
@@ -29,7 +29,7 @@ from ot.datasets import make_1D_gauss as gauss
 #%% parameters
 
 n = 100  # nb bins
-n_target = 50  # nb target distributions
+n_target = 20  # nb target distributions
 
 
 # bin positions
@@ -47,9 +47,9 @@ for i, m in enumerate(lst_m):
 
 # loss matrix and normalization
 M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)), 'euclidean')
-M /= M.max()
+M /= M.max() * 0.1
 M2 = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)), 'sqeuclidean')
-M2 /= M2.max()
+M2 /= M2.max() * 0.1
 
 ##############################################################################
 # Plot data
@@ -59,10 +59,12 @@ M2 /= M2.max()
 
 pl.figure(1)
 pl.subplot(2, 1, 1)
-pl.plot(x, a, 'b', label='Source distribution')
+pl.plot(x, a, 'r', label='Source distribution')
 pl.title('Source distribution')
 pl.subplot(2, 1, 2)
-pl.plot(x, B, label='Target distributions')
+for i in range(n_target):
+    pl.plot(x, B[:, i], 'b', alpha=i / n_target)
+pl.plot(x, B[:, -1], 'b', label='Target distributions')
 pl.title('Target distributions')
 pl.tight_layout()
 
@@ -73,14 +75,27 @@ pl.tight_layout()
 
 #%% Compute and plot distributions and loss matrix
 
-d_emd = ot.emd2(a, B, M)  # direct computation of EMD
-d_emd2 = ot.emd2(a, B, M2)  # direct computation of EMD with loss M2
-
+d_emd = ot.emd2(a, B, M)  # direct computation of OT loss
+d_emd2 = ot.emd2(a, B, M2)  # direct computation of OT loss with metrixc M2
+d_tv = [np.sum(abs(a - B[:, i])) for i in range(n_target)]
 
 pl.figure(2)
-pl.plot(d_emd, label='Euclidean EMD')
-pl.plot(d_emd2, label='Squared Euclidean EMD')
-pl.title('EMD distances')
+pl.subplot(2, 1, 1)
+pl.plot(x, a, 'r', label='Source distribution')
+pl.title('Distributions')
+for i in range(n_target):
+    pl.plot(x, B[:, i], 'b', alpha=i / n_target)
+pl.plot(x, B[:, -1], 'b', label='Target distributions')
+pl.ylim((-.01, 0.13))
+pl.xticks(())
+pl.legend()
+pl.subplot(2, 1, 2)
+pl.plot(d_emd, label='Euclidean OT')
+pl.plot(d_emd2, label='Squared Euclidean OT')
+pl.plot(d_tv, label='Total Variation (TV)')
+#pl.xlim((-7,23))
+pl.xlabel('Displacement')
+pl.title('Divergences')
 pl.legend()
 
 ##############################################################################
@@ -88,17 +103,30 @@ pl.legend()
 # -----------------------------------------
 
 #%%
-reg = 1e-2
+reg = 1e-1
 d_sinkhorn = ot.sinkhorn2(a, B, M, reg)
 d_sinkhorn2 = ot.sinkhorn2(a, B, M2, reg)
 
-pl.figure(2)
+pl.figure(3)
 pl.clf()
-pl.plot(d_emd, label='Euclidean EMD')
-pl.plot(d_emd2, label='Squared Euclidean EMD')
+
+pl.subplot(2, 1, 1)
+pl.plot(x, a, 'r', label='Source distribution')
+pl.title('Distributions')
+for i in range(n_target):
+    pl.plot(x, B[:, i], 'b', alpha=i / n_target)
+pl.plot(x, B[:, -1], 'b', label='Target distributions')
+pl.ylim((-.01, 0.13))
+pl.xticks(())
+pl.legend()
+pl.subplot(2, 1, 2)
+pl.plot(d_emd, label='Euclidean OT')
+pl.plot(d_emd2, label='Squared Euclidean OT')
 pl.plot(d_sinkhorn, '+', label='Euclidean Sinkhorn')
 pl.plot(d_sinkhorn2, '+', label='Squared Euclidean Sinkhorn')
-pl.title('EMD distances')
+pl.plot(d_tv, label='Total Variation (TV)')
+#pl.xlim((-7,23))
+pl.xlabel('Displacement')
+pl.title('Divergences')
 pl.legend()
-
 pl.show()
diff --git a/examples/plot_optim_OTreg.py b/examples/plot_optim_OTreg.py
index 5eb15bd..7b021d2 100644
--- a/examples/plot_optim_OTreg.py
+++ b/examples/plot_optim_OTreg.py
@@ -24,7 +24,7 @@ arXiv preprint arXiv:1510.06567.
 
 
 """
-# sphinx_gallery_thumbnail_number = 4
+# sphinx_gallery_thumbnail_number = 5
 
 import numpy as np
 import matplotlib.pylab as pl
@@ -58,7 +58,7 @@ M /= M.max()
 
 G0 = ot.emd(a, b, M)
 
-pl.figure(3, figsize=(5, 5))
+pl.figure(1, figsize=(5, 5))
 ot.plot.plot1D_mat(a, b, G0, 'OT matrix G0')
 
 ##############################################################################
@@ -80,7 +80,7 @@ reg = 1e-1
 
 Gl2 = ot.optim.cg(a, b, M, reg, f, df, verbose=True)
 
-pl.figure(3)
+pl.figure(2)
 ot.plot.plot1D_mat(a, b, Gl2, 'OT matrix Frob. reg')
 
 ##############################################################################
@@ -102,7 +102,7 @@ reg = 1e-3
 
 Ge = ot.optim.cg(a, b, M, reg, f, df, verbose=True)
 
-pl.figure(4, figsize=(5, 5))
+pl.figure(3, figsize=(5, 5))
 ot.plot.plot1D_mat(a, b, Ge, 'OT matrix Entrop. reg')
 
 ##############################################################################
@@ -125,6 +125,34 @@ reg2 = 1e-1
 
 Gel2 = ot.optim.gcg(a, b, M, reg1, reg2, f, df, verbose=True)
 
-pl.figure(5, figsize=(5, 5))
+pl.figure(4, figsize=(5, 5))
 ot.plot.plot1D_mat(a, b, Gel2, 'OT entropic + matrix Frob. reg')
 pl.show()
+
+
+# %%
+# Comparison of the OT matrices
+
+nvisu = 40
+
+pl.figure(5, figsize=(10, 4))
+
+pl.subplot(2, 2, 1)
+pl.imshow(G0[:nvisu, :])
+pl.axis('off')
+pl.title('Exact OT')
+
+pl.subplot(2, 2, 2)
+pl.imshow(Gl2[:nvisu, :])
+pl.axis('off')
+pl.title('Frobenius reg.')
+
+pl.subplot(2, 2, 3)
+pl.imshow(Ge[:nvisu, :])
+pl.axis('off')
+pl.title('Entropic reg.')
+
+pl.subplot(2, 2, 4)
+pl.imshow(Gel2[:nvisu, :])
+pl.axis('off')
+pl.title('Entropic + Frobenius reg.')
diff --git a/examples/plot_screenkhorn_1D.py b/examples/plot_screenkhorn_1D.py
deleted file mode 100644
index 785642a..0000000
--- a/examples/plot_screenkhorn_1D.py
+++ /dev/null
@@ -1,71 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-===============================
-1D Screened optimal transport
-===============================
-
-This example illustrates the computation of Screenkhorn [26].
-
-[26] Alaya M. Z., Bérar M., Gasso G., Rakotomamonjy A. (2019).
-Screening Sinkhorn Algorithm for Regularized Optimal Transport,
-Advances in Neural Information Processing Systems 33 (NeurIPS).
-"""
-
-# Author: Mokhtar Z. Alaya <mokhtarzahdi.alaya@gmail.com>
-#
-# License: MIT License
-
-import numpy as np
-import matplotlib.pylab as pl
-import ot.plot
-from ot.datasets import make_1D_gauss as gauss
-from ot.bregman import screenkhorn
-
-##############################################################################
-# Generate data
-# -------------
-
-#%% parameters
-
-n = 100  # nb bins
-
-# bin positions
-x = np.arange(n, dtype=np.float64)
-
-# Gaussian distributions
-a = gauss(n, m=20, s=5)  # m= mean, s= std
-b = gauss(n, m=60, s=10)
-
-# loss matrix
-M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
-M /= M.max()
-
-##############################################################################
-# Plot distributions and loss matrix
-# ----------------------------------
-
-#%% plot the distributions
-
-pl.figure(1, figsize=(6.4, 3))
-pl.plot(x, a, 'b', label='Source distribution')
-pl.plot(x, b, 'r', label='Target distribution')
-pl.legend()
-
-# plot distributions and loss matrix
-
-pl.figure(2, figsize=(5, 5))
-ot.plot.plot1D_mat(a, b, M, 'Cost matrix M')
-
-##############################################################################
-# Solve Screenkhorn
-# -----------------------
-
-# Screenkhorn
-lambd = 2e-03  # entropy parameter
-ns_budget = 30  # budget number of points to be keeped in the source distribution
-nt_budget = 30  # budget number of points to be keeped in the target distribution
-
-G_screen = screenkhorn(a, b, M, lambd, ns_budget, nt_budget, uniform=False, restricted=True, verbose=True)
-pl.figure(4, figsize=(5, 5))
-ot.plot.plot1D_mat(a, b, G_screen, 'OT matrix Screenkhorn')
-pl.show()
diff --git a/examples/plot_stochastic.py b/examples/plot_stochastic.py
deleted file mode 100644
index 3a1ef31..0000000
--- a/examples/plot_stochastic.py
+++ /dev/null
@@ -1,189 +0,0 @@
-"""
-===================
-Stochastic examples
-===================
-
-This example is designed to show how to use the stochatic optimization
-algorithms for discrete and semi-continuous measures from the POT library.
-
-[18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F.
-Stochastic Optimization for Large-scale Optimal Transport.
-Advances in Neural Information Processing Systems (2016).
-
-[19] Seguy, V., Bhushan Damodaran, B., Flamary, R., Courty, N., Rolet, A. &
-Blondel, M. Large-scale Optimal Transport and Mapping Estimation.
-International Conference on Learning Representation (2018)
-
-"""
-
-# Author: Kilian Fatras <kilian.fatras@gmail.com>
-#
-# License: MIT License
-
-import matplotlib.pylab as pl
-import numpy as np
-import ot
-import ot.plot
-
-
-#############################################################################
-# Compute the Transportation Matrix for the Semi-Dual Problem
-# -----------------------------------------------------------
-#
-# Discrete case
-# `````````````
-#
-# Sample two discrete measures for the discrete case and compute their cost
-# matrix c.
-
-n_source = 7
-n_target = 4
-reg = 1
-numItermax = 1000
-
-a = ot.utils.unif(n_source)
-b = ot.utils.unif(n_target)
-
-rng = np.random.RandomState(0)
-X_source = rng.randn(n_source, 2)
-Y_target = rng.randn(n_target, 2)
-M = ot.dist(X_source, Y_target)
-
-#############################################################################
-# Call the "SAG" method to find the transportation matrix in the discrete case
-
-method = "SAG"
-sag_pi = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method,
-                                                numItermax)
-print(sag_pi)
-
-#############################################################################
-# Semi-Continuous Case
-# ````````````````````
-#
-# Sample one general measure a, one discrete measures b for the semicontinous
-# case, the points where source and target measures are defined and compute the
-# cost matrix.
-
-n_source = 7
-n_target = 4
-reg = 1
-numItermax = 1000
-log = True
-
-a = ot.utils.unif(n_source)
-b = ot.utils.unif(n_target)
-
-rng = np.random.RandomState(0)
-X_source = rng.randn(n_source, 2)
-Y_target = rng.randn(n_target, 2)
-M = ot.dist(X_source, Y_target)
-
-#############################################################################
-# Call the "ASGD" method to find the transportation matrix in the semicontinous
-# case.
-
-method = "ASGD"
-asgd_pi, log_asgd = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method,
-                                                           numItermax, log=log)
-print(log_asgd['alpha'], log_asgd['beta'])
-print(asgd_pi)
-
-#############################################################################
-# Compare the results with the Sinkhorn algorithm
-
-sinkhorn_pi = ot.sinkhorn(a, b, M, reg)
-print(sinkhorn_pi)
-
-
-##############################################################################
-# Plot Transportation Matrices
-# ````````````````````````````
-#
-# For SAG
-
-pl.figure(4, figsize=(5, 5))
-ot.plot.plot1D_mat(a, b, sag_pi, 'semi-dual : OT matrix SAG')
-pl.show()
-
-
-##############################################################################
-# For ASGD
-
-pl.figure(4, figsize=(5, 5))
-ot.plot.plot1D_mat(a, b, asgd_pi, 'semi-dual : OT matrix ASGD')
-pl.show()
-
-
-##############################################################################
-# For Sinkhorn
-
-pl.figure(4, figsize=(5, 5))
-ot.plot.plot1D_mat(a, b, sinkhorn_pi, 'OT matrix Sinkhorn')
-pl.show()
-
-
-#############################################################################
-# Compute the Transportation Matrix for the Dual Problem
-# ------------------------------------------------------
-#
-# Semi-continuous case
-# ````````````````````
-#
-# Sample one general measure a, one discrete measures b for the semi-continuous
-# case and compute the cost matrix c.
-
-n_source = 7
-n_target = 4
-reg = 1
-numItermax = 100000
-lr = 0.1
-batch_size = 3
-log = True
-
-a = ot.utils.unif(n_source)
-b = ot.utils.unif(n_target)
-
-rng = np.random.RandomState(0)
-X_source = rng.randn(n_source, 2)
-Y_target = rng.randn(n_target, 2)
-M = ot.dist(X_source, Y_target)
-
-#############################################################################
-#
-# Call the "SGD" dual method to find the transportation matrix in the
-# semi-continuous case
-
-sgd_dual_pi, log_sgd = ot.stochastic.solve_dual_entropic(a, b, M, reg,
-                                                         batch_size, numItermax,
-                                                         lr, log=log)
-print(log_sgd['alpha'], log_sgd['beta'])
-print(sgd_dual_pi)
-
-#############################################################################
-#
-# Compare the results with the Sinkhorn algorithm
-# ```````````````````````````````````````````````
-#
-# Call the Sinkhorn algorithm from POT
-
-sinkhorn_pi = ot.sinkhorn(a, b, M, reg)
-print(sinkhorn_pi)
-
-##############################################################################
-# Plot Transportation Matrices
-# ````````````````````````````
-#
-# For SGD
-
-pl.figure(4, figsize=(5, 5))
-ot.plot.plot1D_mat(a, b, sgd_dual_pi, 'dual : OT matrix SGD')
-pl.show()
-
-
-##############################################################################
-# For Sinkhorn
-
-pl.figure(4, figsize=(5, 5))
-ot.plot.plot1D_mat(a, b, sinkhorn_pi, 'OT matrix Sinkhorn')
-pl.show()
diff --git a/examples/sliced-wasserstein/README.txt b/examples/sliced-wasserstein/README.txt
index a575345..73e6122 100644
--- a/examples/sliced-wasserstein/README.txt
+++ b/examples/sliced-wasserstein/README.txt
@@ -1,4 +1,4 @@
 
 
 Sliced Wasserstein Distance
----------------------------
\ No newline at end of file
+---------------------------
diff --git a/examples/sliced-wasserstein/plot_variance.py b/examples/sliced-wasserstein/plot_variance.py
index 7d73907..f12b522 100644
--- a/examples/sliced-wasserstein/plot_variance.py
+++ b/examples/sliced-wasserstein/plot_variance.py
@@ -1,8 +1,8 @@
 # -*- coding: utf-8 -*-
 """
-==============================
-2D Sliced Wasserstein Distance
-==============================
+===============================================
+Sliced Wasserstein Distance on 2D distributions
+===============================================
 
 This example illustrates the computation of the sliced Wasserstein Distance as
 proposed in [31].
@@ -16,6 +16,8 @@ measures." Journal of Mathematical Imaging and Vision 51.1 (2015): 22-45
 #
 # License: MIT License
 
+# sphinx_gallery_thumbnail_number = 2
+
 import matplotlib.pylab as pl
 import numpy as np
 
diff --git a/examples/unbalanced-partial/plot_UOT_1D.py b/examples/unbalanced-partial/plot_UOT_1D.py
index 183849c..06dd02d 100644
--- a/examples/unbalanced-partial/plot_UOT_1D.py
+++ b/examples/unbalanced-partial/plot_UOT_1D.py
@@ -12,6 +12,8 @@ using a Kullback-Leibler relaxation.
 #
 # License: MIT License
 
+# sphinx_gallery_thumbnail_number = 4
+
 import numpy as np
 import matplotlib.pylab as pl
 import ot
@@ -69,7 +71,20 @@ epsilon = 0.1  # entropy parameter
 alpha = 1.  # Unbalanced KL relaxation parameter
 Gs = ot.unbalanced.sinkhorn_unbalanced(a, b, M, epsilon, alpha, verbose=True)
 
-pl.figure(4, figsize=(5, 5))
+pl.figure(3, figsize=(5, 5))
 ot.plot.plot1D_mat(a, b, Gs, 'UOT matrix Sinkhorn')
 
 pl.show()
+
+
+# %%
+# plot the transported mass
+# -------------------------
+
+pl.figure(4, figsize=(6.4, 3))
+pl.plot(x, a, 'b', label='Source distribution')
+pl.plot(x, b, 'r', label='Target distribution')
+pl.fill(x, Gs.sum(1), 'b', alpha=0.5, label='Transported source')
+pl.fill(x, Gs.sum(0), 'r', alpha=0.5, label='Transported target')
+pl.legend(loc='upper right')
+pl.title('Distributions and transported mass for UOT')
diff --git a/examples/unbalanced-partial/plot_regpath.py b/examples/unbalanced-partial/plot_regpath.py
index 4a51c2d..782e8c2 100644
--- a/examples/unbalanced-partial/plot_regpath.py
+++ b/examples/unbalanced-partial/plot_regpath.py
@@ -15,11 +15,12 @@ penalized linear regression.
 # Author: Haoran Wu <haoran.wu@univ-ubs.fr>
 # License: MIT License
 
+# sphinx_gallery_thumbnail_number = 2
 
 import numpy as np
 import matplotlib.pylab as pl
 import ot
-
+import matplotlib.animation as animation
 ##############################################################################
 # Generate data
 # -------------
@@ -72,6 +73,9 @@ t2, t_list2, g_list2 = ot.regpath.regularization_path(a, b, M, reg=final_gamma,
 ##############################################################################
 # Plot the regularization path
 # ----------------
+#
+# The OT plan is ploted as a function of $\gamma$ that is the inverse of the
+# weight on the marginal relaxations.
 
 #%% fully relaxed l2-penalized UOT
 
@@ -103,13 +107,53 @@ for p in range(4):
 pl.show()
 
 
+# %%
+# Animation of the regpath for UOT l2
+# ------------------------
+
+nv = 100
+g_list_v = np.logspace(-.5, -2.5, nv)
+
+pl.figure(3)
+
+
+def _update_plot(iv):
+    pl.clf()
+    tp = ot.regpath.compute_transport_plan(g_list_v[iv], g_list,
+                                           t_list)
+    P = tp.reshape((n, n))
+    if P.sum() > 0:
+        P = P / P.max()
+    for i in range(n):
+        for j in range(n):
+            if P[i, j] > 0:
+                pl.plot([xs[i, 0], xt[j, 0]], [xs[i, 1], xt[j, 1]], color='C2',
+                        alpha=P[i, j] * 0.5)
+    pl.scatter(xs[:, 0], xs[:, 1], c='C0', alpha=0.2)
+    pl.scatter(xt[:, 0], xt[:, 1], c='C1', alpha=0.2)
+    pl.scatter(xs[:, 0], xs[:, 1], c='C0', s=P.sum(1).ravel() * (1 + p) * 4,
+               label='Re-weighted source', alpha=1)
+    pl.scatter(xt[:, 0], xt[:, 1], c='C1', s=P.sum(0).ravel() * (1 + p) * 4,
+               label='Re-weighted target', alpha=1)
+    pl.plot([], [], color='C2', alpha=0.8, label='OT plan')
+    pl.title(r'$\ell_2$ UOT $\gamma$={:1.3f}'.format(g_list_v[iv]),
+             fontsize=11)
+    return 1
+
+
+i = 0
+_update_plot(i)
+
+ani = animation.FuncAnimation(pl.gcf(), _update_plot, nv, interval=50, repeat_delay=2000)
+
+
 ##############################################################################
 # Plot the semi-relaxed regularization path
 # -------------------
 
 #%% semi-relaxed l2-penalized UOT
 
-pl.figure(3)
+pl.figure(4)
 selected_gamma = [10, 1, 1e-1, 1e-2]
 for p in range(4):
     tp = ot.regpath.compute_transport_plan(selected_gamma[p], g_list2,
@@ -133,3 +177,43 @@ for p in range(4):
     if p < 2:
         pl.xticks(())
 pl.show()
+
+
+# %%
+# Animation of the regpath for semi-relaxed UOT l2
+# ------------------------
+
+nv = 100
+g_list_v = np.logspace(2.5, -2, nv)
+
+pl.figure(5)
+
+
+def _update_plot(iv):
+    pl.clf()
+    tp = ot.regpath.compute_transport_plan(g_list_v[iv], g_list2,
+                                           t_list2)
+    P = tp.reshape((n, n))
+    if P.sum() > 0:
+        P = P / P.max()
+    for i in range(n):
+        for j in range(n):
+            if P[i, j] > 0:
+                pl.plot([xs[i, 0], xt[j, 0]], [xs[i, 1], xt[j, 1]], color='C2',
+                        alpha=P[i, j] * 0.5)
+    pl.scatter(xs[:, 0], xs[:, 1], c='C0', alpha=0.2)
+    pl.scatter(xt[:, 0], xt[:, 1], c='C1', alpha=0.2)
+    pl.scatter(xs[:, 0], xs[:, 1], c='C0', s=P.sum(1).ravel() * (1 + p) * 4,
+               label='Re-weighted source', alpha=1)
+    pl.scatter(xt[:, 0], xt[:, 1], c='C1', s=P.sum(0).ravel() * (1 + p) * 4,
+               label='Re-weighted target', alpha=1)
+    pl.plot([], [], color='C2', alpha=0.8, label='OT plan')
+    pl.title(r'Semi-relaxed $\ell_2$ UOT $\gamma$={:1.3f}'.format(g_list_v[iv]),
+             fontsize=11)
+    return 1
+
+
+i = 0
+_update_plot(i)
+
+ani = animation.FuncAnimation(pl.gcf(), _update_plot, nv, interval=50, repeat_delay=2000)
-- 
cgit v1.2.3


From 0b223ff883fd73601984a92c31cb70d4aded16e8 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Thu, 7 Apr 2022 14:18:54 +0200
Subject: [MRG] Remove deprecated ot.gpu submodule (#361)

* remove all cpu submodule and tests

* speedup tests gromov
---
 README.md                  |   2 +-
 RELEASES.md                |   1 +
 docs/source/quickstart.rst |  58 +++++++++++---
 ot/gpu/__init__.py         |  50 ------------
 ot/gpu/bregman.py          | 196 ---------------------------------------------
 ot/gpu/da.py               | 144 ---------------------------------
 ot/gpu/utils.py            | 101 -----------------------
 test/test_gpu.py           | 106 ------------------------
 test/test_gromov.py        | 129 ++++++++++++++---------------
 9 files changed, 113 insertions(+), 674 deletions(-)
 delete mode 100644 ot/gpu/__init__.py
 delete mode 100644 ot/gpu/bregman.py
 delete mode 100644 ot/gpu/da.py
 delete mode 100644 ot/gpu/utils.py
 delete mode 100644 test/test_gpu.py

(limited to 'docs/source')

diff --git a/README.md b/README.md
index 0c3bd19..2ace69c 100644
--- a/README.md
+++ b/README.md
@@ -185,7 +185,7 @@ The contributors to this library are
 * [Alexandre Gramfort](http://alexandre.gramfort.net/) (CI, documentation)
 * [Laetitia Chapel](http://people.irisa.fr/Laetitia.Chapel/) (Partial OT)
 * [Michael Perrot](http://perso.univ-st-etienne.fr/pem82055/) (Mapping estimation)
-* [Léo Gautheron](https://github.com/aje) (GPU implementation)
+* [Léo Gautheron](https://github.com/aje) (Initial GPU implementation)
 * [Nathalie Gayraud](https://www.linkedin.com/in/nathalie-t-h-gayraud/?ppe=1) (DA classes)
 * [Stanislas Chambon](https://slasnista.github.io/) (DA classes)
 * [Antoine Rolet](https://arolet.github.io/) (EMD solver debug)
diff --git a/RELEASES.md b/RELEASES.md
index 7d458f3..b54a84a 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -5,6 +5,7 @@
 
 #### New features
 
+- remode deprecated `ot.gpu` submodule (PR #361)
 - Update examples in the gallery (PR #359).
 - Add stochastic loss and OT plan computation for regularized OT and 
   backend examples(PR #360).
diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index 09a362b..b4cc8ab 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -1028,15 +1028,6 @@ FAQ
     speedup can be obtained by using a GPU implementation since all operations
     are matrix/vector products.
 
-4. **Using GPU fails with error:  module 'ot' has no attribute 'gpu'**
-
-    In order to limit import time and hard dependencies in POT. we do not import
-    some sub-modules automatically with :code:`import ot`. In order to use the
-    acceleration in :any:`ot.gpu` you need first to import is with
-    :code:`import ot.gpu`.
-
-    See `Issue #85 <https://github.com/rflamary/POT/issues/85>`__ and :any:`ot.gpu`
-    for more details.
 
 
 References
@@ -1172,3 +1163,52 @@ References
 .. [30] Flamary, Rémi, et al. "Optimal transport with Laplacian regularization:
     Applications to domain adaptation and shape matching." NIPS Workshop on Optimal
     Transport and Machine Learning OTML. 2014.
+
+.. [31] Bonneel, Nicolas, et al. `Sliced and radon wasserstein barycenters of
+    measures
+    <https://perso.liris.cnrs.fr/nicolas.bonneel/WassersteinSliced-JMIV.pdf>`_\
+    , Journal of Mathematical Imaging and Vision 51.1 (2015): 22-45
+
+.. [32] Huang, M., Ma S., Lai, L. (2021). `A Riemannian Block Coordinate Descent Method for Computing the Projection Robust Wasserstein Distance <http://proceedings.mlr.press/v139/huang21e.html>`_\ , Proceedings of the 38th International Conference on Machine Learning (ICML).
+
+.. [33] Kerdoncuff T., Emonet R., Marc S. `Sampled Gromov Wasserstein
+    <https://hal.archives-ouvertes.fr/hal-03232509/document>`_\ , Machine
+    Learning Journal (MJL), 2021
+
+.. [34] Feydy, J., Séjourné, T., Vialard, F. X., Amari, S. I., Trouvé, A., &
+    Peyré, G. (2019, April). `Interpolating between optimal transport and MMD
+    using Sinkhorn divergences
+    <http://proceedings.mlr.press/v89/feydy19a/feydy19a.pdf>`_. In The 22nd
+    International Conference on Artificial Intelligence and Statistics (pp.
+    2681-2690). PMLR.
+
+.. [35] Deshpande, I., Hu, Y. T., Sun, R., Pyrros, A., Siddiqui, N., Koyejo, S., 
+    & Schwing, A. G. (2019). `Max-sliced wasserstein distance and its use 
+    for gans
+    <https://openaccess.thecvf.com/content_CVPR_2019/papers/Deshpande_Max-Sliced_Wasserstein_Distance_and_Its_Use_for_GANs_CVPR_2019_paper.pdf>`_. 
+    In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (pp. 10648-10656).
+
+.. [36] Liutkus, A., Simsekli, U., Majewski, S., Durmus, A., & Stöter, F. R. 
+       (2019, May). `Sliced-Wasserstein flows: Nonparametric generative modeling via
+        optimal transport and diffusions
+        <http://proceedings.mlr.press/v97/liutkus19a/liutkus19a.pdf>`_. In International
+        Conference on Machine Learning (pp. 4104-4113). PMLR.
+
+.. [37] Janati, H., Cuturi, M., Gramfort, A. `Debiased sinkhorn barycenters 
+    <http://proceedings.mlr.press/v119/janati20a/janati20a.pdf>`_ Proceedings of
+    the 37th International Conference on Machine Learning, PMLR 119:4692-4701, 2020
+
+.. [38] C. Vincent-Cuaz, T. Vayer, R. Flamary, M. Corneli, N. Courty, `Online
+       Graph Dictionary Learning <https://arxiv.org/pdf/2102.06555.pdf>`_\ , 
+       International Conference on Machine Learning (ICML), 2021.
+
+.. [39] Gozlan, N., Roberto, C., Samson, P. M., & Tetali, P. (2017).
+    `Kantorovich duality for general transport costs and applications
+    <https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.712.1825&rep=rep1&type=pdf>`_.
+    Journal of Functional Analysis, 273(11), 3327-3405.
+
+.. [40] Forrow, A., Hütter, J. C., Nitzan, M., Rigollet, P., Schiebinger, G., &
+    Weed, J. (2019, April). `Statistical optimal transport via factored
+    couplings <http://proceedings.mlr.press/v89/forrow19a/forrow19a.pdf>`_. In
+    The 22nd International Conference on Artificial Intelligence and Statistics
+    (pp. 2454-2465). PMLR.
diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py
deleted file mode 100644
index 12db605..0000000
--- a/ot/gpu/__init__.py
+++ /dev/null
@@ -1,50 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-GPU implementation for several OT solvers and utility 
-functions. 
-
-The GPU backend in handled by `cupy 
-<https://cupy.chainer.org/>`_.
-
-.. warning::
-    This module is now deprecated and will be removed in future releases. POT
-    now privides a backend mechanism that allows for solving prolem on GPU wth
-    the pytorch backend.
-    
-
-.. warning::
-    Note that by default the module is not imported in :mod:`ot`. In order to 
-    use it you need to explicitely import :mod:`ot.gpu` .
-
-By default, the functions in this module accept and return numpy arrays 
-in order to proide drop-in replacement for the other POT function but
-the transfer between CPU en GPU comes with a significant overhead.
-
-In order to get the best performances, we recommend to give only cupy 
-arrays to the functions and desactivate the conversion to numpy of the 
-result of the function with parameter ``to_numpy=False``.
-
-"""
-
-# Author: Remi Flamary <remi.flamary@unice.fr>
-#         Leo Gautheron <https://github.com/aje>
-#
-# License: MIT License
-
-import warnings
-
-from . import bregman
-from . import da
-from .bregman import sinkhorn
-from .da import sinkhorn_lpl1_mm
-
-from . import utils
-from .utils import dist, to_gpu, to_np
-
-
-warnings.warn('This module is deprecated and will be removed in the next minor release of POT', category=DeprecationWarning)
-
-
-__all__ = ["utils", "dist", "sinkhorn",
-           "sinkhorn_lpl1_mm", 'bregman', 'da', 'to_gpu', 'to_np']
-
diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py
deleted file mode 100644
index 76af00e..0000000
--- a/ot/gpu/bregman.py
+++ /dev/null
@@ -1,196 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Bregman projections for regularized OT with GPU
-"""
-
-# Author: Remi Flamary <remi.flamary@unice.fr>
-#         Leo Gautheron <https://github.com/aje>
-#
-# License: MIT License
-
-import cupy as np  # np used for matrix computation
-import cupy as cp  # cp used for cupy specific operations
-from . import utils
-
-
-def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9,
-                   verbose=False, log=False, to_numpy=True, **kwargs):
-    r"""
-    Solve the entropic regularization optimal transport on GPU
-
-    If the input matrix are in numpy format, they will be uploaded to the
-    GPU first which can incur significant time overhead.
-
-    The function solves the following optimization problem:
-
-    .. math::
-        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
-
-        s.t. \gamma 1 = a
-
-             \gamma^T 1= b
-
-             \gamma\geq 0
-    where :
-
-    - M is the (ns,nt) metric cost matrix
-    - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
-    - a and b are source and target weights (sum to 1)
-
-    The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [2]_
-
-
-    Parameters
-    ----------
-    a : np.ndarray (ns,)
-        samples weights in the source domain
-    b : np.ndarray (nt,) or np.ndarray (nt,nbb)
-        samples in the target domain, compute sinkhorn with multiple targets
-        and fixed M if b is a matrix (return OT loss + dual variables in log)
-    M : np.ndarray (ns,nt)
-        loss matrix
-    reg : float
-        Regularization term >0
-    numItermax : int, optional
-        Max number of iterations
-    stopThr : float, optional
-        Stop threshold on error (>0)
-    verbose : bool, optional
-        Print information along iterations
-    log : bool, optional
-        record log if True
-    to_numpy : boolean, optional (default True)
-        If true convert back the GPU array result to numpy format.
-
-
-    Returns
-    -------
-    gamma : (ns x nt) ndarray
-        Optimal transportation matrix for the given parameters
-    log : dict
-        log dictionary return only if log==True in parameters
-
-
-    References
-    ----------
-
-    .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
-
-
-    See Also
-    --------
-    ot.lp.emd : Unregularized OT
-    ot.optim.cg : General regularized OT
-
-    """
-
-    a = cp.asarray(a)
-    b = cp.asarray(b)
-    M = cp.asarray(M)
-
-    if len(a) == 0:
-        a = np.ones((M.shape[0],)) / M.shape[0]
-    if len(b) == 0:
-        b = np.ones((M.shape[1],)) / M.shape[1]
-
-    # init data
-    Nini = len(a)
-    Nfin = len(b)
-
-    if len(b.shape) > 1:
-        nbb = b.shape[1]
-    else:
-        nbb = 0
-
-    if log:
-        log = {'err': []}
-
-    # we assume that no distances are null except those of the diagonal of
-    # distances
-    if nbb:
-        u = np.ones((Nini, nbb)) / Nini
-        v = np.ones((Nfin, nbb)) / Nfin
-    else:
-        u = np.ones(Nini) / Nini
-        v = np.ones(Nfin) / Nfin
-
-    # print(reg)
-
-    # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute
-    K = np.empty(M.shape, dtype=M.dtype)
-    np.divide(M, -reg, out=K)
-    np.exp(K, out=K)
-
-    # print(np.min(K))
-    tmp2 = np.empty(b.shape, dtype=M.dtype)
-
-    Kp = (1 / a).reshape(-1, 1) * K
-    cpt = 0
-    err = 1
-    while (err > stopThr and cpt < numItermax):
-        uprev = u
-        vprev = v
-
-        KtransposeU = np.dot(K.T, u)
-        v = np.divide(b, KtransposeU)
-        u = 1. / np.dot(Kp, v)
-
-        if (np.any(KtransposeU == 0) or
-                np.any(np.isnan(u)) or np.any(np.isnan(v)) or
-                np.any(np.isinf(u)) or np.any(np.isinf(v))):
-            # we have reached the machine precision
-            # come back to previous solution and quit loop
-            print('Warning: numerical errors at iteration', cpt)
-            u = uprev
-            v = vprev
-            break
-        if cpt % 10 == 0:
-            # we can speed up the process by checking for the error only all
-            # the 10th iterations
-            if nbb:
-                err = np.sqrt(
-                    np.sum((u - uprev)**2) / np.sum((u)**2)
-                    + np.sum((v - vprev)**2) / np.sum((v)**2)
-                )
-            else:
-                # compute right marginal tmp2= (diag(u)Kdiag(v))^T1
-                tmp2 = np.sum(u[:, None] * K * v[None, :], 0)
-                #tmp2=np.einsum('i,ij,j->j', u, K, v)
-                err = np.linalg.norm(tmp2 - b)  # violation of marginal
-            if log:
-                log['err'].append(err)
-
-            if verbose:
-                if cpt % 200 == 0:
-                    print(
-                        '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
-                print('{:5d}|{:8e}|'.format(cpt, err))
-        cpt = cpt + 1
-    if log:
-        log['u'] = u
-        log['v'] = v
-
-    if nbb:  # return only loss
-        #res = np.einsum('ik,ij,jk,ij->k', u, K, v, M) (explodes cupy memory)
-        res = np.empty(nbb)
-        for i in range(nbb):
-            res[i] = np.sum(u[:, None, i] * (K * M) * v[None, :, i])
-        if to_numpy:
-            res = utils.to_np(res)
-        if log:
-            return res, log
-        else:
-            return res
-
-    else:  # return OT matrix
-        res = u.reshape((-1, 1)) * K * v.reshape((1, -1))
-        if to_numpy:
-            res = utils.to_np(res)
-        if log:
-            return res, log
-        else:
-            return res
-
-
-# define sinkhorn as sinkhorn_knopp
-sinkhorn = sinkhorn_knopp
diff --git a/ot/gpu/da.py b/ot/gpu/da.py
deleted file mode 100644
index 7adb830..0000000
--- a/ot/gpu/da.py
+++ /dev/null
@@ -1,144 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Domain adaptation with optimal transport with GPU implementation
-"""
-
-# Author: Remi Flamary <remi.flamary@unice.fr>
-#         Nicolas Courty <ncourty@irisa.fr>
-#         Michael Perrot <michael.perrot@univ-st-etienne.fr>
-#         Leo Gautheron <https://github.com/aje>
-#
-# License: MIT License
-
-
-import cupy as np  # np used for matrix computation
-import cupy as cp  # cp used for cupy specific operations
-import numpy as npp
-from . import utils
-
-from .bregman import sinkhorn
-
-
-def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
-                     numInnerItermax=200, stopInnerThr=1e-9, verbose=False,
-                     log=False, to_numpy=True):
-    """
-    Solve the entropic regularization optimal transport problem with nonconvex
-    group lasso regularization on GPU
-
-    If the input matrix are in numpy format, they will be uploaded to the
-    GPU first which can incur significant time overhead.
-
-
-    The function solves the following optimization problem:
-
-    .. math::
-        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)
-        + \eta \Omega_g(\gamma)
-
-        s.t. \gamma 1 = a
-
-             \gamma^T 1= b
-
-             \gamma\geq 0
-    where :
-
-    - M is the (ns,nt) metric cost matrix
-    - :math:`\Omega_e` is the entropic regularization term
-        :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
-    - :math:`\Omega_g` is the group lasso  regulaization term
-      :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1`
-      where  :math:`\mathcal{I}_c` are the index of samples from class c
-      in the source domain.
-    - a and b are source and target weights (sum to 1)
-
-    The algorithm used for solving the problem is the generalised conditional
-    gradient as proposed in  [5]_ [7]_
-
-
-    Parameters
-    ----------
-    a : np.ndarray (ns,)
-        samples weights in the source domain
-    labels_a : np.ndarray (ns,)
-        labels of samples in the source domain
-    b : np.ndarray (nt,)
-        samples weights in the target domain
-    M : np.ndarray (ns,nt)
-        loss matrix
-    reg : float
-        Regularization term for entropic regularization >0
-    eta : float, optional
-        Regularization term  for group lasso regularization >0
-    numItermax : int, optional
-        Max number of iterations
-    numInnerItermax : int, optional
-        Max number of iterations (inner sinkhorn solver)
-    stopInnerThr : float, optional
-        Stop threshold on error (inner sinkhorn solver) (>0)
-    verbose : bool, optional
-        Print information along iterations
-    log : bool, optional
-        record log if True
-    to_numpy : boolean, optional (default True)
-        If true convert back the GPU array result to numpy format.
-
-
-    Returns
-    -------
-    gamma : (ns x nt) ndarray
-        Optimal transportation matrix for the given parameters
-    log : dict
-        log dictionary return only if log==True in parameters
-
-
-    References
-    ----------
-
-    .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
-       "Optimal Transport for Domain Adaptation," in IEEE
-       Transactions on Pattern Analysis and Machine Intelligence ,
-       vol.PP, no.99, pp.1-1
-    .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
-       Generalized conditional gradient: analysis of convergence
-       and applications. arXiv preprint arXiv:1510.06567.
-
-    See Also
-    --------
-    ot.lp.emd : Unregularized OT
-    ot.bregman.sinkhorn : Entropic regularized OT
-    ot.optim.cg : General regularized OT
-
-    """
-
-    a, labels_a, b, M = utils.to_gpu(a, labels_a, b, M)
-
-    p = 0.5
-    epsilon = 1e-3
-
-    indices_labels = []
-    labels_a2 = cp.asnumpy(labels_a)
-    classes = npp.unique(labels_a2)
-    for c in classes:
-        idxc = utils.to_gpu(*npp.where(labels_a2 == c))
-        indices_labels.append(idxc)
-
-    W = np.zeros(M.shape)
-
-    for cpt in range(numItermax):
-        Mreg = M + eta * W
-        transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax,
-                          stopThr=stopInnerThr, to_numpy=False)
-        # the transport has been computed. Check if classes are really
-        # separated
-        W = np.ones(M.shape)
-        for (i, c) in enumerate(classes):
-
-            majs = np.sum(transp[indices_labels[i]], axis=0)
-            majs = p * ((majs + epsilon)**(p - 1))
-            W[indices_labels[i]] = majs
-
-    if to_numpy:
-        return utils.to_np(transp)
-    else:
-        return transp
diff --git a/ot/gpu/utils.py b/ot/gpu/utils.py
deleted file mode 100644
index 41e168a..0000000
--- a/ot/gpu/utils.py
+++ /dev/null
@@ -1,101 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Utility functions for GPU
-"""
-
-# Author: Remi Flamary <remi.flamary@unice.fr>
-#         Nicolas Courty <ncourty@irisa.fr>
-#         Leo Gautheron <https://github.com/aje>
-#
-# License: MIT License
-
-import cupy as np  # np used for matrix computation
-import cupy as cp  # cp used for cupy specific operations
-
-
-def euclidean_distances(a, b, squared=False, to_numpy=True):
-    """
-    Compute the pairwise euclidean distance between matrices a and b.
-
-    If the input matrix are in numpy format, they will be uploaded to the
-    GPU first which can incur significant time overhead.
-
-    Parameters
-    ----------
-    a : np.ndarray (n, f)
-        first matrix
-    b : np.ndarray (m, f)
-        second matrix
-    to_numpy : boolean, optional (default True)
-        If true convert back the GPU array result to numpy format.
-    squared : boolean, optional (default False)
-        if True, return squared euclidean distance matrix
-
-    Returns
-    -------
-    c : (n x m) np.ndarray or cupy.ndarray
-        pairwise euclidean distance distance matrix
-    """
-
-    a, b = to_gpu(a, b)
-
-    a2 = np.sum(np.square(a), 1)
-    b2 = np.sum(np.square(b), 1)
-
-    c = -2 * np.dot(a, b.T)
-    c += a2[:, None]
-    c += b2[None, :]
-
-    if not squared:
-        np.sqrt(c, out=c)
-    if to_numpy:
-        return to_np(c)
-    else:
-        return c
-
-
-def dist(x1, x2=None, metric='sqeuclidean', to_numpy=True):
-    """Compute distance between samples in x1 and x2 on gpu
-
-    Parameters
-    ----------
-
-    x1 : np.array (n1,d)
-        matrix with n1 samples of size d
-    x2 : np.array (n2,d), optional
-        matrix with n2 samples of size d (if None then x2=x1)
-    metric : str
-        Metric from 'sqeuclidean', 'euclidean',
-
-
-    Returns
-    -------
-
-    M : np.array (n1,n2)
-        distance matrix computed with given metric
-
-    """
-    if x2 is None:
-        x2 = x1
-    if metric == "sqeuclidean":
-        return euclidean_distances(x1, x2, squared=True, to_numpy=to_numpy)
-    elif metric == "euclidean":
-        return euclidean_distances(x1, x2, squared=False, to_numpy=to_numpy)
-    else:
-        raise NotImplementedError
-
-
-def to_gpu(*args):
-    """ Upload numpy arrays to GPU and return them"""
-    if len(args) > 1:
-        return (cp.asarray(x) for x in args)
-    else:
-        return cp.asarray(args[0])
-
-
-def to_np(*args):
-    """ convert GPU arras to numpy and return them"""
-    if len(args) > 1:
-        return (cp.asnumpy(x) for x in args)
-    else:
-        return cp.asnumpy(args[0])
diff --git a/test/test_gpu.py b/test/test_gpu.py
deleted file mode 100644
index 8e62a74..0000000
--- a/test/test_gpu.py
+++ /dev/null
@@ -1,106 +0,0 @@
-"""Tests for module gpu for gpu acceleration """
-
-# Author: Remi Flamary <remi.flamary@unice.fr>
-#
-# License: MIT License
-
-import numpy as np
-import ot
-import pytest
-
-try:  # test if cudamat installed
-    import ot.gpu
-    nogpu = False
-except ImportError:
-    nogpu = True
-
-
-@pytest.mark.skipif(nogpu, reason="No GPU available")
-def test_gpu_old_doctests():
-    a = [.5, .5]
-    b = [.5, .5]
-    M = [[0., 1.], [1., 0.]]
-    G = ot.sinkhorn(a, b, M, 1)
-    np.testing.assert_allclose(G, np.array([[0.36552929, 0.13447071],
-                                            [0.13447071, 0.36552929]]))
-
-
-@pytest.mark.skipif(nogpu, reason="No GPU available")
-def test_gpu_dist():
-
-    rng = np.random.RandomState(0)
-
-    for n_samples in [50, 100, 500, 1000]:
-        print(n_samples)
-        a = rng.rand(n_samples // 4, 100)
-        b = rng.rand(n_samples, 100)
-
-        M = ot.dist(a.copy(), b.copy())
-        M2 = ot.gpu.dist(a.copy(), b.copy())
-
-        np.testing.assert_allclose(M, M2, rtol=1e-10)
-
-        M2 = ot.gpu.dist(a.copy(), b.copy(), metric='euclidean', to_numpy=False)
-
-        # check raise not implemented wrong metric
-        with pytest.raises(NotImplementedError):
-            M2 = ot.gpu.dist(a.copy(), b.copy(), metric='cityblock', to_numpy=False)
-
-
-@pytest.mark.skipif(nogpu, reason="No GPU available")
-def test_gpu_sinkhorn():
-
-    rng = np.random.RandomState(0)
-
-    for n_samples in [50, 100, 500, 1000]:
-        a = rng.rand(n_samples // 4, 100)
-        b = rng.rand(n_samples, 100)
-
-        wa = ot.unif(n_samples // 4)
-        wb = ot.unif(n_samples)
-
-        wb2 = np.random.rand(n_samples, 20)
-        wb2 /= wb2.sum(0, keepdims=True)
-
-        M = ot.dist(a.copy(), b.copy())
-        M2 = ot.gpu.dist(a.copy(), b.copy(), to_numpy=False)
-
-        reg = 1
-
-        G = ot.sinkhorn(wa, wb, M, reg)
-        G1 = ot.gpu.sinkhorn(wa, wb, M, reg)
-
-        np.testing.assert_allclose(G1, G, rtol=1e-10)
-
-        # run all on gpu
-        ot.gpu.sinkhorn(wa, wb, M2, reg, to_numpy=False, log=True)
-
-        # run sinkhorn for multiple targets
-        ot.gpu.sinkhorn(wa, wb2, M2, reg, to_numpy=False, log=True)
-
-
-@pytest.mark.skipif(nogpu, reason="No GPU available")
-def test_gpu_sinkhorn_lpl1():
-
-    rng = np.random.RandomState(0)
-
-    for n_samples in [50, 100, 500]:
-        print(n_samples)
-        a = rng.rand(n_samples // 4, 100)
-        labels_a = np.random.randint(10, size=(n_samples // 4))
-        b = rng.rand(n_samples, 100)
-
-        wa = ot.unif(n_samples // 4)
-        wb = ot.unif(n_samples)
-
-        M = ot.dist(a.copy(), b.copy())
-        M2 = ot.gpu.dist(a.copy(), b.copy(), to_numpy=False)
-
-        reg = 1
-
-        G = ot.da.sinkhorn_lpl1_mm(wa, labels_a, wb, M, reg)
-        G1 = ot.gpu.da.sinkhorn_lpl1_mm(wa, labels_a, wb, M, reg)
-
-        np.testing.assert_allclose(G1, G, rtol=1e-10)
-
-        ot.gpu.da.sinkhorn_lpl1_mm(wa, labels_a, wb, M2, reg, to_numpy=False, log=True)
diff --git a/test/test_gromov.py b/test/test_gromov.py
index 12fd2b9..9c85b92 100644
--- a/test/test_gromov.py
+++ b/test/test_gromov.py
@@ -188,7 +188,7 @@ def test_gromov2_gradients():
 @pytest.skip_backend("jax", reason="test very slow with jax backend")
 @pytest.skip_backend("tf", reason="test very slow with tf backend")
 def test_entropic_gromov(nx):
-    n_samples = 50  # nb samples
+    n_samples = 10  # nb samples
 
     mu_s = np.array([0, 0])
     cov_s = np.array([[1, 0], [0, 1]])
@@ -222,9 +222,9 @@ def test_entropic_gromov(nx):
         q, Gb.sum(0), atol=1e-04)  # cf convergence gromov
 
     gw, log = ot.gromov.entropic_gromov_wasserstein2(
-        C1, C2, p, q, 'kl_loss', epsilon=1e-2, log=True)
+        C1, C2, p, q, 'kl_loss', max_iter=10, epsilon=1e-2, log=True)
     gwb, logb = ot.gromov.entropic_gromov_wasserstein2(
-        C1b, C2b, pb, qb, 'kl_loss', epsilon=1e-2, log=True)
+        C1b, C2b, pb, qb, 'kl_loss', max_iter=10, epsilon=1e-2, log=True)
     gwb = nx.to_numpy(gwb)
 
     G = log['T']
@@ -245,7 +245,7 @@ def test_entropic_gromov(nx):
 @pytest.skip_backend("tf", reason="test very slow with tf backend")
 def test_entropic_gromov_dtype_device(nx):
     # setup
-    n_samples = 50  # nb samples
+    n_samples = 5  # nb samples
 
     mu_s = np.array([0, 0])
     cov_s = np.array([[1, 0], [0, 1]])
@@ -280,7 +280,7 @@ def test_entropic_gromov_dtype_device(nx):
 
 
 def test_pointwise_gromov(nx):
-    n_samples = 50  # nb samples
+    n_samples = 5  # nb samples
 
     mu_s = np.array([0, 0])
     cov_s = np.array([[1, 0], [0, 1]])
@@ -331,14 +331,12 @@ def test_pointwise_gromov(nx):
     Gb = nx.to_numpy(nx.todense(Gb))
 
     np.testing.assert_allclose(G, Gb, atol=1e-06)
-    np.testing.assert_allclose(float(logb['gw_dist_estimated']), 0.10342276348494964, atol=1e-8)
-    np.testing.assert_allclose(float(logb['gw_dist_std']), 0.0015952535464736394, atol=1e-8)
 
 
 @pytest.skip_backend("tf", reason="test very slow with tf backend")
 @pytest.skip_backend("jax", reason="test very slow with jax backend")
 def test_sampled_gromov(nx):
-    n_samples = 50  # nb samples
+    n_samples = 5  # nb samples
 
     mu_s = np.array([0, 0], dtype=np.float64)
     cov_s = np.array([[1, 0], [0, 1]], dtype=np.float64)
@@ -365,9 +363,9 @@ def test_sampled_gromov(nx):
         return nx.abs(x - y)
 
     G, log = ot.gromov.sampled_gromov_wasserstein(
-        C1, C2, p, q, loss, max_iter=100, epsilon=1, log=True, verbose=True, random_state=42)
+        C1, C2, p, q, loss, max_iter=20, nb_samples_grad=2, epsilon=1, log=True, verbose=True, random_state=42)
     Gb, logb = ot.gromov.sampled_gromov_wasserstein(
-        C1b, C2b, pb, qb, lossb, max_iter=100, epsilon=1, log=True, verbose=True, random_state=42)
+        C1b, C2b, pb, qb, lossb, max_iter=20, nb_samples_grad=2, epsilon=1, log=True, verbose=True, random_state=42)
     Gb = nx.to_numpy(Gb)
 
     # check constraints
@@ -377,13 +375,10 @@ def test_sampled_gromov(nx):
     np.testing.assert_allclose(
         q, Gb.sum(0), atol=1e-04)  # cf convergence gromov
 
-    np.testing.assert_allclose(float(logb['gw_dist_estimated']), 0.05679474884977278, atol=1e-08)
-    np.testing.assert_allclose(float(logb['gw_dist_std']), 0.0005986592106971995, atol=1e-08)
-
 
 def test_gromov_barycenter(nx):
-    ns = 10
-    nt = 20
+    ns = 5
+    nt = 8
 
     Xs, ys = ot.datasets.make_data_classif('3gauss', ns, random_state=42)
     Xt, yt = ot.datasets.make_data_classif('3gauss2', nt, random_state=42)
@@ -450,8 +445,8 @@ def test_gromov_barycenter(nx):
 
 @pytest.mark.filterwarnings("ignore:divide")
 def test_gromov_entropic_barycenter(nx):
-    ns = 10
-    nt = 20
+    ns = 5
+    nt = 10
 
     Xs, ys = ot.datasets.make_data_classif('3gauss', ns, random_state=42)
     Xt, yt = ot.datasets.make_data_classif('3gauss2', nt, random_state=42)
@@ -517,7 +512,7 @@ def test_gromov_entropic_barycenter(nx):
 
 
 def test_fgw(nx):
-    n_samples = 50  # nb samples
+    n_samples = 20  # nb samples
 
     mu_s = np.array([0, 0])
     cov_s = np.array([[1, 0], [0, 1]])
@@ -579,7 +574,7 @@ def test_fgw(nx):
 
 
 def test_fgw2_gradients():
-    n_samples = 50  # nb samples
+    n_samples = 20  # nb samples
 
     mu_s = np.array([0, 0])
     cov_s = np.array([[1, 0], [0, 1]])
@@ -625,8 +620,8 @@ def test_fgw2_gradients():
 def test_fgw_barycenter(nx):
     np.random.seed(42)
 
-    ns = 50
-    nt = 60
+    ns = 10
+    nt = 20
 
     Xs, ys = ot.datasets.make_data_classif('3gauss', ns, random_state=42)
     Xt, yt = ot.datasets.make_data_classif('3gauss2', nt, random_state=42)
@@ -674,7 +669,7 @@ def test_fgw_barycenter(nx):
 
 
 def test_gromov_wasserstein_linear_unmixing(nx):
-    n = 10
+    n = 4
 
     X1, y1 = ot.datasets.make_data_classif('3gauss', n, random_state=42)
     X2, y2 = ot.datasets.make_data_classif('3gauss2', n, random_state=42)
@@ -709,10 +704,10 @@ def test_gromov_wasserstein_linear_unmixing(nx):
         tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200
     )
 
-    np.testing.assert_allclose(unmixing1, nx.to_numpy(unmixing1b), atol=1e-06)
-    np.testing.assert_allclose(unmixing1, [1., 0.], atol=1e-01)
-    np.testing.assert_allclose(unmixing2, nx.to_numpy(unmixing2b), atol=1e-06)
-    np.testing.assert_allclose(unmixing2, [0., 1.], atol=1e-01)
+    np.testing.assert_allclose(unmixing1, nx.to_numpy(unmixing1b), atol=5e-06)
+    np.testing.assert_allclose(unmixing1, [1., 0.], atol=5e-01)
+    np.testing.assert_allclose(unmixing2, nx.to_numpy(unmixing2b), atol=5e-06)
+    np.testing.assert_allclose(unmixing2, [0., 1.], atol=5e-01)
     np.testing.assert_allclose(C1_emb, nx.to_numpy(C1b_emb), atol=1e-06)
     np.testing.assert_allclose(C2_emb, nx.to_numpy(C2b_emb), atol=1e-06)
     np.testing.assert_allclose(reconstruction1, nx.to_numpy(reconstruction1b), atol=1e-06)
@@ -758,7 +753,7 @@ def test_gromov_wasserstein_linear_unmixing(nx):
 def test_gromov_wasserstein_dictionary_learning(nx):
 
     # create dataset composed from 2 structures which are repeated 5 times
-    shape = 10
+    shape = 4
     n_samples = 2
     n_atoms = 2
     projection = 'nonnegative_symmetric'
@@ -795,7 +790,7 @@ def test_gromov_wasserstein_dictionary_learning(nx):
     for i in range(n_samples):
         _, _, _, reconstruction = ot.gromov.gromov_wasserstein_linear_unmixing(
             Cs[i], Cdict_init, p=ps[i], q=q, reg=0.,
-            tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200
+            tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50
         )
         initial_total_reconstruction += reconstruction
 
@@ -803,7 +798,7 @@ def test_gromov_wasserstein_dictionary_learning(nx):
     Cdict, log = ot.gromov.gromov_wasserstein_dictionary_learning(
         Cs, D=n_atoms, nt=shape, ps=ps, q=q, Cdict_init=Cdict_init,
         epochs=epochs, batch_size=2 * n_samples, learning_rate=1., reg=0.,
-        tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200,
+        tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50,
         projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose
     )
     # > Compute reconstruction of samples on learned dictionary without backend
@@ -811,7 +806,7 @@ def test_gromov_wasserstein_dictionary_learning(nx):
     for i in range(n_samples):
         _, _, _, reconstruction = ot.gromov.gromov_wasserstein_linear_unmixing(
             Cs[i], Cdict, p=None, q=None, reg=0.,
-            tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200
+            tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50
         )
         total_reconstruction += reconstruction
 
@@ -822,7 +817,7 @@ def test_gromov_wasserstein_dictionary_learning(nx):
     Cdictb, log = ot.gromov.gromov_wasserstein_dictionary_learning(
         Csb, D=n_atoms, nt=shape, ps=None, q=None, Cdict_init=Cdict_initb,
         epochs=epochs, batch_size=n_samples, learning_rate=1., reg=0.,
-        tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200,
+        tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50,
         projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose
     )
     # Compute reconstruction of samples on learned dictionary
@@ -830,7 +825,7 @@ def test_gromov_wasserstein_dictionary_learning(nx):
     for i in range(n_samples):
         _, _, _, reconstruction = ot.gromov.gromov_wasserstein_linear_unmixing(
             Csb[i], Cdictb, p=psb[i], q=qb, reg=0.,
-            tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200
+            tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50
         )
         total_reconstruction_b += reconstruction
 
@@ -846,7 +841,7 @@ def test_gromov_wasserstein_dictionary_learning(nx):
     Cdict_bis, log = ot.gromov.gromov_wasserstein_dictionary_learning(
         Cs, D=n_atoms, nt=shape, ps=None, q=None, Cdict_init=None,
         epochs=epochs, batch_size=n_samples, learning_rate=1., reg=0.,
-        tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200,
+        tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50,
         projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose
     )
     # > Compute reconstruction of samples on learned dictionary
@@ -854,7 +849,7 @@ def test_gromov_wasserstein_dictionary_learning(nx):
     for i in range(n_samples):
         _, _, _, reconstruction = ot.gromov.gromov_wasserstein_linear_unmixing(
             Cs[i], Cdict_bis, p=ps[i], q=q, reg=0.,
-            tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200
+            tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50
         )
         total_reconstruction_bis += reconstruction
 
@@ -865,7 +860,7 @@ def test_gromov_wasserstein_dictionary_learning(nx):
     Cdictb_bis, log = ot.gromov.gromov_wasserstein_dictionary_learning(
         Csb, D=n_atoms, nt=shape, ps=psb, q=qb, Cdict_init=None,
         epochs=epochs, batch_size=n_samples, learning_rate=1., reg=0.,
-        tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200,
+        tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50,
         projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose
     )
     # > Compute reconstruction of samples on learned dictionary
@@ -873,7 +868,7 @@ def test_gromov_wasserstein_dictionary_learning(nx):
     for i in range(n_samples):
         _, _, _, reconstruction = ot.gromov.gromov_wasserstein_linear_unmixing(
             Csb[i], Cdictb_bis, p=None, q=None, reg=0.,
-            tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200
+            tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50
         )
         total_reconstruction_b_bis += reconstruction
 
@@ -892,7 +887,7 @@ def test_gromov_wasserstein_dictionary_learning(nx):
     Cdict_bis2, log = ot.gromov.gromov_wasserstein_dictionary_learning(
         Cs, D=n_atoms, nt=shape, ps=ps, q=q, Cdict_init=Cdict,
         epochs=epochs, batch_size=n_samples, learning_rate=10., reg=0.,
-        tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200,
+        tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50,
         projection=projection, use_log=use_log, use_adam_optimizer=use_adam_optimizer, verbose=verbose
     )
     # > Compute reconstruction of samples on learned dictionary
@@ -900,7 +895,7 @@ def test_gromov_wasserstein_dictionary_learning(nx):
     for i in range(n_samples):
         _, _, _, reconstruction = ot.gromov.gromov_wasserstein_linear_unmixing(
             Cs[i], Cdict_bis2, p=ps[i], q=q, reg=0.,
-            tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200
+            tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50
         )
         total_reconstruction_bis2 += reconstruction
 
@@ -911,7 +906,7 @@ def test_gromov_wasserstein_dictionary_learning(nx):
     Cdictb_bis2, log = ot.gromov.gromov_wasserstein_dictionary_learning(
         Csb, D=n_atoms, nt=shape, ps=psb, q=qb, Cdict_init=Cdictb,
         epochs=epochs, batch_size=n_samples, learning_rate=10., reg=0.,
-        tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200,
+        tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50,
         projection=projection, use_log=use_log, use_adam_optimizer=use_adam_optimizer, verbose=verbose
     )
     # > Compute reconstruction of samples on learned dictionary
@@ -919,7 +914,7 @@ def test_gromov_wasserstein_dictionary_learning(nx):
     for i in range(n_samples):
         _, _, _, reconstruction = ot.gromov.gromov_wasserstein_linear_unmixing(
             Csb[i], Cdictb_bis2, p=psb[i], q=qb, reg=0.,
-            tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200
+            tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50
         )
         total_reconstruction_b_bis2 += reconstruction
 
@@ -929,7 +924,7 @@ def test_gromov_wasserstein_dictionary_learning(nx):
 
 def test_fused_gromov_wasserstein_linear_unmixing(nx):
 
-    n = 10
+    n = 4
     X1, y1 = ot.datasets.make_data_classif('3gauss', n, random_state=42)
     X2, y2 = ot.datasets.make_data_classif('3gauss2', n, random_state=42)
     F, y = ot.datasets.make_data_classif('3gauss', n, random_state=42)
@@ -947,28 +942,28 @@ def test_fused_gromov_wasserstein_linear_unmixing(nx):
 
     unmixing1, C1_emb, Y1_emb, OT, reconstruction1 = ot.gromov.fused_gromov_wasserstein_linear_unmixing(
         C1, F, Cdict, Ydict, p=p, q=p, alpha=0.5, reg=reg,
-        tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=20, max_iter_inner=200
+        tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=10, max_iter_inner=50
     )
 
     unmixing1b, C1b_emb, Y1b_emb, OTb, reconstruction1b = ot.gromov.fused_gromov_wasserstein_linear_unmixing(
         C1b, Fb, Cdictb, Ydictb, p=None, q=None, alpha=0.5, reg=reg,
-        tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=20, max_iter_inner=200
+        tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=10, max_iter_inner=50
     )
 
     unmixing2, C2_emb, Y2_emb, OT, reconstruction2 = ot.gromov.fused_gromov_wasserstein_linear_unmixing(
         C2, F, Cdict, Ydict, p=None, q=None, alpha=0.5, reg=reg,
-        tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=20, max_iter_inner=200
+        tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=10, max_iter_inner=50
     )
 
     unmixing2b, C2b_emb, Y2b_emb, OTb, reconstruction2b = ot.gromov.fused_gromov_wasserstein_linear_unmixing(
         C2b, Fb, Cdictb, Ydictb, p=pb, q=pb, alpha=0.5, reg=reg,
-        tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=20, max_iter_inner=200
+        tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=10, max_iter_inner=50
     )
 
-    np.testing.assert_allclose(unmixing1, nx.to_numpy(unmixing1b), atol=1e-06)
-    np.testing.assert_allclose(unmixing1, [1., 0.], atol=1e-01)
-    np.testing.assert_allclose(unmixing2, nx.to_numpy(unmixing2b), atol=1e-06)
-    np.testing.assert_allclose(unmixing2, [0., 1.], atol=1e-01)
+    np.testing.assert_allclose(unmixing1, nx.to_numpy(unmixing1b), atol=4e-06)
+    np.testing.assert_allclose(unmixing1, [1., 0.], atol=4e-01)
+    np.testing.assert_allclose(unmixing2, nx.to_numpy(unmixing2b), atol=4e-06)
+    np.testing.assert_allclose(unmixing2, [0., 1.], atol=4e-01)
     np.testing.assert_allclose(C1_emb, nx.to_numpy(C1b_emb), atol=1e-03)
     np.testing.assert_allclose(C2_emb, nx.to_numpy(C2b_emb), atol=1e-03)
     np.testing.assert_allclose(Y1_emb, nx.to_numpy(Y1b_emb), atol=1e-03)
@@ -983,22 +978,22 @@ def test_fused_gromov_wasserstein_linear_unmixing(nx):
 
     unmixing1, C1_emb, Y1_emb, OT, reconstruction1 = ot.gromov.fused_gromov_wasserstein_linear_unmixing(
         C1, F, Cdict, Ydict, p=p, q=p, alpha=0.5, reg=reg,
-        tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=20, max_iter_inner=200
+        tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=10, max_iter_inner=50
     )
 
     unmixing1b, C1b_emb, Y1b_emb, OTb, reconstruction1b = ot.gromov.fused_gromov_wasserstein_linear_unmixing(
         C1b, Fb, Cdictb, Ydictb, p=None, q=None, alpha=0.5, reg=reg,
-        tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=20, max_iter_inner=200
+        tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=10, max_iter_inner=50
     )
 
     unmixing2, C2_emb, Y2_emb, OT, reconstruction2 = ot.gromov.fused_gromov_wasserstein_linear_unmixing(
         C2, F, Cdict, Ydict, p=None, q=None, alpha=0.5, reg=reg,
-        tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=20, max_iter_inner=200
+        tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=10, max_iter_inner=50
     )
 
     unmixing2b, C2b_emb, Y2b_emb, OTb, reconstruction2b = ot.gromov.fused_gromov_wasserstein_linear_unmixing(
         C2b, Fb, Cdictb, Ydictb, p=pb, q=pb, alpha=0.5, reg=reg,
-        tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=20, max_iter_inner=200
+        tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=10, max_iter_inner=50
     )
 
     np.testing.assert_allclose(unmixing1, nx.to_numpy(unmixing1b), atol=1e-06)
@@ -1018,7 +1013,7 @@ def test_fused_gromov_wasserstein_linear_unmixing(nx):
 def test_fused_gromov_wasserstein_dictionary_learning(nx):
 
     # create dataset composed from 2 structures which are repeated 5 times
-    shape = 10
+    shape = 4
     n_samples = 2
     n_atoms = 2
     projection = 'nonnegative_symmetric'
@@ -1060,7 +1055,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx):
     for i in range(n_samples):
         _, _, _, _, reconstruction = ot.gromov.fused_gromov_wasserstein_linear_unmixing(
             Cs[i], Ys[i], Cdict_init, Ydict_init, p=ps[i], q=q,
-            alpha=alpha, reg=0., tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200
+            alpha=alpha, reg=0., tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50
         )
         initial_total_reconstruction += reconstruction
 
@@ -1069,7 +1064,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx):
     Cdict, Ydict, log = ot.gromov.fused_gromov_wasserstein_dictionary_learning(
         Cs, Ys, D=n_atoms, nt=shape, ps=ps, q=q, Cdict_init=Cdict_init, Ydict_init=Ydict_init,
         epochs=epochs, batch_size=n_samples, learning_rate_C=1., learning_rate_Y=1., alpha=alpha, reg=0.,
-        tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200,
+        tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50,
         projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose
     )
     # > Compute reconstruction of samples on learned dictionary
@@ -1077,7 +1072,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx):
     for i in range(n_samples):
         _, _, _, _, reconstruction = ot.gromov.fused_gromov_wasserstein_linear_unmixing(
             Cs[i], Ys[i], Cdict, Ydict, p=None, q=None, alpha=alpha, reg=0.,
-            tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200
+            tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50
         )
         total_reconstruction += reconstruction
     # Compare both
@@ -1088,7 +1083,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx):
     Cdictb, Ydictb, log = ot.gromov.fused_gromov_wasserstein_dictionary_learning(
         Csb, Ysb, D=n_atoms, nt=shape, ps=None, q=None, Cdict_init=Cdict_initb, Ydict_init=Ydict_initb,
         epochs=epochs, batch_size=2 * n_samples, learning_rate_C=1., learning_rate_Y=1., alpha=alpha, reg=0.,
-        tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200,
+        tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50,
         projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose
     )
     # > Compute reconstruction of samples on learned dictionary
@@ -1096,7 +1091,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx):
     for i in range(n_samples):
         _, _, _, _, reconstruction = ot.gromov.fused_gromov_wasserstein_linear_unmixing(
             Csb[i], Ysb[i], Cdictb, Ydictb, p=psb[i], q=qb, alpha=alpha, reg=0.,
-            tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200
+            tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50
         )
         total_reconstruction_b += reconstruction
 
@@ -1111,7 +1106,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx):
     Cdict_bis, Ydict_bis, log = ot.gromov.fused_gromov_wasserstein_dictionary_learning(
         Cs, Ys, D=n_atoms, nt=shape, ps=None, q=None, Cdict_init=None, Ydict_init=None,
         epochs=epochs, batch_size=n_samples, learning_rate_C=1., learning_rate_Y=1., alpha=alpha, reg=0.,
-        tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200,
+        tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50,
         projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose
     )
     # > Compute reconstruction of samples on learned dictionary
@@ -1119,7 +1114,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx):
     for i in range(n_samples):
         _, _, _, _, reconstruction = ot.gromov.fused_gromov_wasserstein_linear_unmixing(
             Cs[i], Ys[i], Cdict_bis, Ydict_bis, p=ps[i], q=q, alpha=alpha, reg=0.,
-            tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200
+            tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50
         )
         total_reconstruction_bis += reconstruction
 
@@ -1130,7 +1125,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx):
     Cdictb_bis, Ydictb_bis, log = ot.gromov.fused_gromov_wasserstein_dictionary_learning(
         Csb, Ysb, D=n_atoms, nt=shape, ps=None, q=None, Cdict_init=None, Ydict_init=None,
         epochs=epochs, batch_size=n_samples, learning_rate_C=1., learning_rate_Y=1., alpha=alpha, reg=0.,
-        tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200,
+        tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50,
         projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose
     )
 
@@ -1139,7 +1134,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx):
     for i in range(n_samples):
         _, _, _, _, reconstruction = ot.gromov.fused_gromov_wasserstein_linear_unmixing(
             Csb[i], Ysb[i], Cdictb_bis, Ydictb_bis, p=psb[i], q=qb, alpha=alpha, reg=0.,
-            tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200
+            tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50
         )
         total_reconstruction_b_bis += reconstruction
 
@@ -1156,7 +1151,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx):
     Cdict_bis2, Ydict_bis2, log = ot.gromov.fused_gromov_wasserstein_dictionary_learning(
         Cs, Ys, D=n_atoms, nt=shape, ps=ps, q=q, Cdict_init=Cdict, Ydict_init=Ydict,
         epochs=epochs, batch_size=n_samples, learning_rate_C=10., learning_rate_Y=10., alpha=alpha, reg=0.,
-        tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200,
+        tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50,
         projection=projection, use_log=use_log, use_adam_optimizer=use_adam_optimizer, verbose=verbose
     )
     # > Compute reconstruction of samples on learned dictionary
@@ -1164,7 +1159,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx):
     for i in range(n_samples):
         _, _, _, _, reconstruction = ot.gromov.fused_gromov_wasserstein_linear_unmixing(
             Cs[i], Ys[i], Cdict_bis2, Ydict_bis2, p=ps[i], q=q, alpha=alpha, reg=0.,
-            tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200
+            tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50
         )
         total_reconstruction_bis2 += reconstruction
 
@@ -1175,7 +1170,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx):
     Cdictb_bis2, Ydictb_bis2, log = ot.gromov.fused_gromov_wasserstein_dictionary_learning(
         Csb, Ysb, D=n_atoms, nt=shape, ps=None, q=None, Cdict_init=Cdictb, Ydict_init=Ydictb,
         epochs=epochs, batch_size=n_samples, learning_rate_C=10., learning_rate_Y=10., alpha=alpha, reg=0.,
-        tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200,
+        tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50,
         projection=projection, use_log=use_log, use_adam_optimizer=use_adam_optimizer, verbose=verbose
     )
 
@@ -1184,7 +1179,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx):
     for i in range(n_samples):
         _, _, _, _, reconstruction = ot.gromov.fused_gromov_wasserstein_linear_unmixing(
             Csb[i], Ysb[i], Cdictb_bis2, Ydictb_bis2, p=None, q=None, alpha=alpha, reg=0.,
-            tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200
+            tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50
         )
         total_reconstruction_b_bis2 += reconstruction
 
-- 
cgit v1.2.3


From ac4cf442735ed4c0d5405ad861eddaa02afd4edd Mon Sep 17 00:00:00 2001
From: Laetitia Chapel <laetitia.chapel@univ-ubs.fr>
Date: Mon, 11 Apr 2022 15:38:18 +0200
Subject: [MRG] MM algorithms for UOT (#362)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* bugfix

* update refs partial OT

* fixes small typos in plot_partial_wass_and_gromov

* fix small bugs in partial.py

* update README

* pep8 bugfix

* modif doctest

* fix bugtests

* update on test_partial and test on the numerical precision on ot/partial

* resolve merge pb

* Delete partial.py

* update unbalanced: mm algo+plot

* update unbalanced: mm algo+plot

* update unbalanced: mm algo+plot

* update unbalanced: mm algo+plot

* update unbalanced: mm algo+plot

* add test mm algo unbalanced OT

* add test mm algo unbalanced OT

* add test mm algo unbalanced OT

* add test mm algo unbalanced OT

* add test mm algo unbalanced OT

* add test mm algo unbalanced OT

* add test mm algo unbalanced OT

* add test mm algo unbalanced OT

* update unbalanced: mm algo+plot

* update unbalanced: mm algo+plot

* update releases.md with new MM UOT algorithms

Co-authored-by: Rémi Flamary <remi.flamary@gmail.com>
---
 README.md                                         |   6 +-
 RELEASES.md                                       |   1 +
 docs/source/all.rst                               |   1 +
 examples/unbalanced-partial/plot_unbalanced_OT.py | 116 +++++
 ot/partial.py                                     |  84 +++-
 ot/regpath.py                                     | 545 ++++++++++++++--------
 ot/unbalanced.py                                  | 223 +++++++++
 test/test_unbalanced.py                           |  50 ++
 8 files changed, 802 insertions(+), 224 deletions(-)
 create mode 100644 examples/unbalanced-partial/plot_unbalanced_OT.py

(limited to 'docs/source')

diff --git a/README.md b/README.md
index 2ace69c..1b50aeb 100644
--- a/README.md
+++ b/README.md
@@ -35,7 +35,7 @@ POT provides the following generic OT solvers (links to examples):
   Large-scale Optimal Transport (semi-dual problem [18] and dual problem [19])
 * [Sampled solver of Gromov Wasserstein](https://pythonot.github.io/auto_examples/gromov/plot_gromov.html) for large-scale problem with any loss functions [33]
 * Non regularized [free support Wasserstein barycenters](https://pythonot.github.io/auto_examples/barycenters/plot_free_support_barycenter.html) [20].
-* [Unbalanced OT](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_UOT_1D.html) with KL relaxation and [barycenter](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_UOT_barycenter_1D.html) [10, 25].
+* [One dimensional Unbalanced OT](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_UOT_1D.html) with KL relaxation and [barycenter](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_UOT_barycenter_1D.html) [10, 25]. Also [exact unbalanced OT](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_unbalanced_ot.html) with KL and quadratic regularization and the [regularization path of UOT](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_regpath.html) [41]
 * [Partial Wasserstein and Gromov-Wasserstein](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_partial_wass_and_gromov.html) (exact [29] and entropic [3]
   formulations).
 * [Sliced Wasserstein](https://pythonot.github.io/auto_examples/sliced-wasserstein/plot_variance.html) [31, 32] and Max-sliced Wasserstein [35] that can be used for gradient flows [36].
@@ -309,4 +309,6 @@ Dictionary Learning](https://arxiv.org/pdf/2102.06555.pdf), International Confer
 
 [39] Gozlan, N., Roberto, C., Samson, P. M., & Tetali, P. (2017). [Kantorovich duality for general transport costs and applications](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.712.1825&rep=rep1&type=pdf). Journal of Functional Analysis, 273(11), 3327-3405.
 
-[40] Forrow, A., Hütter, J. C., Nitzan, M., Rigollet, P., Schiebinger, G., & Weed, J. (2019, April). [Statistical optimal transport via factored couplings](http://proceedings.mlr.press/v89/forrow19a/forrow19a.pdf). In The 22nd International Conference on Artificial Intelligence and Statistics (pp. 2454-2465). PMLR.
\ No newline at end of file
+[40] Forrow, A., Hütter, J. C., Nitzan, M., Rigollet, P., Schiebinger, G., & Weed, J. (2019, April). [Statistical optimal transport via factored couplings](http://proceedings.mlr.press/v89/forrow19a/forrow19a.pdf). In The 22nd International Conference on Artificial Intelligence and Statistics (pp. 2454-2465). PMLR.
+
+[41] Chapel*, L., Flamary*, R., Wu, H., Févotte, C., Gasso, G. (2021). [Unbalanced Optimal Transport through Non-negative Penalized Linear Regression](https://proceedings.neurips.cc/paper/2021/file/c3c617a9b80b3ae1ebd868b0017cc349-Paper.pdf) Advances in Neural Information Processing Systems (NeurIPS), 2020. (Two first co-authors)
\ No newline at end of file
diff --git a/RELEASES.md b/RELEASES.md
index b54a84a..7942a15 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -19,6 +19,7 @@
 - Add backend support for Domain Adaptation and Unbalanced solvers (PR #343).
 - Add (F)GW linear dictionary learning solvers + example  (PR #319)
 - Add links to related PR and Issues in the doc release page (PR #350)
+- Add new minimization-maximization algorithms for solving exact Unbalanced OT + example (PR #362)
 
 #### Closed issues
 
diff --git a/docs/source/all.rst b/docs/source/all.rst
index 3f7d029..1ec6be3 100644
--- a/docs/source/all.rst
+++ b/docs/source/all.rst
@@ -26,6 +26,7 @@ API and modules
    plot
    stochastic
    unbalanced
+   regpath
    partial
    sliced
    weak
diff --git a/examples/unbalanced-partial/plot_unbalanced_OT.py b/examples/unbalanced-partial/plot_unbalanced_OT.py
new file mode 100644
index 0000000..03487e7
--- /dev/null
+++ b/examples/unbalanced-partial/plot_unbalanced_OT.py
@@ -0,0 +1,116 @@
+# -*- coding: utf-8 -*-
+"""
+==============================================================
+2D examples of exact and entropic unbalanced optimal transport
+==============================================================
+This example is designed to show how to compute unbalanced and
+partial OT in POT.
+
+UOT aims at solving the following optimization problem:
+
+    .. math::
+        W = \min_{\gamma} <\gamma, \mathbf{M}>_F +
+        \mathrm{reg}\cdot\Omega(\gamma) +
+          \mathrm{reg_m} \cdot \mathrm{div}(\gamma \mathbf{1}, \mathbf{a}) +
+        \mathrm{reg_m} \cdot \mathrm{div}(\gamma^T \mathbf{1}, \mathbf{b})
+
+        s.t.
+             \gamma \geq 0
+
+where :math:`\mathrm{div}` is a divergence.
+When using the entropic UOT, :math:`\mathrm{reg}>0` and :math:`\mathrm{div}`
+should be the Kullback-Leibler divergence.
+When solving exact UOT, :math:`\mathrm{reg}=0` and :math:`\mathrm{div}`
+can be either the Kullback-Leibler or the quadratic divergence.
+Using :math:`\ell_1` norm gives the so-called partial OT.
+"""
+
+# Author: Laetitia Chapel <laetitia.chapel@univ-ubs.fr>
+# License: MIT License
+
+import numpy as np
+import matplotlib.pylab as pl
+import ot
+
+##############################################################################
+# Generate data
+# -------------
+
+# %% parameters and data generation
+
+n = 40  # nb samples
+
+mu_s = np.array([-1, -1])
+cov_s = np.array([[1, 0], [0, 1]])
+
+mu_t = np.array([4, 4])
+cov_t = np.array([[1, -.8], [-.8, 1]])
+
+np.random.seed(0)
+xs = ot.datasets.make_2D_samples_gauss(n, mu_s, cov_s)
+xt = ot.datasets.make_2D_samples_gauss(n, mu_t, cov_t)
+
+n_noise = 10
+
+xs = np.concatenate((xs, ((np.random.rand(n_noise, 2) - 4))), axis=0)
+xt = np.concatenate((xt, ((np.random.rand(n_noise, 2) + 6))), axis=0)
+
+n = n + n_noise
+
+a, b = np.ones((n,)) / n, np.ones((n,)) / n  # uniform distribution on samples
+
+# loss matrix
+M = ot.dist(xs, xt)
+M /= M.max()
+
+
+##############################################################################
+# Compute entropic kl-regularized UOT, kl- and l2-regularized UOT
+# -----------
+
+reg = 0.005
+reg_m_kl = 0.05
+reg_m_l2 = 5
+mass = 0.7
+
+entropic_kl_uot = ot.unbalanced.sinkhorn_unbalanced(a, b, M, reg, reg_m_kl)
+kl_uot = ot.unbalanced.mm_unbalanced(a, b, M, reg_m_kl, div='kl')
+l2_uot = ot.unbalanced.mm_unbalanced(a, b, M, reg_m_l2, div='l2')
+partial_ot = ot.partial.partial_wasserstein(a, b, M, m=mass)
+
+##############################################################################
+# Plot the results
+# ----------------
+
+pl.figure(2)
+transp = [partial_ot, l2_uot, kl_uot, entropic_kl_uot]
+title = ["partial OT \n m=" + str(mass), "$\ell_2$-UOT \n $\mathrm{reg_m}$=" +
+         str(reg_m_l2), "kl-UOT \n $\mathrm{reg_m}$=" + str(reg_m_kl),
+         "entropic kl-UOT \n $\mathrm{reg_m}$=" + str(reg_m_kl)]
+
+for p in range(4):
+    pl.subplot(2, 4, p + 1)
+    P = transp[p]
+    if P.sum() > 0:
+        P = P / P.max()
+    for i in range(n):
+        for j in range(n):
+            if P[i, j] > 0:
+                pl.plot([xs[i, 0], xt[j, 0]], [xs[i, 1], xt[j, 1]], color='C2',
+                        alpha=P[i, j] * 0.3)
+    pl.scatter(xs[:, 0], xs[:, 1], c='C0', alpha=0.2)
+    pl.scatter(xt[:, 0], xt[:, 1], c='C1', alpha=0.2)
+    pl.scatter(xs[:, 0], xs[:, 1], c='C0', s=P.sum(1).ravel() * (1 + p) * 2)
+    pl.scatter(xt[:, 0], xt[:, 1], c='C1', s=P.sum(0).ravel() * (1 + p) * 2)
+    pl.title(title[p])
+    pl.yticks(())
+    pl.xticks(())
+    if p < 1:
+        pl.ylabel("mappings")
+    pl.subplot(2, 4, p + 5)
+    pl.imshow(P, cmap='jet')
+    pl.yticks(())
+    pl.xticks(())
+    if p < 1:
+        pl.ylabel("transport plans")
+pl.show()
diff --git a/ot/partial.py b/ot/partial.py
index b7093e4..0a9e450 100755
--- a/ot/partial.py
+++ b/ot/partial.py
@@ -7,7 +7,6 @@ Partial OT solvers
 # License: MIT License
 
 import numpy as np
-
 from .lp import emd
 
 
@@ -29,7 +28,8 @@ def partial_wasserstein_lagrange(a, b, M, reg_m=None, nb_dummies=1, log=False,
 
              \gamma &\geq 0
 
-             \mathbf{1}^T \gamma^T \mathbf{1} = m &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\}
+             \mathbf{1}^T \gamma^T \mathbf{1} = m &
+             \leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\}
 
 
     or equivalently (see Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X.
@@ -50,7 +50,8 @@ def partial_wasserstein_lagrange(a, b, M, reg_m=None, nb_dummies=1, log=False,
     - :math:`\lambda` is the lagrangian cost. Tuning its value allows attaining
       a given mass to be transported `m`
 
-    The formulation of the problem has been proposed in :ref:`[28] <references-partial-wasserstein-lagrange>`
+    The formulation of the problem has been proposed in
+    :ref:`[28] <references-partial-wasserstein-lagrange>`
 
 
     Parameters
@@ -261,7 +262,7 @@ def partial_wasserstein(a, b, M, m=None, nb_dummies=1, log=False, **kwargs):
     b_extended = np.append(b, [(np.sum(a) - m) / nb_dummies] * nb_dummies)
     a_extended = np.append(a, [(np.sum(b) - m) / nb_dummies] * nb_dummies)
     M_extended = np.zeros((len(a_extended), len(b_extended)))
-    M_extended[-nb_dummies:, -nb_dummies:] = np.max(M) * 1e5
+    M_extended[-nb_dummies:, -nb_dummies:] = np.max(M) * 2
     M_extended[:len(a), :len(b)] = M
 
     gamma, log_emd = emd(a_extended, b_extended, M_extended, log=True,
@@ -455,7 +456,8 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None,
     - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights
     - `m` is the amount of mass to be transported
 
-    The formulation of the problem has been proposed in :ref:`[29] <references-partial-gromov-wasserstein>`
+    The formulation of the problem has been proposed in
+    :ref:`[29] <references-partial-gromov-wasserstein>`
 
 
     Parameters
@@ -469,7 +471,8 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None,
     q : ndarray, shape (nt,)
         Distribution in the target space
     m : float, optional
-        Amount of mass to be transported (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`)
+        Amount of mass to be transported
+        (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`)
     nb_dummies : int, optional
         Number of dummy points to add (avoid instabilities in the EMD solver)
     G0 : ndarray, shape (ns, nt), optional
@@ -623,16 +626,19 @@ def partial_gromov_wasserstein2(C1, C2, p, q, m=None, nb_dummies=1, G0=None,
 
              \gamma &\geq 0
 
-             \mathbf{1}^T \gamma^T \mathbf{1} = m &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\}
+             \mathbf{1}^T \gamma^T \mathbf{1} = m
+             &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\}
 
     where :
 
     - :math:`\mathbf{M}` is the metric cost matrix
-    - :math:`\Omega` is the entropic regularization term, :math:`\Omega(\gamma) = \sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - :math:`\Omega` is the entropic regularization term,
+      :math:`\Omega(\gamma) = \sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
     - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights
     - `m` is the amount of mass to be transported
 
-    The formulation of the problem has been proposed in :ref:`[29] <references-partial-gromov-wasserstein2>`
+    The formulation of the problem has been proposed in
+    :ref:`[29] <references-partial-gromov-wasserstein2>`
 
 
     Parameters
@@ -646,7 +652,8 @@ def partial_gromov_wasserstein2(C1, C2, p, q, m=None, nb_dummies=1, G0=None,
     q : ndarray, shape (nt,)
         Distribution in the target space
     m : float, optional
-        Amount of mass to be transported (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`)
+        Amount of mass to be transported
+        (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`)
     nb_dummies : int, optional
         Number of dummy points to add (avoid instabilities in the EMD solver)
     G0 : ndarray, shape (ns, nt), optional
@@ -728,21 +735,25 @@ def entropic_partial_wasserstein(a, b, M, reg, m=None, numItermax=1000,
     The function considers the following problem:
 
     .. math::
-        \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F + \mathrm{reg} \cdot\Omega(\gamma)
+        \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma,
+                 \mathbf{M} \rangle_F + \mathrm{reg} \cdot\Omega(\gamma)
 
         s.t. \gamma \mathbf{1} &\leq \mathbf{a} \\
              \gamma^T \mathbf{1} &\leq \mathbf{b} \\
              \gamma &\geq 0 \\
-             \mathbf{1}^T \gamma^T \mathbf{1} = m &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\} \\
+             \mathbf{1}^T \gamma^T \mathbf{1} = m
+             &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\} \\
 
     where :
 
     - :math:`\mathbf{M}` is the metric cost matrix
-    - :math:`\Omega`  is the entropic regularization term, :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - :math:`\Omega`  is the entropic regularization term,
+      :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
     - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights
     - `m` is the amount of mass to be transported
 
-    The formulation of the problem has been proposed in :ref:`[3] <references-entropic-partial-wasserstein>` (prop. 5)
+    The formulation of the problem has been proposed in
+    :ref:`[3] <references-entropic-partial-wasserstein>` (prop. 5)
 
 
     Parameters
@@ -829,12 +840,23 @@ def entropic_partial_wasserstein(a, b, M, reg, m=None, numItermax=1000,
     np.multiply(K, m / np.sum(K), out=K)
 
     err, cpt = 1, 0
+    q1 = np.ones(K.shape)
+    q2 = np.ones(K.shape)
+    q3 = np.ones(K.shape)
 
     while (err > stopThr and cpt < numItermax):
         Kprev = K
+        K = K * q1
         K1 = np.dot(np.diag(np.minimum(a / np.sum(K, axis=1), dx)), K)
+        q1 = q1 * Kprev / K1
+        K1prev = K1
+        K1 = K1 * q2
         K2 = np.dot(K1, np.diag(np.minimum(b / np.sum(K1, axis=0), dy)))
+        q2 = q2 * K1prev / K2
+        K2prev = K2
+        K2 = K2 * q3
         K = K2 * (m / np.sum(K2))
+        q3 = q3 * K2prev / K
 
         if np.any(np.isnan(K)) or np.any(np.isinf(K)):
             print('Warning: numerical errors at iteration', cpt)
@@ -861,7 +883,8 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None,
                                         numItermax=1000, tol=1e-7, log=False,
                                         verbose=False):
     r"""
-    Returns the partial Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`
+    Returns the partial Gromov-Wasserstein transport between
+    :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`
 
     The function solves the following optimization problem:
 
@@ -877,7 +900,8 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None,
 
              \gamma^T \mathbf{1} &\leq \mathbf{b}
 
-             \mathbf{1}^T \gamma^T \mathbf{1} = m &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\}
+             \mathbf{1}^T \gamma^T \mathbf{1} = m
+             &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\}
 
     where :
 
@@ -885,10 +909,13 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None,
     - :math:`\mathbf{C_2}` is the metric cost matrix in the target space
     - :math:`\mathbf{p}` and :math:`\mathbf{q}` are the sample weights
     - `L`: quadratic loss function
-    - :math:`\Omega` is the entropic regularization term, :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - :math:`\Omega` is the entropic regularization term,
+      :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
     - `m` is the amount of mass to be transported
 
-    The formulation of the GW problem has been proposed in :ref:`[12] <references-entropic-partial-gromov-wassertein>` and the partial GW in :ref:`[29] <references-entropic-partial-gromov-wassertein>`
+    The formulation of the GW problem has been proposed in
+    :ref:`[12] <references-entropic-partial-gromov-wassertein>` and the
+    partial GW in :ref:`[29] <references-entropic-partial-gromov-wassertein>`
 
     Parameters
     ----------
@@ -903,7 +930,8 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None,
     reg: float
         entropic regularization parameter
     m : float, optional
-        Amount of mass to be transported (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`)
+        Amount of mass to be transported (default:
+        :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`)
     G0 : ndarray, shape (ns, nt), optional
         Initialisation of the transportation matrix
     numItermax : int, optional
@@ -1005,13 +1033,15 @@ def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None,
                                          numItermax=1000, tol=1e-7, log=False,
                                          verbose=False):
     r"""
-    Returns the partial Gromov-Wasserstein discrepancy between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`
+    Returns the partial Gromov-Wasserstein discrepancy between
+    :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`
 
     The function solves the following optimization problem:
 
     .. math::
-        GW = \min_{\gamma} \quad \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l})\cdot
-            \gamma_{i,j}\cdot\gamma_{k,l} + \mathrm{reg} \cdot\Omega(\gamma)
+        GW = \min_{\gamma} \quad \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k},
+             \mathbf{C_2}_{j,l})\cdot
+             \gamma_{i,j}\cdot\gamma_{k,l} + \mathrm{reg} \cdot\Omega(\gamma)
 
     .. math::
         s.t. \ \gamma &\geq 0
@@ -1028,10 +1058,13 @@ def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None,
     - :math:`\mathbf{C_2}` is the metric cost matrix in the target space
     - :math:`\mathbf{p}` and :math:`\mathbf{q}` are the sample weights
     - `L` : quadratic loss function
-    - :math:`\Omega` is the entropic regularization term, :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - :math:`\Omega` is the entropic regularization term,
+      :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
     - `m` is the amount of mass to be transported
 
-    The formulation of the GW problem has been proposed in :ref:`[12] <references-entropic-partial-gromov-wassertein2>` and the partial GW in :ref:`[29] <references-entropic-partial-gromov-wassertein2>`
+    The formulation of the GW problem has been proposed in
+    :ref:`[12] <references-entropic-partial-gromov-wassertein2>` and the
+    partial GW in :ref:`[29] <references-entropic-partial-gromov-wassertein2>`
 
 
     Parameters
@@ -1047,7 +1080,8 @@ def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None,
     reg: float
         entropic regularization parameter
     m : float, optional
-        Amount of mass to be transported (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`)
+        Amount of mass to be transported (default:
+        :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`)
     G0 : ndarray, shape (ns, nt), optional
         Initialisation of the transportation matrix
     numItermax : int, optional
diff --git a/ot/regpath.py b/ot/regpath.py
index 269937a..e745288 100644
--- a/ot/regpath.py
+++ b/ot/regpath.py
@@ -11,34 +11,48 @@ import scipy.sparse as sp
 
 
 def recast_ot_as_lasso(a, b, C):
-    r"""This function recasts the l2-penalized UOT problem as a Lasso problem
+    r"""This function recasts the l2-penalized UOT problem as a Lasso problem.
+
+    Recall the l2-penalized UOT problem defined in
+    :ref:`[41] <references-regpath>`
 
-    Recall the l2-penalized UOT problem defined in [Chapel et al., 2021]
     .. math::
-        UOT = \min_T <C, T> + \lambda \|T 1_m - a\|_2^2 +
-                \lambda \|T^T 1_n - b\|_2^2
+        \text{UOT}_{\lambda} = \min_T <C, T> + \lambda \|T 1_m -
+                                \mathbf{a}\|_2^2 +
+                                \lambda \|T^T 1_n - \mathbf{b}\|_2^2
+
         s.t.
             T \geq 0
+
     where :
-    - C is the (dim_a, dim_b) metric cost matrix
-    - :math:`\lambda` is the l2-regularization coefficient
-    - a and b are source and target distributions
-    - T is the transport plan to optimize
 
-    The problem above can be reformulated to a non-negative penalized
+    - :math:`C` is the cost matrix
+    - :math:`\lambda` is the l2-regularization parameter
+    - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the source and target \
+      distributions
+    - :math:`T` is the transport plan to optimize
+
+    The problem above can be reformulated as a non-negative penalized
     linear regression problem, particularly Lasso
+
     .. math::
-        UOT2 = \min_t \gamma c^T t + 0.5 * \|H t - y\|_2^2
+        \text{UOT2}_{\lambda} = \min_{\mathbf{t}} \gamma \mathbf{c}^T
+        \mathbf{t} + 0.5 * \|H \mathbf{t} - \mathbf{y}\|_2^2
+
         s.t.
-            t \geq 0
+            \mathbf{t} \geq 0
+
     where :
-    - c is a (dim_a * dim_b, ) metric cost vector (flattened version of C)
-    - :math:`\gamma = 1/\lambda` is the l2-regularization coefficient
-    - y is the concatenation of vectors a and b, defined as y^T = [a^T b^T]
-    - H is a (dim_a + dim_b, dim_a * dim_b) metric matrix,
-        see [Chapel et al., 2021] for the design of H. The matrix product H t
-        computes both the source marginal and the target marginal.
-    - t is a (dim_a * dim_b, ) metric vector (flattened version of T)
+
+    - :math:`\mathbf{c}` is the flattened version of the cost matrix :math:`C`
+    - :math:`\mathbf{y}` is the concatenation of vectors :math:`\mathbf{a}` \
+      and :math:`\mathbf{b}`
+    - :math:`H` is a  metric matrix, see :ref:`[41] <references-regpath>` for \
+      the design of :math:`H`. The matrix product :math:`H\mathbf{t}` \
+      computes both the source marginal and the target marginals.
+    - :math:`\mathbf{t}` is the flattened version of the transport plan \
+      :math:`T`
+
     Parameters
     ----------
     a : np.ndarray (dim_a,)
@@ -47,14 +61,16 @@ def recast_ot_as_lasso(a, b, C):
         Histogram of dimension dim_b
     C : np.ndarray, shape (dim_a, dim_b)
         Cost matrix
+
     Returns
     -------
     H : np.ndarray (dim_a+dim_b, dim_a*dim_b)
-        Auxiliary matrix constituted by 0 and 1
+        Design matrix that contains only 0 and 1
     y : np.ndarray (ns + nt, )
-        Concatenation of histogram a and histogram b
+        Concatenation of histograms :math:`\mathbf{a}` and :math:`\mathbf{b}`
     c : np.ndarray (ns * nt, )
-        Flattened array of cost matrix
+        Flattened array of the cost matrix
+
     Examples
     --------
     >>> import ot
@@ -73,12 +89,12 @@ def recast_ot_as_lasso(a, b, C):
     >>> c
     array([16., 25., 28., 16., 40., 36.])
 
+
     References
     ----------
-    [Chapel et al., 2021]:
-        Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
+    .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
         Unbalanced optimal transport through non-negative penalized
-        linear regression.
+        linear regression. NeurIPS.
     """
 
     dim_a = np.shape(a)[0]
@@ -97,33 +113,47 @@ def recast_ot_as_lasso(a, b, C):
 
 
 def recast_semi_relaxed_as_lasso(a, b, C):
-    r"""This function recasts the semi-relaxed l2-UOT problem as Lasso problem
+    r"""This function recasts the semi-relaxed l2-UOT problem as Lasso problem.
 
     .. math::
-        semi-relaxed UOT = \min_T <C, T> + \lambda \|T 1_m - a\|_2^2
+
+        \text{semi-relaxed UOT} = \min_T <C, T>
+                                  + \lambda \|T 1_m - \mathbf{a}\|_2^2
+
         s.t.
-            T^T 1_n = b
-            t \geq 0
+            T^T 1_n = \mathbf{b}
+
+            \mathbf{t} \geq 0
+
     where :
-    - C is the (dim_a, dim_b) metric cost matrix
-    - :math:`\lambda` is the l2-regularization coefficient
-    - a and b are source and target distributions
-    - T is the transport plan to optimize
+
+    - :math:`C` is the metric cost matrix
+    - :math:`\lambda` is the l2-regularization parameter
+    - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the source and target \
+      distributions
+    - :math:`T` is the transport plan to optimize
 
     The problem above can be reformulated as follows
+
     .. math::
-        semi-relaxed UOT2 = \min_t \gamma c^T t + 0.5 * \|H_r t - a\|_2^2
+        \text{semi-relaxed UOT2} = \min_t \gamma \mathbf{c}^T t
+                                   + 0.5 * \|H_r \mathbf{t} - \mathbf{a}\|_2^2
+
         s.t.
-            H_c t = b
-            t \geq 0
+            H_c \mathbf{t} = \mathbf{b}
+
+            \mathbf{t} \geq 0
+
     where :
-    - c is a (dim_a * dim_b, ) metric cost vector (flattened version of C)
-    - :math:`\gamma = 1/\lambda` is the l2-regularization coefficient
-    - H_r is  a (dim_a, dim_a * dim_b) metric matrix,
-        which computes the sum along the rows of transport plan T
-    - H_c is a (dim_b, dim_a * dim_b) metric matrix,
-        which computes the sum along the columns of transport plan T
-    - t is a (dim_a * dim_b, ) metric vector (flattened version of T)
+
+    - :math:`\mathbf{c}` is flattened version of the cost matrix :math:`C`
+    - :math:`\gamma = 1/\lambda` is the l2-regularization parameter
+    - :math:`H_r` is  a  metric matrix which computes the sum along the \
+      rows of the transport plan :math:`T`
+    - :math:`H_c` is a  metric matrix which computes the sum along the \
+      columns of the transport plan :math:`T`
+    - :math:`\mathbf{t}` is the flattened version of :math:`T`
+
     Parameters
     ----------
     a : np.ndarray (dim_a,)
@@ -132,16 +162,18 @@ def recast_semi_relaxed_as_lasso(a, b, C):
         Histogram of dimension dim_b
     C : np.ndarray, shape (dim_a, dim_b)
         Cost matrix
+
     Returns
     -------
     Hr : np.ndarray (dim_a, dim_a * dim_b)
         Auxiliary matrix constituted by 0 and 1, which computes
-        the sum along the rows of transport plan T
+        the sum along the rows of transport plan :math:`T`
     Hc : np.ndarray (dim_b, dim_a * dim_b)
         Auxiliary matrix constituted by 0 and 1, which computes
-        the sum along the columns of transport plan T
+        the sum along the columns of transport plan :math:`T`
     c : np.ndarray (ns * nt, )
-        Flattened array of cost matrix
+        Flattened array of the cost matrix
+
     Examples
     --------
     >>> import ot
@@ -179,49 +211,60 @@ def recast_semi_relaxed_as_lasso(a, b, C):
 
 def ot_next_gamma(phi, delta, HtH, Hty, c, active_index, current_gamma):
     r""" This function computes the next value of gamma if a variable
-    will be added in next iteration of the regularization path
+    is added in the next iteration of the regularization path.
 
     We look for the largest value of gamma such that
     the gradient of an inactive variable vanishes
+
     .. math::
-        \max_{i \in \bar{A}} \frac{h_i^T(H_A \phi - y)}{h_i^T H_A \delta - c_i}
+        \max_{i \in \bar{A}} \frac{\mathbf{h}_i^T(H_A \phi - \mathbf{y})}
+        {\mathbf{h}_i^T H_A \delta - \mathbf{c}_i}
+
     where :
+
     - A is the current active set
-    - h_i is the ith column of auxiliary matrix H
-    - H_A is the sub-matrix constructed by the columns of H
-        whose indices belong to the active set A
-    - c_i is the ith element of cost vector c
-    - y is the concatenation of source and target distribution
-    - :math:`\phi` is the intercept of the solutions in current iteration
-    - :math:`\delta` is the slope of the solutions in current iteration
+    - :math:`\mathbf{h}_i` is the :math:`i` th column of the design \
+      matrix :math:`{H}`
+    - :math:`{H}_A` is the sub-matrix constructed by the columns of \
+      :math:`{H}` whose indices belong to the active set A
+    - :math:`\mathbf{c}_i` is the :math:`i` th element of the cost vector \
+      :math:`\mathbf{c}`
+    - :math:`\mathbf{y}` is the concatenation of the source and target \
+      distributions
+    - :math:`\phi` is the intercept of the solutions at the current iteration
+    - :math:`\delta` is the slope of the solutions at the current iteration
+
     Parameters
     ----------
-    phi : np.ndarray (|A|, )
-        Intercept of the solutions in current iteration (t is piecewise linear)
-    delta : np.ndarray (|A|, )
-        Slope of the solutions in current iteration (t is piecewise linear)
+    phi : np.ndarray (size(A), )
+        Intercept of the solutions at the current iteration
+    delta : np.ndarray (size(A), )
+        Slope of the solutions at the current iteration
     HtH : np.ndarray (dim_a * dim_b, dim_a * dim_b)
-        Matrix product of H^T H
+        Matrix product of :math:`{H}^T {H}`
     Hty : np.ndarray (dim_a + dim_b, )
-        Matrix product of H^T y
+        Matrix product of :math:`{H}^T \mathbf{y}`
     c: np.ndarray (dim_a * dim_b, )
-        Flattened array of cost matrix C
+        Flattened array of the cost matrix :math:`{C}`
     active_index : list
         Indices of active variables
     current_gamma : float
-        Value of regularization coefficient at the start of current iteration
+        Value of the regularization parameter at the beginning of the current \
+        iteration
+
     Returns
     -------
     next_gamma : float
         Value of gamma if a variable is added to active set in next iteration
     next_active_index : int
         Index of variable to be activated
+
+
     References
     ----------
-    [Chapel et al., 2021]:
-        Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
+    .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
         Unbalanced optimal transport through non-negative penalized
-        linear regression.
+        linear regression. NeurIPS.
     """
     M = (HtH[:, active_index].dot(phi) - Hty) / \
         (HtH[:, active_index].dot(delta) - c + 1e-16)
@@ -237,56 +280,65 @@ def semi_relaxed_next_gamma(phi, delta, phi_u, delta_u, HrHr, Hc, Hra,
 
     By taking the Lagrangian form of the problem, we obtain a similar update
     as the two-sided relaxed UOT
+
     .. math::
-        \max_{i \in \bar{A}} \frac{h_{r i}^T(H_{r A} \phi - a) + h_{c i}^T
-            \phi_u}{h_{r i}^T H_{r A} \delta + h_{c i} \delta_u - c_i}
+
+        \max_{i \in \bar{A}} \frac{\mathbf{h}_{ri}^T(H_{rA} \phi - \mathbf{a})
+           + \mathbf{h}_{c i}^T\phi_u}{\mathbf{h}_{r i}^T H_{r A} \delta + \
+            \mathbf{h}_{c i} \delta_u - \mathbf{c}_i}
+
     where :
+
     - A is the current active set
-    - h_{r i} is the ith column of the matrix H_r
-    - h_{c i} is the ith column of the matrix H_c
-    - H_{r A} is the sub-matrix constructed by the columns of H_r
-        whose indices belong to the active set A
-    - c_i is the ith element of cost vector c
-    - y is the concatenation of source and target distribution
+    - :math:`\mathbf{h}_{r i}` is the ith column of the matrix :math:`H_r`
+    - :math:`\mathbf{h}_{c i}` is the ith column of the matrix :math:`H_c`
+    - :math:`H_{r A}` is the sub-matrix constructed by the columns of \
+      :math:`H_r` whose indices belong to the active set A
+    - :math:`\mathbf{c}_i` is the :math:`i` th element of cost vector \
+      :math:`\mathbf{c}`
     - :math:`\phi` is the intercept of the solutions in current iteration
     - :math:`\delta` is the slope of the solutions in current iteration
-    - :math:`\phi_u` is the intercept of Lagrange parameter in current
-        iteration
-    - :math:`\delta_u` is the slope of Lagrange parameter in current iteration
+    - :math:`\phi_u` is the intercept of Lagrange parameter at the \
+      current iteration
+    - :math:`\delta_u` is the slope of Lagrange parameter at the \
+      current iteration
+
     Parameters
     ----------
-    phi : np.ndarray (|A|, )
-        Intercept of the solutions in current iteration (t is piecewise linear)
-    delta : np.ndarray (|A|, )
-        Slope of the solutions in current iteration (t is piecewise linear)
+    phi : np.ndarray (size(A), )
+        Intercept of the solutions at the current iteration
+    delta : np.ndarray (size(A), )
+        Slope of the solutions at the current iteration
     phi_u : np.ndarray (dim_b, )
-        Intercept of the Lagrange parameter in current iteration (also linear)
+        Intercept of the Lagrange parameter at the current iteration
     delta_u : np.ndarray (dim_b, )
-        Slope of the Lagrange parameter in current iteration (also linear)
+        Slope of the Lagrange parameter at the current iteration
     HrHr : np.ndarray (dim_a * dim_b, dim_a * dim_b)
-        Matrix product of H_r^T H_r
+        Matrix product of :math:`H_r^T H_r`
     Hc : np.ndarray (dim_b, dim_a * dim_b)
-        Matrix that computes the sum along the columns of transport plan T
+        Matrix that computes the sum along the columns of the transport plan \
+        :math:`T`
     Hra : np.ndarray (dim_a * dim_b, )
-        Matrix product of H_r^T a
+        Matrix product of :math:`H_r^T \mathbf{a}`
     c: np.ndarray (dim_a * dim_b, )
-        Flattened array of cost matrix C
+        Flattened array of cost matrix :math:`C`
     active_index : list
         Indices of active variables
     current_gamma : float
         Value of regularization coefficient at the start of current iteration
+
     Returns
     -------
     next_gamma : float
         Value of gamma if a variable is added to active set in next iteration
     next_active_index : int
         Index of variable to be activated
+
     References
     ----------
-    [Chapel et al., 2021]:
-        Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
+    .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
         Unbalanced optimal transport through non-negative penalized
-        linear regression.
+        linear regression. NeurIPS.
     """
 
     M = (HrHr[:, active_index].dot(phi) - Hra + Hc.T.dot(phi_u)) / \
@@ -297,37 +349,48 @@ def semi_relaxed_next_gamma(phi, delta, phi_u, delta_u, HrHr, Hc, Hra,
 
 
 def compute_next_removal(phi, delta, current_gamma):
-    r""" This function computes the next value of gamma if a variable
-    is removed in next iteration of regularization path
+    r""" This function computes the next gamma value if a variable
+    is removed at the next iteration of the regularization path.
+
+    We look for the largest value of the regularization parameter such that
+    an element of the current solution vanishes
 
-    We look for the largest value of gamma such that
-    an element of current solution vanishes
     .. math::
         \max_{j \in A} \frac{\phi_j}{\delta_j}
+
     where :
+
     - A is the current active set
-    - phi_j is the jth element of the intercept of current solution
-    - delta_j is the jth elemnt of the slope of current solution
+    - :math:`\phi_j` is the :math:`j` th element of the intercept of the \
+      current solution
+    - :math:`\delta_j` is the :math:`j` th element of the slope of the \
+      current solution
+
+
     Parameters
     ----------
-    phi : np.ndarray (|A|, )
-        Intercept of the solutions in current iteration (t is piecewise linear)
-    delta : np.ndarray (|A|, )
-        Slope of the solutions in current iteration (t is piecewise linear)
+    phi : ndarray, shape (size(A), )
+        Intercept of the solution at the current iteration
+    delta : ndarray, shape (size(A), )
+        Slope of the solution at the current iteration
     current_gamma : float
-        Value of regularization coefficient at the start of current iteration
+        Value of the regularization parameter at the beginning of the \
+        current iteration
+
     Returns
     -------
     next_removal_gamma : float
-        Value of gamma if a variable is removed in next iteration
+        Gamma value if a variable is removed at the next iteration
     next_removal_index : int
-        Index of the variable to remove in next iteration
+        Index of the variable to be removed at the next iteration
+
+
+    .. _references-regpath:
     References
     ----------
-    [Chapel et al., 2021]:
-        Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
+    .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
         Unbalanced optimal transport through non-negative penalized
-        linear regression.
+        linear regression. NeurIPS.
     """
     r_candidate = phi / (delta - 1e-16)
     r_candidate[r_candidate >= (1 - 1e-8) * current_gamma] = 0
@@ -335,56 +398,74 @@ def compute_next_removal(phi, delta, current_gamma):
 
 
 def complement_schur(M_current, b, d, id_pop):
-    r""" This function computes the inverse of matrix in regularization path
-    using Schur complement
+    r""" This function computes the inverse of the design matrix in the \
+    regularization path using the  Schur complement. Two cases may arise:
+
+    Case 1: one variable is added to the active set
+
 
-    Two cases may arise: Firstly one variable is added to the active set
     .. math::
         M_{k+1}^{-1} =
         \begin{bmatrix}
-            M_{k}^{-1} + s^{-1} M_{k}^{-1} b b^T M_{k}^{-1} & -s^{-1} \\
-            - s^{-1} b^T M_{k}^{-1} & s^{-1}
+            M_{k}^{-1} + s^{-1} M_{k}^{-1} \mathbf{b} \mathbf{b}^T M_{k}^{-1} \
+            & - M_{k}^{-1} \mathbf{b} s^{-1}   \\
+            - s^{-1} \mathbf{b}^T M_{k}^{-1} & s^{-1}
         \end{bmatrix}
+
+
     where :
-    - :math:`M_k^{-1}` is the inverse of matrix in previous iteration and
-        :math:`M_k` is the upper left block matrix in Schur formulation
-    - b is the upper right block matrix in Schur formulation. In our case,
-        b is reduced to a column vector and b^T is the lower left block matrix
-    - s is the Schur complement, given by
-        :math:`s = d - b^T M_{k}^{-1} b` in our case
-
-    Secondly, one variable is removed from the active set
+
+    - :math:`M_k^{-1}` is the inverse of the design matrix :math:`H_A^tH_A` \
+      of the previous iteration
+    - :math:`\mathbf{b}` is the last column of :math:`M_{k}`
+    - :math:`s` is the Schur complement, given by  \
+      :math:`s = \mathbf{d} - \mathbf{b}^T M_{k}^{-1} \mathbf{b}`
+
+    Case 2: one variable is removed from the active set.
+
     .. math::
-        M_{k+1}^{-1} = M^{-1}_{A_k \backslash q} -
+        M_{k+1}^{-1} = M^{-1}_{k \backslash q} -
                        \frac{r_{-q,q} r^{T}_{-q,q}}{r_{q,q}}
+
     where :
-    - q is the index of column and row to delete
-    - :math:`M^{-1}_{A_k \backslash q}` is the previous inverse matrix
-        without qth column and qth row
-    - r_{-q,q} is the qth column of :math:`M^{-1}_{k}` without the qth element
-    - r_{q, q} is the element of qth column and qth row in :math:`M^{-1}_{k}`
+
+    - :math:`q` is the index of column and row to delete
+    - :math:`M^{-1}_{k \backslash q}` is the previous inverse matrix deprived \
+      of the :math:`q` th column and :math:`q` th row
+    - :math:`r_{-q,q}` is the :math:`q` th column of :math:`M^{-1}_{k}` \
+      without the :math:`q` th element
+    - :math:`r_{q, q}` is the element of :math:`q` th column and :math:`q` th \
+      row in :math:`M^{-1}_{k}`
+
+
     Parameters
     ----------
-    M_current : np.ndarray (|A|-1, |A|-1)
-        Inverse matrix in previous iteration
-    b : np.ndarray (|A|-1, )
-        Upper right matrix in Schur complement, a column vector in our case
+    M_current : ndarray, shape (size(A)-1, size(A)-1)
+        Inverse matrix of :math:`H_A^tH_A` at the previous iteration, with \
+        size(A) the size of the active set
+    b : ndarray, shape (size(A)-1, )
+        None for case 2 (removal), last column of :math:`M_{k}` for case 1 \
+        (addition)
     d : float
-        Lower right matrix in Schur complement, a scalar in our case
-    id_pop
+        should be equal to 2 when UOT and 1 for the semi-relaxed OT
+    id_pop : int
         Index of the variable to be removed,  equal to -1
-        if none of the variables is deleted in current iteration
+        if no variable is deleted at the current iteration
+
+
     Returns
     -------
-    M : np.ndarray (|A|, |A|)
-        Inverse matrix needed in current iteration
+    M : ndarray, shape (size(A), size(A))
+        Inverse matrix of :math:`H_A^tH_A` of the current iteration
+
+
     References
     ----------
-    [Chapel et al., 2021]:
-        Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
+    .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
         Unbalanced optimal transport through non-negative penalized
-        linear regression.
+        linear regression. NeurIPS.
     """
+
     if b is None:
         b = M_current[id_pop, :]
         b = np.delete(b, id_pop)
@@ -409,33 +490,39 @@ def complement_schur(M_current, b, d, id_pop):
 
 
 def construct_augmented_H(active_index, m, Hc, HrHr):
-    r""" This function construct an augmented matrix for the first iteration of
-    semi-relaxed regularization path
+    r""" This function constructs an augmented matrix for the first iteration
+    of the semi-relaxed regularization path
 
     .. math::
-        Augmented_H =
+        \text{Augmented}_H =
         \begin{bmatrix}
             0 & H_{c A} \\
             H_{c A}^T & H_{r A}^T H_{r A}
         \end{bmatrix}
+
     where :
-    - H_{r A} is the sub-matrix constructed by the columns of H_r
-        whose indices belong to the active set A
-    - H_{c A} is the sub-matrix constructed by the columns of H_c
-        whose indices belong to the active set A
+
+    - :math:`H_{r A}` is the sub-matrix constructed by the columns of \
+      :math:`H_r` whose indices belong to the active set A
+    - :math:`H_{c A}` is the sub-matrix constructed by the columns of \
+      :math:`H_c` whose indices belong to the active set A
+
+
     Parameters
     ----------
     active_index : list
-        Indices of active variables
+        Indices of the active variables
     m : int
         Length of the target distribution
     Hc : np.ndarray (dim_b, dim_a * dim_b)
-        Matrix that computes the sum along the columns of transport plan T
+        Matrix that computes the sum along the columns of the transport plan \
+        :math:`T`
     HrHr : np.ndarray (dim_a * dim_b, dim_a * dim_b)
-        Matrix product of H_r^T H_r
+        Matrix product of :math:`H_r^T H_r`
+
     Returns
     -------
-    H_augmented : np.ndarray (dim_b + |A|, dim_b + |A|)
+    H_augmented : np.ndarray (dim_b + size(A), dim_b + size(A))
         Augmented matrix for the first iteration of the semi-relaxed
         regularization path
     """
@@ -451,18 +538,27 @@ def fully_relaxed_path(a: np.array, b: np.array, C: np.array, reg=1e-4,
     r"""This function gives the regularization path of l2-penalized UOT problem
 
     The problem to optimize is the Lasso reformulation of the l2-penalized UOT:
+
     .. math::
-        \min_t \gamma c^T t + 0.5 * \|H t - y\|_2^2
+        \min_t \gamma \mathbf{c}^T \mathbf{t}
+               + 0.5 * \|{H} \mathbf{t} - \mathbf{y}\|_2^2
+
         s.t.
-            t \geq 0
+            \mathbf{t} \geq 0
+
     where :
-    - c is a (dim_a * dim_b, ) metric cost vector (flattened version of C)
+
+    - :math:`\mathbf{c}` is the flattened version of the cost matrix \
+      :math:`{C}`
     - :math:`\gamma = 1/\lambda` is the l2-regularization coefficient
-    - y is the concatenation of vectors a and b, defined as y^T = [a^T b^T]
-    - H is a (dim_a + dim_b, dim_a * dim_b) metric matrix,
-        see [Chapel et al., 2021] for the design of H. The matrix product Ht
-        computes both the source marginal and the target marginal.
-    - t is a (dim_a * dim_b, ) metric vector (flattened version of T)
+    - :math:`\mathbf{y}` is the concatenation of vectors :math:`\mathbf{a}` \
+      and :math:`\mathbf{b}`, defined as \
+      :math:`\mathbf{y}^T = [\mathbf{a}^T \mathbf{b}^T]`
+    - :math:`{H}` is a design matrix, see :ref:`[41] <references-regpath>` \
+      for the design of :math:`{H}`. The matrix product :math:`H\mathbf{t}` \
+      computes both the source marginal and the target marginals.
+    - :math:`\mathbf{t}` is the flattened version of the transport matrix
+
     Parameters
     ----------
     a : np.ndarray (dim_a,)
@@ -478,11 +574,12 @@ def fully_relaxed_path(a: np.array, b: np.array, C: np.array, reg=1e-4,
     Returns
     -------
     t : np.ndarray (dim_a*dim_b, )
-        Flattened vector of optimal transport matrix
+        Flattened vector of the optimal transport matrix
     t_list : list
-        List of solutions in regularization path
+        List of solutions in the regularization path
     gamma_list : list
-        List of regularization coefficient in regularization path
+        List of regularization coefficients in the regularization path
+
     Examples
     --------
     >>> import ot
@@ -502,10 +599,9 @@ def fully_relaxed_path(a: np.array, b: np.array, C: np.array, reg=1e-4,
 
     References
     ----------
-    [Chapel et al., 2021]:
-        Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
+    .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
         Unbalanced optimal transport through non-negative penalized
-        linear regression.
+        linear regression. NeurIPS.
     """
 
     n = np.shape(a)[0]
@@ -580,22 +676,32 @@ def fully_relaxed_path(a: np.array, b: np.array, C: np.array, reg=1e-4,
 def semi_relaxed_path(a: np.array, b: np.array, C: np.array, reg=1e-4,
                       itmax=50000):
     r"""This function gives the regularization path of semi-relaxed
-    l2-UOT problem
+    l2-UOT problem.
 
     The problem to optimize is the Lasso reformulation of the l2-penalized UOT:
+
     .. math::
-        \min_t \gamma c^T t + 0.5 * \|H_r t - a\|_2^2
+
+        \min_t \gamma \mathbf{c}^T t
+               + 0.5 * \|H_r \mathbf{t} - \mathbf{a}\|_2^2
+
         s.t.
-            H_c t = b
-            t \geq 0
+            H_c \mathbf{t} = \mathbf{b}
+
+            \mathbf{t} \geq 0
+
     where :
-    - c is a (dim_a * dim_b, ) metric cost vector (flattened version of C)
-    - :math:`\gamma = 1/\lambda` is the l2-regularization coefficient
-    - H_r is  a (dim_a, dim_a * dim_b) metric matrix,
-        which computes the sum along the rows of transport plan T
-    - H_c is a (dim_b, dim_a * dim_b) metric matrix,
-        which computes the sum along the columns of transport plan T
-    - t is a (dim_a * dim_b, ) metric vector (flattened version of T)
+
+    - :math:`\mathbf{c}` is the flattened version of the cost matrix \
+      :math:`C`
+    - :math:`\gamma = 1/\lambda` is the l2-regularization parameter
+    - :math:`H_r` is  a  matrix that computes the sum along the rows of \
+      the transport plan :math:`T`
+    - :math:`H_c` is  a  matrix that computes the sum along the columns of \
+      the transport plan :math:`T`
+    - :math:`\mathbf{t}` is the flattened version of the transport plan \
+      :math:`T`
+
     Parameters
     ----------
     a : np.ndarray (dim_a,)
@@ -608,14 +714,16 @@ def semi_relaxed_path(a: np.array, b: np.array, C: np.array, reg=1e-4,
         l2-regularization coefficient
     itmax: int (optional)
         Maximum number of iteration
+
     Returns
     -------
     t : np.ndarray (dim_a*dim_b, )
-        Flattened vector of optimal transport matrix
+        Flattened vector of the (unregularized) optimal transport matrix
     t_list : list
-        List of solutions in regularization path
+        List of all the optimal transport vectors of the regularization path
     gamma_list : list
-        List of regularization coefficient in regularization path
+        List of the regularization parameters in the path
+
     Examples
     --------
     >>> import ot
@@ -635,10 +743,9 @@ def semi_relaxed_path(a: np.array, b: np.array, C: np.array, reg=1e-4,
 
     References
     ----------
-    [Chapel et al., 2021]:
-        Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
+    .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
         Unbalanced optimal transport through non-negative penalized
-        linear regression.
+        linear regression. NeurIPS.
     """
 
     n = np.shape(a)[0]
@@ -722,8 +829,44 @@ def semi_relaxed_path(a: np.array, b: np.array, C: np.array, reg=1e-4,
 
 def regularization_path(a: np.array, b: np.array, C: np.array, reg=1e-4,
                         semi_relaxed=False, itmax=50000):
-    r"""This function combines both the semi-relaxed and the fully-relaxed
-    regularization paths of l2-UOT problem
+    r"""This function provides all the solutions of the regularization path \
+    of the l2-UOT problem :ref:`[41] <references-regpath>`.
+
+    The problem to optimize is the Lasso reformulation of the l2-penalized UOT:
+
+    .. math::
+        \min_t \gamma \mathbf{c}^T \mathbf{t}
+                + 0.5 * \|{H} \mathbf{t} - \mathbf{y}\|_2^2
+
+        s.t.
+            \mathbf{t} \geq 0
+
+    where :
+
+    - :math:`\mathbf{c}` is the flattened version of the cost matrix \
+      :math:`{C}`
+    - :math:`\gamma = 1/\lambda` is the l2-regularization coefficient
+    - :math:`\mathbf{y}` is the concatenation of vectors :math:`\mathbf{a}` \
+      and :math:`\mathbf{b}`, defined as \
+      :math:`\mathbf{y}^T = [\mathbf{a}^T \mathbf{b}^T]`
+    - :math:`{H}` is a design matrix, see :ref:`[41] <references-regpath>` \
+      for the design of :math:`{H}`. The matrix product :math:`H\mathbf{t}` \
+      computes both the source marginal and the target marginals.
+    - :math:`\mathbf{t}` is the flattened version of the transport matrix
+
+    For the semi-relaxed problem, it optimizes the Lasso reformulation of the
+    l2-penalized UOT:
+
+    .. math::
+
+        \min_t \gamma \mathbf{c}^T \mathbf{t}
+                + 0.5 * \|H_r \mathbf{t} - \mathbf{a}\|_2^2
+
+        s.t.
+            H_c \mathbf{t} = \mathbf{b}
+
+            \mathbf{t} \geq 0
+
 
     Parameters
     ----------
@@ -736,23 +879,24 @@ def regularization_path(a: np.array, b: np.array, C: np.array, reg=1e-4,
     reg: float (optional)
         l2-regularization coefficient
     semi_relaxed : bool (optional)
-        Give the semi-relaxed path if true
+        Give the semi-relaxed path if True
     itmax: int (optional)
         Maximum number of iteration
+
     Returns
     -------
     t : np.ndarray (dim_a*dim_b, )
-        Flattened vector of optimal transport matrix
+        Flattened vector of the (unregularized) optimal transport matrix
     t_list : list
-        List of solutions in regularization path
+        List of all the optimal transport vectors of the regularization path
     gamma_list : list
-        List of regularization coefficient in regularization path
+        List of the regularization parameters in the path
+
     References
     ----------
-    [Chapel et al., 2021]:
-        Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
+    .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
         Unbalanced optimal transport through non-negative penalized
-        linear regression.
+        linear regression. NeurIPS.
     """
     if semi_relaxed:
         t, t_list, gamma_list = semi_relaxed_path(a, b, C, reg=reg,
@@ -765,27 +909,33 @@ def regularization_path(a: np.array, b: np.array, C: np.array, reg=1e-4,
 
 def compute_transport_plan(gamma, gamma_list, Pi_list):
     r""" Given the regularization path, this function computes the transport
-    plan for any value of gamma by the piecewise linearity of the path
+    plan for any value of gamma thanks to the piecewise linearity of the path.
 
     .. math::
         t(\gamma) = \phi(\gamma) - \gamma \delta(\gamma)
-    where :
-    - :math:`\gamma` is the regularization coefficient
+
+    where:
+
+    - :math:`\gamma` is the regularization parameter
     - :math:`\phi(\gamma)` is the corresponding intercept
     - :math:`\delta(\gamma)` is the corresponding slope
-    - t is a (dim_a * dim_b, ) vector (flattened version of transport matrix)
+    - :math:`\mathbf{t}` is the flattened version of the transport matrix
+
     Parameters
     ----------
     gamma : float
         Regularization coefficient
     gamma_list : list
-        List of regularization coefficients in regularization path
+        List of regularization parameters of the regularization path
     Pi_list : list
-        List of solutions in regularization path
+        List of all the solutions of the regularization path
+
     Returns
     -------
     t : np.ndarray (dim_a*dim_b, )
-        Transport vector corresponding to the given value of gamma
+        Vectorization of the transport plan corresponding to the given value
+        of gamma
+
     Examples
     --------
     >>> import ot
@@ -804,12 +954,13 @@ def compute_transport_plan(gamma, gamma_list, Pi_list):
     array([0.        , 0.        , 0.        , 0.19722222, 0.05555556,
            0.        , 0.        , 0.24722222, 0.        ])
 
+
+    .. _references-regpath:
     References
     ----------
-    [Chapel et al., 2021]:
-        Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
+    .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
         Unbalanced optimal transport through non-negative penalized
-        linear regression.
+        linear regression. NeurIPS.
     """
 
     if gamma >= gamma_list[0]:
diff --git a/ot/unbalanced.py b/ot/unbalanced.py
index 503cc1e..90c920c 100644
--- a/ot/unbalanced.py
+++ b/ot/unbalanced.py
@@ -4,6 +4,7 @@ Regularized Unbalanced OT solvers
 """
 
 # Author: Hicham Janati <hicham.janati@inria.fr>
+#         Laetitia Chapel <laetitia.chapel@univ-ubs.fr>
 # License: MIT License
 
 from __future__ import division
@@ -1029,3 +1030,225 @@ def barycenter_unbalanced(A, M, reg, reg_m, method="sinkhorn", weights=None,
                                      log=log, **kwargs)
     else:
         raise ValueError("Unknown method '%s'." % method)
+
+
+def mm_unbalanced(a, b, M, reg_m, div='kl', G0=None, numItermax=1000,
+                  stopThr=1e-15, verbose=False, log=False):
+    r"""
+    Solve the unbalanced optimal transport problem and return the OT plan.
+    The function solves the following optimization problem:
+
+    .. math::
+        W = \min_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+        \mathrm{reg_m} \cdot \mathrm{div}(\gamma \mathbf{1}, \mathbf{a}) +
+        \mathrm{reg_m} \cdot \mathrm{div}(\gamma^T \mathbf{1}, \mathbf{b})
+        s.t.
+             \gamma \geq 0
+
+    where:
+
+    - :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
+    - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target
+      unbalanced distributions
+    - div is a divergence, either Kullback-Leibler or :math:`\ell_2` divergence
+
+    The algorithm used for solving the problem is a maximization-
+    minimization algorithm as proposed in :ref:`[41] <references-regpath>`
+
+    Parameters
+    ----------
+    a : array-like (dim_a,)
+        Unnormalized histogram of dimension `dim_a`
+    b : array-like (dim_b,)
+        Unnormalized histogram of dimension `dim_b`
+    M : array-like (dim_a, dim_b)
+        loss matrix
+    reg_m: float
+        Marginal relaxation term > 0
+    div: string, optional
+        Divergence to quantify the difference between the marginals.
+        Can take two values: 'kl' (Kullback-Leibler) or 'l2' (quadratic)
+    G0: array-like (dim_a, dim_b)
+        Initialization of the transport matrix
+    numItermax : int, optional
+        Max number of iterations
+    stopThr : float, optional
+        Stop threshold on error (> 0)
+    verbose : bool, optional
+        Print information along iterations
+    log : bool, optional
+        record log if True
+    Returns
+    -------
+    gamma : (dim_a, dim_b) array-like
+            Optimal transportation matrix for the given parameters
+    log : dict
+            log dictionary returned only if `log` is `True`
+
+    Examples
+    --------
+    >>> import ot
+    >>> import numpy as np
+    >>> a=[.5, .5]
+    >>> b=[.5, .5]
+    >>> M=[[1., 36.],[9., 4.]]
+    >>> np.round(ot.unbalanced.mm_unbalanced(a, b, M, 1, 'kl'), 2)
+    array([[0.3 , 0.  ],
+           [0.  , 0.07]])
+    >>> np.round(ot.unbalanced.mm_unbalanced(a, b, M, 1, 'l2'), 2)
+    array([[0.25, 0.  ],
+           [0.  , 0.  ]])
+
+
+    .. _references-regpath:
+    References
+    ----------
+    .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
+        Unbalanced optimal transport through non-negative penalized
+        linear regression. NeurIPS.
+    See Also
+    --------
+    ot.lp.emd : Unregularized OT
+    ot.unbalanced.sinkhorn_unbalanced : Entropic regularized OT
+    """
+    M, a, b = list_to_array(M, a, b)
+    nx = get_backend(M, a, b)
+
+    dim_a, dim_b = M.shape
+
+    if len(a) == 0:
+        a = nx.ones(dim_a, type_as=M) / dim_a
+    if len(b) == 0:
+        b = nx.ones(dim_b, type_as=M) / dim_b
+
+    if G0 is None:
+        G = a[:, None] * b[None, :]
+    else:
+        G = G0
+
+    if log:
+        log = {'err': [], 'G': []}
+
+    if div == 'kl':
+        K = nx.exp(M / - reg_m / 2)
+    elif div == 'l2':
+        K = nx.maximum(a[:, None] + b[None, :] - M / reg_m / 2,
+                       nx.zeros((dim_a, dim_b), type_as=M))
+    else:
+        warnings.warn("The div parameter should be either equal to 'kl' or \
+                      'l2': it has been set to 'kl'.")
+        div = 'kl'
+        K = nx.exp(M / - reg_m / 2)
+
+    for i in range(numItermax):
+        Gprev = G
+
+        if div == 'kl':
+            u = nx.sqrt(a / (nx.sum(G, 1) + 1e-16))
+            v = nx.sqrt(b / (nx.sum(G, 0) + 1e-16))
+            G = G * K * u[:, None] * v[None, :]
+        elif div == 'l2':
+            Gd = nx.sum(G, 0, keepdims=True) + nx.sum(G, 1, keepdims=True) + 1e-16
+            G = G * K / Gd
+
+        err = nx.sqrt(nx.sum((G - Gprev) ** 2))
+        if log:
+            log['err'].append(err)
+            log['G'].append(G)
+        if verbose:
+            print('{:5d}|{:8e}|'.format(i, err))
+        if err < stopThr:
+            break
+
+    if log:
+        log['cost'] = nx.sum(G * M)
+        return G, log
+    else:
+        return G
+
+
+def mm_unbalanced2(a, b, M, reg_m, div='kl', G0=None, numItermax=1000,
+                   stopThr=1e-15, verbose=False, log=False):
+    r"""
+    Solve the unbalanced optimal transport problem and return the OT plan.
+    The function solves the following optimization problem:
+
+    .. math::
+        W = \min_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+        \mathrm{reg_m} \cdot \mathrm{div}(\gamma \mathbf{1}, \mathbf{a}) +
+        \mathrm{reg_m} \cdot \mathrm{div}(\gamma^T \mathbf{1}, \mathbf{b})
+
+        s.t.
+             \gamma \geq 0
+
+    where:
+
+    - :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
+    - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target
+      unbalanced distributions
+    - :math:`\mathrm{div}` is a divergence, either Kullback-Leibler or :math:`\ell_2` divergence
+
+    The algorithm used for solving the problem is a maximization-
+    minimization algorithm as proposed in :ref:`[41] <references-regpath>`
+
+    Parameters
+    ----------
+    a : array-like (dim_a,)
+        Unnormalized histogram of dimension `dim_a`
+    b : array-like (dim_b,)
+        Unnormalized histogram of dimension `dim_b`
+    M : array-like (dim_a, dim_b)
+        loss matrix
+    reg_m: float
+        Marginal relaxation term > 0
+    div: string, optional
+        Divergence to quantify the difference between the marginals.
+        Can take two values: 'kl' (Kullback-Leibler) or 'l2' (quadratic)
+    G0: array-like (dim_a, dim_b)
+        Initialization of the transport matrix
+    numItermax : int, optional
+        Max number of iterations
+    stopThr : float, optional
+        Stop threshold on error (> 0)
+    verbose : bool, optional
+        Print information along iterations
+    log : bool, optional
+        record log if True
+
+    Returns
+    -------
+    ot_distance : array-like
+        the OT distance between :math:`\mathbf{a}` and :math:`\mathbf{b}`
+    log : dict
+        log dictionary returned only if `log` is `True`
+
+    Examples
+    --------
+    >>> import ot
+    >>> import numpy as np
+    >>> a=[.5, .5]
+    >>> b=[.5, .5]
+    >>> M=[[1., 36.],[9., 4.]]
+    >>> np.round(ot.unbalanced.mm_unbalanced2(a, b, M, 1, 'l2'),2)
+    0.25
+    >>> np.round(ot.unbalanced.mm_unbalanced2(a, b, M, 1, 'kl'),2)
+    0.57
+
+    References
+    ----------
+    .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021).
+        Unbalanced optimal transport through non-negative penalized
+        linear regression. NeurIPS.
+    See Also
+    --------
+    ot.lp.emd2 : Unregularized OT loss
+    ot.unbalanced.sinkhorn_unbalanced2 : Entropic regularized OT loss
+    """
+    _, log_mm = mm_unbalanced(a, b, M, reg_m, div=div, G0=G0,
+                              numItermax=numItermax, stopThr=stopThr,
+                              verbose=verbose, log=True)
+
+    if log:
+        return log_mm['cost'], log_mm
+    else:
+        return log_mm['cost']
diff --git a/test/test_unbalanced.py b/test/test_unbalanced.py
index db59504..02b3fc3 100644
--- a/test/test_unbalanced.py
+++ b/test/test_unbalanced.py
@@ -1,6 +1,7 @@
 """Tests for module Unbalanced OT with entropy regularization"""
 
 # Author: Hicham Janati <hicham.janati@inria.fr>
+#         Laetitia Chapel <laetitia.chapel@univ-ubs.fr>
 #
 # License: MIT License
 
@@ -286,3 +287,52 @@ def test_implemented_methods(nx):
                                                method=method)
             barycenter_unbalanced(A, M, reg=epsilon, reg_m=reg_m,
                                   method=method)
+
+
+def test_mm_convergence(nx):
+    n = 100
+    rng = np.random.RandomState(42)
+    x = rng.randn(n, 2)
+    rng = np.random.RandomState(75)
+    y = rng.randn(n, 2)
+    a = ot.utils.unif(n)
+    b = ot.utils.unif(n)
+
+    M = ot.dist(x, y)
+    M = M / M.max()
+    reg_m = 100
+    a, b, M = nx.from_numpy(a, b, M)
+
+    G_kl, _ = ot.unbalanced.mm_unbalanced(a, b, M, reg_m=reg_m, div='kl',
+                                          verbose=True, log=True)
+    loss_kl = nx.to_numpy(ot.unbalanced.mm_unbalanced2(
+                          a, b, M, reg_m, div='kl', verbose=True))
+    G_l2, _ = ot.unbalanced.mm_unbalanced(a, b, M, reg_m=reg_m, div='l2',
+                                          verbose=False, log=True)
+
+    # check if the marginals come close to the true ones when large reg
+    np.testing.assert_allclose(np.sum(nx.to_numpy(G_kl), 1), a, atol=1e-03)
+    np.testing.assert_allclose(np.sum(nx.to_numpy(G_kl), 0), b, atol=1e-03)
+    np.testing.assert_allclose(np.sum(nx.to_numpy(G_l2), 1), a, atol=1e-03)
+    np.testing.assert_allclose(np.sum(nx.to_numpy(G_l2), 0), b, atol=1e-03)
+
+    # check if mm_unbalanced2 returns the correct loss
+    np.testing.assert_allclose(nx.to_numpy(nx.sum(G_kl * M)), loss_kl,
+                               atol=1e-5)
+
+    # check in case no histogram is provided
+    a_np, b_np = np.array([]), np.array([])
+    a, b = nx.from_numpy(a_np, b_np)
+
+    G_kl_null = ot.unbalanced.mm_unbalanced(a, b, M, reg_m=reg_m, div='kl')
+    G_l2_null = ot.unbalanced.mm_unbalanced(a, b, M, reg_m=reg_m, div='l2')
+    np.testing.assert_allclose(G_kl_null, G_kl)
+    np.testing.assert_allclose(G_l2_null, G_l2)
+
+    # test when G0 is given
+    G0 = ot.emd(a, b, M)
+    reg_m = 10000
+    G_kl = ot.unbalanced.mm_unbalanced(a, b, M, reg_m=reg_m, div='kl', G0=G0)
+    G_l2 = ot.unbalanced.mm_unbalanced(a, b, M, reg_m=reg_m, div='l2', G0=G0)
+    np.testing.assert_allclose(G0, G_kl, atol=1e-05)
+    np.testing.assert_allclose(G0, G_l2, atol=1e-05)
-- 
cgit v1.2.3


From eccb1386eea52b94b82456d126bd20cbe3198e05 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Thu, 21 Apr 2022 16:34:01 +0200
Subject: [MRG] Release 8.2 (#365)

* release text and number

* add examples in release fil build wheels

* switch gallery to release

* add much needed contributors file

* debug circleci

* une line of logos

* working logo

* back to stable sphinx galery
---
 .circleci/config.yml                     |   5 ++-
 CONTRIBUTORS.md                          |  52 ++++++++++++++++++++++++++++
 README.md                                |  35 ++++---------------
 RELEASES.md                              |  57 ++++++++++++++++++++++++++-----
 docs/source/_static/images/logo_3ia.jpg  | Bin 0 -> 25029 bytes
 docs/source/_static/images/logo_anr.jpg  | Bin 0 -> 23493 bytes
 docs/source/_static/images/logo_cnrs.jpg | Bin 0 -> 6918 bytes
 docs/source/contributors.rst             |   6 ++++
 docs/source/index.rst                    |   1 +
 docs/source/releases.rst                 |   2 +-
 examples/others/plot_logo.py             |  10 +++---
 ot/__init__.py                           |   2 +-
 12 files changed, 122 insertions(+), 48 deletions(-)
 create mode 100644 CONTRIBUTORS.md
 create mode 100644 docs/source/_static/images/logo_3ia.jpg
 create mode 100644 docs/source/_static/images/logo_anr.jpg
 create mode 100644 docs/source/_static/images/logo_cnrs.jpg
 create mode 100644 docs/source/contributors.rst

(limited to 'docs/source')

diff --git a/.circleci/config.yml b/.circleci/config.yml
index 77ab45c..7e15a65 100644
--- a/.circleci/config.yml
+++ b/.circleci/config.yml
@@ -48,9 +48,8 @@ jobs:
               python -m pip install --user -e .
               python -m pip install --user --upgrade --no-cache-dir --progress-bar off -r requirements.txt 
               python -m pip install --user --upgrade --progress-bar off -r docs/requirements.txt
-              python -m pip install --user --upgrade --progress-bar off ipython "https://api.github.com/repos/sphinx-gallery/sphinx-gallery/zipball/master" memory_profiler
-              
-
+              python -m pip install --user --upgrade --progress-bar off ipython sphinx-gallery memory_profiler              
+          # python -m pip install --user --upgrade --progress-bar off ipython "https://api.github.com/repos/sphinx-gallery/sphinx-gallery/zipball/master" memory_profiler
         - save_cache:
             key: pip-cache
             paths:
diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md
new file mode 100644
index 0000000..ab64fba
--- /dev/null
+++ b/CONTRIBUTORS.md
@@ -0,0 +1,52 @@
+
+
+## Creators and Maintainers 
+
+This toolbox has been created and is maintained by:
+
+* [Rémi Flamary](http://remi.flamary.com/)
+* [Nicolas Courty](http://people.irisa.fr/Nicolas.Courty/)
+
+## Contributors
+
+The contributors to this library are:
+
+* [Rémi Flamary](http://remi.flamary.com/) (EMD wrapper, Pytorch backend, DA
+  classes, conditional gradients, WDA, weak OT, linear OT mapping, documentation)
+* [Nicolas Courty](http://people.irisa.fr/Nicolas.Courty/) (Original sinkhorn,
+  Wasserstein barycenters and convolutional barycenters, 1D wasserstein)
+* [Alexandre Gramfort](http://alexandre.gramfort.net/) (CI, documentation)
+* [Laetitia Chapel](http://people.irisa.fr/Laetitia.Chapel/) (Partial OT,
+  Unbalanced OT non-regularized)
+* [Michael Perrot](http://perso.univ-st-etienne.fr/pem82055/) (Mapping estimation)
+* [Léo Gautheron](https://github.com/aje) (Initial GPU implementation)
+* [Nathalie Gayraud](https://www.linkedin.com/in/nathalie-t-h-gayraud/?ppe=1) (DA classes)
+* [Stanislas Chambon](https://slasnista.github.io/) (DA classes)
+* [Antoine Rolet](https://arolet.github.io/) (EMD solver debug)
+* Erwan Vautier (Gromov-Wasserstein)
+* [Kilian Fatras](https://kilianfatras.github.io/) (Stochastic solvers,
+  empirical sinkhorn)
+* [Alain Rakotomamonjy](https://sites.google.com/site/alainrakotomamonjy/home) (Greenkhorn)
+* [Vayer Titouan](https://tvayer.github.io/) (Gromov-Wasserstein, Fused-Gromov-Wasserstein)
+* [Hicham Janati](https://hichamjanati.github.io/) (Unbalanced OT, Debiased barycenters)
+* [Romain Tavenard](https://rtavenar.github.io/) (1D Wasserstein)
+* [Mokhtar Z. Alaya](http://mzalaya.github.io/) (Screenkhorn)
+* [Ievgen Redko](https://ievred.github.io/) (Laplacian DA, JCPOT)
+* [Adrien Corenflos](https://adriencorenflos.github.io/) (Sliced Wasserstein Distance)
+* [Tanguy Kerdoncuff](https://hv0nnus.github.io/) (Sampled Gromov Wasserstein)
+* [Minhui Huang](https://mhhuang95.github.io) (Projection Robust Wasserstein Distance)
+* [Nathan Cassereau](https://github.com/ncassereau-idris) (Backends)
+* [Cédric Vincent-Cuaz](https://github.com/cedricvincentcuaz) (Graph Dictionary Learning)
+
+## Acknowledgments
+
+This toolbox benefit a lot from open source research and we would like to thank the following persons for providing some code (in various languages):
+
+* [Gabriel Peyré](http://gpeyre.github.io/) (Wasserstein Barycenters in Matlab)
+* [Mathieu Blondel](https://mblondel.org/) (original implementation smooth OT)
+* [Nicolas Bonneel](http://liris.cnrs.fr/~nbonneel/) (C++ code for EMD)
+* [Marco Cuturi](http://marcocuturi.net/) (Sinkhorn Knopp in Matlab/Cuda)
+
+POT has benefited from the financing or manpower from the following partners:
+
+<img src="https://pythonot.github.io/master/_static/images/logo_anr.jpg" alt="ANR" style="height:60px;"/><img src="https://pythonot.github.io/master/_static/images/logo_cnrs.jpg" alt="CNRS" style="height:60px;"/><img src="https://pythonot.github.io/master/_static/images/logo_3ia.jpg" alt="3IA" style="height:60px;"/>
\ No newline at end of file
diff --git a/README.md b/README.md
index 1b50aeb..e2b33d9 100644
--- a/README.md
+++ b/README.md
@@ -180,35 +180,12 @@ This toolbox has been created and is maintained by
 * [Rémi Flamary](http://remi.flamary.com/)
 * [Nicolas Courty](http://people.irisa.fr/Nicolas.Courty/)
 
-The contributors to this library are 
-
-* [Alexandre Gramfort](http://alexandre.gramfort.net/) (CI, documentation)
-* [Laetitia Chapel](http://people.irisa.fr/Laetitia.Chapel/) (Partial OT)
-* [Michael Perrot](http://perso.univ-st-etienne.fr/pem82055/) (Mapping estimation)
-* [Léo Gautheron](https://github.com/aje) (Initial GPU implementation)
-* [Nathalie Gayraud](https://www.linkedin.com/in/nathalie-t-h-gayraud/?ppe=1) (DA classes)
-* [Stanislas Chambon](https://slasnista.github.io/) (DA classes)
-* [Antoine Rolet](https://arolet.github.io/) (EMD solver debug)
-* Erwan Vautier (Gromov-Wasserstein)
-* [Kilian Fatras](https://kilianfatras.github.io/) (Stochastic solvers)
-* [Alain Rakotomamonjy](https://sites.google.com/site/alainrakotomamonjy/home)
-* [Vayer Titouan](https://tvayer.github.io/) (Gromov-Wasserstein -, Fused-Gromov-Wasserstein)
-* [Hicham Janati](https://hichamjanati.github.io/) (Unbalanced OT, Debiased barycenters)
-* [Romain Tavenard](https://rtavenar.github.io/) (1d Wasserstein)
-* [Mokhtar Z. Alaya](http://mzalaya.github.io/) (Screenkhorn)
-* [Ievgen Redko](https://ievred.github.io/) (Laplacian DA, JCPOT)
-* [Adrien Corenflos](https://adriencorenflos.github.io/) (Sliced Wasserstein Distance)
-* [Tanguy Kerdoncuff](https://hv0nnus.github.io/) (Sampled Gromov Wasserstein)
-* [Minhui Huang](https://mhhuang95.github.io) (Projection Robust Wasserstein Distance)
-* [Nathan Cassereau](https://github.com/ncassereau-idris) (Backends)
-* [Cédric Vincent-Cuaz](https://github.com/cedricvincentcuaz) (Graph Dictionary Learning)
-
-This toolbox benefit a lot from open source research and we would like to thank the following persons for providing some code (in various languages):
-
-* [Gabriel Peyré](http://gpeyre.github.io/) (Wasserstein Barycenters in Matlab)
-* [Mathieu Blondel](https://mblondel.org/) (original implementation smooth OT)
-* [Nicolas Bonneel](http://liris.cnrs.fr/~nbonneel/) (C++ code for EMD)
-* [Marco Cuturi](http://marcocuturi.net/) (Sinkhorn Knopp in Matlab/Cuda)
+The numerous contributors to this library are listed [here](CONTRIBUTORS.md).
+
+POT has benefited from the financing or manpower from the following partners:
+
+<img src="https://pythonot.github.io/master/_static/images/logo_anr.jpg" alt="ANR" style="height:60px;"/><img src="https://pythonot.github.io/master/_static/images/logo_cnrs.jpg" alt="CNRS" style="height:60px;"/><img src="https://pythonot.github.io/master/_static/images/logo_3ia.jpg" alt="3IA" style="height:60px;"/>
+
 
 ## Contributions and code of conduct
 
diff --git a/RELEASES.md b/RELEASES.md
index 33d1ab6..be2192e 100644
--- a/RELEASES.md
+++ b/RELEASES.md
@@ -1,22 +1,61 @@
 # Releases
 
 
-## 0.8.2dev Development
+## 0.8.2
+
+This releases introduces several new notable features. The less important
+but most exiting one being that we now have a logo for the toolbox (color
+and dark background) :
+
+![](https://pythonot.github.io/master/_images/logo.svg)![](https://pythonot.github.io/master/_static/logo_dark.svg)
+
+This logo is generated using with matplotlib and using the solution of an OT
+problem provided by POT (with `ot.emd`). Generating the logo can be done with a
+simple python script also provided in the [documentation gallery](https://pythonot.github.io/auto_examples/others/plot_logo.html#sphx-glr-auto-examples-others-plot-logo-py).
+
+New OT solvers include [Weak
+OT](https://pythonot.github.io/gen_modules/ot.weak.html#ot.weak.weak_optimal_transport)
+ and [OT with factored
+coupling](https://pythonot.github.io/gen_modules/ot.factored.html#ot.factored.factored_optimal_transport)
+that can be used on large datasets. The [Majorization Minimization](https://pythonot.github.io/gen_modules/ot.unbalanced.html?highlight=mm_#ot.unbalanced.mm_unbalanced) solvers for
+non-regularized Unbalanced OT are now also available. We also now provide an
+implementation of [GW and FGW unmixing](https://pythonot.github.io/gen_modules/ot.gromov.html#ot.gromov.gromov_wasserstein_linear_unmixing) and [dictionary learning](https://pythonot.github.io/gen_modules/ot.gromov.html#ot.gromov.gromov_wasserstein_dictionary_learning). It is now
+possible to use autodiff to solve entropic an quadratic regularized OT in the
+dual for full or stochastic optimization thanks to the new functions to compute
+the dual loss for [entropic](https://pythonot.github.io/gen_modules/ot.stochastic.html#ot.stochastic.loss_dual_entropic) and [quadratic](https://pythonot.github.io/gen_modules/ot.stochastic.html#ot.stochastic.loss_dual_quadratic) regularized OT and reconstruct the [OT
+plan](https://pythonot.github.io/gen_modules/ot.stochastic.html#ot.stochastic.plan_dual_entropic) on part or all of the data. They can be used for instance to solve OT
+problems with stochastic gradient or for estimating the [dual potentials as
+neural networks](https://pythonot.github.io/auto_examples/backends/plot_stoch_continuous_ot_pytorch.html#sphx-glr-auto-examples-backends-plot-stoch-continuous-ot-pytorch-py).
+
+On the backend front, we now have backend compatible functions and classes in
+the domain adaptation [`ot.da`](https://pythonot.github.io/gen_modules/ot.da.html#module-ot.da) and unbalanced OT [`ot.unbalanced`](https://pythonot.github.io/gen_modules/ot.unbalanced.html) modules. This
+means that the DA classes can be used on tensors from all compatible backends.
+The [free support Wasserstein barycenter](https://pythonot.github.io/gen_modules/ot.lp.html?highlight=free%20support#ot.lp.free_support_barycenter) solver is now also backend compatible.
+
+Finally we have worked on the documentation to provide an update of existing
+examples in the gallery and and several new examples including [GW dictionary
+learning](https://pythonot.github.io/auto_examples/gromov/plot_gromov_wasserstein_dictionary_learning.html#sphx-glr-auto-examples-gromov-plot-gromov-wasserstein-dictionary-learning-py)
+[weak Optimal
+Transport](https://pythonot.github.io/auto_examples/others/plot_WeakOT_VS_OT.html#sphx-glr-auto-examples-others-plot-weakot-vs-ot-py),
+[NN based dual potentials
+estimation](https://pythonot.github.io/auto_examples/backends/plot_stoch_continuous_ot_pytorch.html#sphx-glr-auto-examples-backends-plot-stoch-continuous-ot-pytorch-py)
+and [Factored coupling OT](https://pythonot.github.io/auto_examples/others/plot_factored_coupling.html#sphx-glr-auto-examples-others-plot-factored-coupling-py).
+.
 
 #### New features
 
 - Remove deprecated `ot.gpu` submodule (PR #361)
-- Update examples in the gallery (PR #359).
+- Update examples in the gallery (PR #359)
 - Add stochastic loss and OT plan computation for regularized OT and 
-  backend examples(PR #360).
-- Implementation of factored OT with emd and sinkhorn (PR #358).
+  backend examples(PR #360)
+- Implementation of factored OT with emd and sinkhorn (PR #358)
 - A brand new logo for POT (PR #357)
-- Better list of related examples in quick start guide with `minigallery` (PR #334).
+- Better list of related examples in quick start guide with `minigallery` (PR #334)
 - Add optional log-domain Sinkhorn implementation in WDA to support smaller values
-  of the regularization parameter (PR #336).
-- Backend implementation for `ot.lp.free_support_barycenter` (PR #340).
-- Add weak OT solver + example  (PR #341).
-- Add backend support for Domain Adaptation and Unbalanced solvers (PR #343).
+  of the regularization parameter (PR #336)
+- Backend implementation for `ot.lp.free_support_barycenter` (PR #340)
+- Add weak OT solver + example  (PR #341)
+- Add backend support for Domain Adaptation and Unbalanced solvers (PR #343)
 - Add (F)GW linear dictionary learning solvers + example  (PR #319)
 - Add links to related PR and Issues in the doc release page (PR #350)
 - Add new minimization-maximization algorithms for solving exact Unbalanced OT + example (PR #362)
diff --git a/docs/source/_static/images/logo_3ia.jpg b/docs/source/_static/images/logo_3ia.jpg
new file mode 100644
index 0000000..ecc56b2
Binary files /dev/null and b/docs/source/_static/images/logo_3ia.jpg differ
diff --git a/docs/source/_static/images/logo_anr.jpg b/docs/source/_static/images/logo_anr.jpg
new file mode 100644
index 0000000..dcef212
Binary files /dev/null and b/docs/source/_static/images/logo_anr.jpg differ
diff --git a/docs/source/_static/images/logo_cnrs.jpg b/docs/source/_static/images/logo_cnrs.jpg
new file mode 100644
index 0000000..902cf6f
Binary files /dev/null and b/docs/source/_static/images/logo_cnrs.jpg differ
diff --git a/docs/source/contributors.rst b/docs/source/contributors.rst
new file mode 100644
index 0000000..f0acea6
--- /dev/null
+++ b/docs/source/contributors.rst
@@ -0,0 +1,6 @@
+Contributors
+============
+
+.. include:: ../../CONTRIBUTORS.md
+    :parser: myst_parser.sphinx_
+    :start-line: 2
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 7ff7d22..3d53ef4 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -22,6 +22,7 @@ Contents
    auto_examples/index
    releases
    .github/CONTRIBUTING
+   contributors
    .github/CODE_OF_CONDUCT
 
 
diff --git a/docs/source/releases.rst b/docs/source/releases.rst
index 8250a4d..b2c7a44 100644
--- a/docs/source/releases.rst
+++ b/docs/source/releases.rst
@@ -3,4 +3,4 @@ Releases
 
 .. include:: ../../RELEASES.md
     :parser: myst_parser.sphinx_
-    :start-line: 3
+    :start-line: 2
diff --git a/examples/others/plot_logo.py b/examples/others/plot_logo.py
index 9414371..bb4f640 100644
--- a/examples/others/plot_logo.py
+++ b/examples/others/plot_logo.py
@@ -18,7 +18,7 @@ matplotlib and ploting teh solution of the EMD solver from POT.
 
 # sphinx_gallery_thumbnail_number = 1
 
-# %%
+# %% Load modules
 import numpy as np
 import matplotlib.pyplot as pl
 import ot
@@ -36,21 +36,21 @@ p2 = np.array([[1.5, 6], [2, 4], [2, 5], [1.5, 3], [0.5, 2], [.5, 1], ])
 o1 = np.array([[0, 6.], [-1, 5], [-1.5, 4], [-1.5, 3], [-1, 2], [0, 1], ])
 o2 = np.array([[1, 6.], [2, 5], [2.5, 4], [2.5, 3], [2, 2], [1, 1], ])
 
-# scaling and translation for letter O
+# Scaling and translation for letter O
 o1[:, 0] += 6.4
 o2[:, 0] += 6.4
 o1[:, 0] *= 0.6
 o2[:, 0] *= 0.6
 
-# letter T
+# Letter T
 t1 = np.array([[-1, 6.], [-1, 5], [0, 4], [0, 3], [0, 2], [0, 1], ])
 t2 = np.array([[1.5, 6.], [1.5, 5], [0.5, 4], [0.5, 3], [0.5, 2], [0.5, 1], ])
 
-# translatin the T
+# Translating the T
 t1[:, 0] += 7.1
 t2[:, 0] += 7.1
 
-# Cocatenate all letters
+# Concatenate all letters
 x1 = np.concatenate((p1, o1, t1), axis=0)
 x2 = np.concatenate((p2, o2, t2), axis=0)
 
diff --git a/ot/__init__.py b/ot/__init__.py
index c5e1967..86ed94e 100644
--- a/ot/__init__.py
+++ b/ot/__init__.py
@@ -51,7 +51,7 @@ from .factored import factored_optimal_transport
 # utils functions
 from .utils import dist, unif, tic, toc, toq
 
-__version__ = "0.8.2dev"
+__version__ = "0.8.2"
 
 __all__ = ['emd', 'emd2', 'emd_1d', 'sinkhorn', 'sinkhorn2', 'utils',
            'datasets', 'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov',
-- 
cgit v1.2.3