From 0fc6938dc15e8888b0a73fa4b6a421f39f0e0697 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Thu, 6 Jun 2019 12:09:34 +0200
Subject: update conf + readme

---
 docs/source/conf.py    | 14 +++++++++++---
 docs/source/readme.rst | 30 +++++++++++++++++++-----------
 2 files changed, 30 insertions(+), 14 deletions(-)

(limited to 'docs')

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 433eca6..d29b829 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -15,7 +15,10 @@
 import sys
 import os
 import re
-import sphinx_gallery
+try:
+    import sphinx_gallery
+except ImportError:
+    print("warning sphinx-gallery not installed")
 
 # !!!! allow readthedoc compilation
 try:
@@ -65,6 +68,8 @@ extensions = [
     #'sphinx_gallery.gen_gallery',
 ]
 
+napoleon_numpy_docstring = True
+
 # Add any paths that contain templates here, relative to this directory.
 templates_path = ['_templates']
 
@@ -81,7 +86,7 @@ master_doc = 'index'
 
 # General information about the project.
 project = u'POT Python Optimal Transport'
-copyright = u'2016-2018, Rémi Flamary, Nicolas Courty'
+copyright = u'2016-2019, Rémi Flamary, Nicolas Courty'
 author = u'Rémi Flamary, Nicolas Courty'
 
 # The version info for the project you're documenting, acts as replacement for
@@ -323,7 +328,10 @@ texinfo_documents = [
 
 
 # Example configuration for intersphinx: refer to the Python standard library.
-intersphinx_mapping = {'https://docs.python.org/': None}
+intersphinx_mapping = {'python': ('https://docs.python.org/3', None),
+                       'numpy': ('http://docs.scipy.org/doc/numpy/', None),
+                       'scipy': ('http://docs.scipy.org/doc/scipy/reference/', None),
+                       'matplotlib': ('http://matplotlib.sourceforge.net/', None)}
 
 sphinx_gallery_conf = {
     'examples_dirs': ['../../examples','../../examples/da'],
diff --git a/docs/source/readme.rst b/docs/source/readme.rst
index e7c2bd1..d1063e8 100644
--- a/docs/source/readme.rst
+++ b/docs/source/readme.rst
@@ -12,9 +12,11 @@ It provides the following solvers:
 
 -  OT Network Flow solver for the linear program/ Earth Movers Distance
    [1].
--  Entropic regularization OT solver with Sinkhorn Knopp Algorithm [2]
-   and stabilized version [9][10] and greedy SInkhorn [22] with optional
-   GPU implementation (requires cudamat).
+-  Entropic regularization OT solver with Sinkhorn Knopp Algorithm [2],
+   stabilized version [9][10] and greedy Sinkhorn [22] with optional GPU
+   implementation (requires cupy).
+-  Sinkhorn divergence [23] and entropic regularization OT from
+   empirical data.
 -  Smooth optimal transport solvers (dual and semi-dual) for KL and
    squared L2 regularizations [17].
 -  Non regularized Wasserstein barycenters [16] with LP solver (only
@@ -115,14 +117,9 @@ below
 
        pip install pymanopt autograd
 
--  **ot.gpu** (GPU accelerated OT) depends on cudamat that have to be
-   installed with:
-
-   ::
-
-       git clone https://github.com/cudamat/cudamat.git
-       cd cudamat
-       python setup.py install --user # for user install (no root)
+-  **ot.gpu** (GPU accelerated OT) depends on cupy, which has to be
+   installed following instructions on `this
+   page <https://docs-cupy.chainer.org/en/stable/install.html>`__.
 
 obviously you need CUDA installed and a compatible GPU.
 
@@ -226,6 +223,7 @@ The contributors to this library are:
 -  `Kilian Fatras <https://kilianfatras.github.io/>`__
 -  `Alain
    Rakotomamonjy <https://sites.google.com/site/alainrakotomamonjy/home>`__
+-  `Vayer Titouan <https://tvayer.github.io/>`__
 
 This toolbox benefit a lot from open source research and we would like
 to thank the following persons for providing some code (in various
@@ -366,6 +364,16 @@ approximation algorithms for optimal transport via Sinkhorn
 iteration <https://papers.nips.cc/paper/6792-near-linear-time-approximation-algorithms-for-optimal-transport-via-sinkhorn-iteration.pdf>`__,
 Advances in Neural Information Processing Systems (NIPS) 31
 
+[23] Genevay, A., Peyré, G., Cuturi, M., `Learning Generative Models with
+Sinkhorn Divergences <https://arxiv.org/abs/1706.00292>`__, Proceedings
+of the Twenty-First International Conference on Artificial Intelligence
+and Statistics, (AISTATS) 21, 2018
+
+[24] Vayer, T., Chapel, L., Flamary, R., Tavenard, R. and Courty, N.
+(2019). `Optimal Transport for structured data with application on
+graphs <http://proceedings.mlr.press/v97/titouan19a.html>`__ Proceedings
+of the 36th International Conference on Machine Learning (ICML).
+
 .. |PyPI version| image:: https://badge.fury.io/py/POT.svg
    :target: https://badge.fury.io/py/POT
 .. |Anaconda Cloud| image:: https://anaconda.org/conda-forge/pot/badges/version.svg
-- 
cgit v1.2.3


From 4ae7e98b255bef3522e76b2c321b6938378d8dc7 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Fri, 7 Jun 2019 19:11:21 +0200
Subject: debut doc

---
 docs/source/howto.rst | 25 +++++++++++++++++++++++++
 docs/source/index.rst |  1 +
 2 files changed, 26 insertions(+)
 create mode 100644 docs/source/howto.rst

(limited to 'docs')

diff --git a/docs/source/howto.rst b/docs/source/howto.rst
new file mode 100644
index 0000000..48b1532
--- /dev/null
+++ b/docs/source/howto.rst
@@ -0,0 +1,25 @@
+
+How to ?
+========
+
+In the following we provide some pointers about which functions and classes 
+to use for different problems related to optimal transport (OTs).
+
+1. **How to solve a discrete optimal transport problem ?**
+
+    The solver for discrete  is the function :py:mod:`ot.emd` that returns
+    the OT transport matrix. If you want to solve a regularized OT you can 
+    use :py:mod:`ot.sinkhorn`.
+
+    More detailed examples can be seen on this :ref:`auto_examples/plot_OT_2D_samples`
+
+    Here is a simple use case:
+
+   .. code:: python
+
+       # a,b are 1D histograms (sum to 1 and positive)
+       # M is the ground cost matrix
+       T=ot.emd(a,b,M) # exact linear program
+       T_reg=ot.sinkhorn(a,b,M,reg) # entropic regularized OT
+
+
diff --git a/docs/source/index.rst b/docs/source/index.rst
index b8eabcb..d92f50f 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -13,6 +13,7 @@ Contents
    :maxdepth: 3
 
    self
+   howto
    all
    auto_examples/index
 
-- 
cgit v1.2.3


From 4e2f6b45662fe206414652ccc8f715c420f3b9cd Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Mon, 24 Jun 2019 17:13:33 +0200
Subject: first shot part OT Wass

---
 docs/source/howto.rst      |  25 ----------
 docs/source/index.rst      |   2 +-
 docs/source/quickstart.rst | 119 +++++++++++++++++++++++++++++++++++++++++++++
 docs/source/readme.rst     |   7 ++-
 4 files changed, 126 insertions(+), 27 deletions(-)
 delete mode 100644 docs/source/howto.rst
 create mode 100644 docs/source/quickstart.rst

(limited to 'docs')

diff --git a/docs/source/howto.rst b/docs/source/howto.rst
deleted file mode 100644
index 48b1532..0000000
--- a/docs/source/howto.rst
+++ /dev/null
@@ -1,25 +0,0 @@
-
-How to ?
-========
-
-In the following we provide some pointers about which functions and classes 
-to use for different problems related to optimal transport (OTs).
-
-1. **How to solve a discrete optimal transport problem ?**
-
-    The solver for discrete  is the function :py:mod:`ot.emd` that returns
-    the OT transport matrix. If you want to solve a regularized OT you can 
-    use :py:mod:`ot.sinkhorn`.
-
-    More detailed examples can be seen on this :ref:`auto_examples/plot_OT_2D_samples`
-
-    Here is a simple use case:
-
-   .. code:: python
-
-       # a,b are 1D histograms (sum to 1 and positive)
-       # M is the ground cost matrix
-       T=ot.emd(a,b,M) # exact linear program
-       T_reg=ot.sinkhorn(a,b,M,reg) # entropic regularized OT
-
-
diff --git a/docs/source/index.rst b/docs/source/index.rst
index d92f50f..03943e8 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -13,7 +13,7 @@ Contents
    :maxdepth: 3
 
    self
-   howto
+   quickstart
    all
    auto_examples/index
 
diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
new file mode 100644
index 0000000..3d3ce98
--- /dev/null
+++ b/docs/source/quickstart.rst
@@ -0,0 +1,119 @@
+
+Quick start
+===========
+
+
+
+In the following we provide some pointers about which functions and classes 
+to use for different problems related to optimal transport (OT).
+
+
+Optimal transport and Wasserstein distance
+------------------------------------------
+
+The optimal transport problem between discrete distributions is often expressed
+as
+    .. math::
+        \gamma^* = arg\min_\gamma \sum_{i,j}\gamma_{i,j}M_{i,j}
+
+        s.t. \gamma 1 = a; \gamma^T 1= b; \gamma\geq 0
+
+where :
+
+- :math:`M\in\mathbb{R}_+^{m\times n}` is the metric cost matrix defining the cost to move mass from bin :math:`a_i` to bin :math:`b_j`.
+- :math:`a` and :math:`b` are histograms (positive, sum to 1) that represent the weights of each sample in the source and target distributions.
+
+Solving the linear program above can be done using the function :any:`ot.emd`
+that will return the optimal transport matrix :math:`\gamma^*`:
+
+.. code:: python
+
+    # a,b are 1D histograms (sum to 1 and positive)
+    # M is the ground cost matrix
+    T=ot.emd(a,b,M) # exact linear program
+
+.. hint::
+    Examples of use for :any:`ot.emd` are available in the following examples:
+
+    - :any:`auto_examples/plot_OT_2D_samples`
+    - :any:`auto_examples/plot_OT_1D` 
+    - :any:`auto_examples/plot_OT_L1_vs_L2` 
+
+
+The value of the OT solution is often more of interest that the OT matrix :
+
+    .. math::
+        W(a,b)=\min_\gamma \sum_{i,j}\gamma_{i,j}M_{i,j}
+
+        s.t. \gamma 1 = a; \gamma^T 1= b; \gamma\geq 0
+
+
+where :math:`W(a,b)` is the  `Wasserstein distance
+<https://en.wikipedia.org/wiki/Wasserstein_metric>`_ between distributions a and b.
+It is a metric that has nice statistical
+properties. It can computed from an already estimated OT matrix with
+:code:`np.sum(T*M)` or directly with the function :any:`ot.emd2`.
+
+.. code:: python
+
+    # a,b are 1D histograms (sum to 1 and positive)
+    # M is the ground cost matrix
+    W=ot.emd2(a,b,M) # Wasserstein distance / EMD value
+
+.. note::
+    In POT, most functions that solve OT or regularized OT problems have two
+    versions that return the OT matrix or the value of the optimal solution. Fir
+    instance :any:`ot.emd` return the OT matrix and :any:`ot.emd2` return the
+    Wassertsein distance.
+
+
+Regularized Optimal Transport
+-----------------------------
+
+Wasserstein Barycenters
+-----------------------
+
+Monge mapping and Domain adaptation with Optimal transport
+----------------------------------------------------------
+
+
+Other applications
+------------------
+
+
+GPU acceleration
+----------------
+
+
+
+How to?
+-------
+
+
+
+1. **How to solve a discrete optimal transport problem ?**
+
+    The solver for discrete OT is the function :py:mod:`ot.emd` that returns
+    the OT transport matrix. If you want to solve a regularized OT you can 
+    use :py:mod:`ot.sinkhorn`.
+
+    
+
+    Here is a simple use case:
+
+    .. code:: python
+
+       # a,b are 1D histograms (sum to 1 and positive)
+       # M is the ground cost matrix
+       T=ot.emd(a,b,M) # exact linear program
+       T_reg=ot.sinkhorn(a,b,M,reg) # entropic regularized OT
+
+    More detailed examples can be seen on this
+    :doc:`auto_examples/plot_OT_2D_samples`
+    
+
+2. **Compute a Wasserstein distance**
+
+
+
+
diff --git a/docs/source/readme.rst b/docs/source/readme.rst
index d1063e8..b7828d3 100644
--- a/docs/source/readme.rst
+++ b/docs/source/readme.rst
@@ -206,7 +206,12 @@ nbviewer <https://nbviewer.jupyter.org/github/rflamary/POT/tree/master/notebooks
 Acknowledgements
 ----------------
 
-The contributors to this library are:
+This toolbox has been created and is maintained by
+
+-  `Rémi Flamary <http://remi.flamary.com/>`__
+-  `Nicolas Courty <http://people.irisa.fr/Nicolas.Courty/>`__
+
+The contributors to this library are
 
 -  `Rémi Flamary <http://remi.flamary.com/>`__
 -  `Nicolas Courty <http://people.irisa.fr/Nicolas.Courty/>`__
-- 
cgit v1.2.3


From 7f0739f73fa6a8c7fa22269c727b48d3640627be Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Tue, 25 Jun 2019 07:41:47 +0200
Subject: first shot part OT Wass

---
 docs/source/quickstart.rst | 13 +++++++++++++
 1 file changed, 13 insertions(+)

(limited to 'docs')

diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index 3d3ce98..ac96f26 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -11,6 +11,10 @@ to use for different problems related to optimal transport (OT).
 Optimal transport and Wasserstein distance
 ------------------------------------------
 
+
+Solving optimal transport
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
 The optimal transport problem between discrete distributions is often expressed
 as
     .. math::
@@ -39,6 +43,8 @@ that will return the optimal transport matrix :math:`\gamma^*`:
     - :any:`auto_examples/plot_OT_1D` 
     - :any:`auto_examples/plot_OT_L1_vs_L2` 
 
+Computing Wasserstein distance
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 The value of the OT solution is often more of interest that the OT matrix :
 
@@ -60,6 +66,13 @@ properties. It can computed from an already estimated OT matrix with
     # M is the ground cost matrix
     W=ot.emd2(a,b,M) # Wasserstein distance / EMD value
 
+
+.. hint::
+    Examples of use for :any:`ot.emd2` are available in the following examples:
+
+    - :any:`auto_examples/plot_compute_emd`
+ 
+
 .. note::
     In POT, most functions that solve OT or regularized OT problems have two
     versions that return the OT matrix or the value of the optimal solution. Fir
-- 
cgit v1.2.3


From 1e0977fd346d91c837ef90dff8c75a65b182d021 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Tue, 25 Jun 2019 08:34:59 +0200
Subject: cleaunup gromov + stat guide

---
 docs/source/index.rst      |   2 +-
 docs/source/quickstart.rst | 156 ++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 142 insertions(+), 16 deletions(-)

(limited to 'docs')

diff --git a/docs/source/index.rst b/docs/source/index.rst
index 03943e8..9078d35 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -10,7 +10,7 @@ Contents
 --------
 
 .. toctree::
-   :maxdepth: 3
+   :maxdepth: 2
 
    self
    quickstart
diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index ac96f26..d8d4838 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -1,8 +1,6 @@
 
-Quick start
-===========
-
-
+Quick start guide
+=================
 
 In the following we provide some pointers about which functions and classes 
 to use for different problems related to optimal transport (OT).
@@ -11,6 +9,11 @@ to use for different problems related to optimal transport (OT).
 Optimal transport and Wasserstein distance
 ------------------------------------------
 
+.. note::
+    In POT, most functions that solve OT or regularized OT problems have two
+    versions that return the OT matrix or the value of the optimal solution. For
+    instance :any:`ot.emd` returns the OT matrix and :any:`ot.emd2` returns the
+    Wasserstein distance.
 
 Solving optimal transport
 ^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -36,6 +39,10 @@ that will return the optimal transport matrix :math:`\gamma^*`:
     # M is the ground cost matrix
     T=ot.emd(a,b,M) # exact linear program
 
+The method used for solving the OT problem is the network simplex, it is
+implemented in C from  [1]_. It has a complexity of :math:`O(n^3)` but the
+solver is quite efficient and uses sparsity of the solution.
+
 .. hint::
     Examples of use for :any:`ot.emd` are available in the following examples:
 
@@ -73,16 +80,19 @@ properties. It can computed from an already estimated OT matrix with
     - :any:`auto_examples/plot_compute_emd`
  
 
-.. note::
-    In POT, most functions that solve OT or regularized OT problems have two
-    versions that return the OT matrix or the value of the optimal solution. Fir
-    instance :any:`ot.emd` return the OT matrix and :any:`ot.emd2` return the
-    Wassertsein distance.
-
-
 Regularized Optimal Transport
 -----------------------------
 
+Entropic regularized OT
+^^^^^^^^^^^^^^^^^^^^^^^
+
+
+Other regularization
+^^^^^^^^^^^^^^^^^^^^
+
+Stochastic gradient descent
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
 Wasserstein Barycenters
 -----------------------
 
@@ -99,8 +109,8 @@ GPU acceleration
 
 
-How to?
--------
+FAQ
+---
 
 
@@ -128,5 +138,121 @@ How to?
 2. **Compute a Wasserstein distance**
 
 
-
-
+References
+----------
+
+.. [1] Bonneel, N., Van De Panne, M., Paris, S., & Heidrich, W. (2011,
+    December). `Displacement interpolation using Lagrangian mass transport
+    <https://people.csail.mit.edu/sparis/publi/2011/sigasia/Bonneel_11_Displacement_Interpolation.pdf>`__.
+    In ACM Transactions on Graphics (TOG) (Vol. 30, No. 6, p. 158). ACM. 
+
+.. [2] Cuturi, M. (2013). `Sinkhorn distances: Lightspeed computation of
+    optimal transport <https://arxiv.org/pdf/1306.0895.pdf>`__. In Advances
+    in Neural Information Processing Systems (pp. 2292-2300).
+
+.. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G.
+    (2015). `Iterative Bregman projections for regularized transportation
+    problems <https://arxiv.org/pdf/1412.5154.pdf>`__. SIAM Journal on
+    Scientific Computing, 37(2), A1111-A1138.
+
+.. [4] S. Nakhostin, N. Courty, R. Flamary, D. Tuia, T. Corpetti,
+    `Supervised planetary unmixing with optimal
+    transport <https://hal.archives-ouvertes.fr/hal-01377236/document>`__,
+    Workshop on Hyperspectral Image and Signal Processing : Evolution in
+    Remote Sensing (WHISPERS), 2016.
+
+.. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, `Optimal Transport
+    for Domain Adaptation <https://arxiv.org/pdf/1507.00504.pdf>`__, in IEEE
+    Transactions on Pattern Analysis and Machine Intelligence , vol.PP,
+    no.99, pp.1-1
+
+.. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014).
+    `Regularized discrete optimal
+    transport <https://arxiv.org/pdf/1307.5551.pdf>`__. SIAM Journal on
+    Imaging Sciences, 7(3), 1853-1882.
+
+.. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). `Generalized
+    conditional gradient: analysis of convergence and
+    applications <https://arxiv.org/pdf/1510.06567.pdf>`__. arXiv preprint
+    arXiv:1510.06567.
+
+.. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard (2016), `Mapping
+    estimation for discrete optimal
+    transport <http://remi.flamary.com/biblio/perrot2016mapping.pdf>`__,
+    Neural Information Processing Systems (NIPS).
+
+.. [9] Schmitzer, B. (2016). `Stabilized Sparse Scaling Algorithms for
+    Entropy Regularized Transport
+    Problems <https://arxiv.org/pdf/1610.06519.pdf>`__. arXiv preprint
+    arXiv:1610.06519.
+
+.. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016).
+    `Scaling algorithms for unbalanced transport
+    problems <https://arxiv.org/pdf/1607.05816.pdf>`__. arXiv preprint
+    arXiv:1607.05816.
+
+.. [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016).
+    `Wasserstein Discriminant
+    Analysis <https://arxiv.org/pdf/1608.08063.pdf>`__. arXiv preprint
+    arXiv:1608.08063.
+
+.. [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon (2016),
+    `Gromov-Wasserstein averaging of kernel and distance
+    matrices <http://proceedings.mlr.press/v48/peyre16.html>`__
+    International Conference on Machine Learning (ICML).
+
+.. [13] Mémoli, Facundo (2011). `Gromov–Wasserstein distances and the
+    metric approach to object
+    matching <https://media.adelaide.edu.au/acvt/Publications/2011/2011-Gromov%E2%80%93Wasserstein%20Distances%20and%20the%20Metric%20Approach%20to%20Object%20Matching.pdf>`__.
+    Foundations of computational mathematics 11.4 : 417-487.
+
+.. [14] Knott, M. and Smith, C. S. (1984). `On the optimal mapping of
+    distributions <https://link.springer.com/article/10.1007/BF00934745>`__,
+    Journal of Optimization Theory and Applications Vol 43.
+
+.. [15] Peyré, G., & Cuturi, M. (2018). `Computational Optimal
+    Transport <https://arxiv.org/pdf/1803.00567.pdf>`__ .
+
+.. [16] Agueh, M., & Carlier, G. (2011). `Barycenters in the Wasserstein
+    space <https://hal.archives-ouvertes.fr/hal-00637399/document>`__. SIAM
+    Journal on Mathematical Analysis, 43(2), 904-924.
+
+.. [17] Blondel, M., Seguy, V., & Rolet, A. (2018). `Smooth and Sparse
+    Optimal Transport <https://arxiv.org/abs/1710.06276>`__. Proceedings of
+    the Twenty-First International Conference on Artificial Intelligence and
+    Statistics (AISTATS).
+
+.. [18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. (2016) `Stochastic
+    Optimization for Large-scale Optimal
+    Transport <https://arxiv.org/abs/1605.08527>`__. Advances in Neural
+    Information Processing Systems (2016).
+
+.. [19] Seguy, V., Bhushan Damodaran, B., Flamary, R., Courty, N., Rolet,
+    A.& Blondel, M. `Large-scale Optimal Transport and Mapping
+    Estimation <https://arxiv.org/pdf/1711.02283.pdf>`__. International
+    Conference on Learning Representation (2018)
+
+.. [20] Cuturi, M. and Doucet, A. (2014) `Fast Computation of Wasserstein
+    Barycenters <http://proceedings.mlr.press/v32/cuturi14.html>`__.
+    International Conference in Machine Learning
+
+.. [21] Solomon, J., De Goes, F., Peyré, G., Cuturi, M., Butscher, A.,
+    Nguyen, A. & Guibas, L. (2015). `Convolutional wasserstein distances:
+    Efficient optimal transportation on geometric
+    domains <https://dl.acm.org/citation.cfm?id=2766963>`__. ACM
+    Transactions on Graphics (TOG), 34(4), 66.
+
+.. [22] J. Altschuler, J.Weed, P. Rigollet, (2017) `Near-linear time
+    approximation algorithms for optimal transport via Sinkhorn
+    iteration <https://papers.nips.cc/paper/6792-near-linear-time-approximation-algorithms-for-optimal-transport-via-sinkhorn-iteration.pdf>`__,
+    Advances in Neural Information Processing Systems (NIPS) 31
+
+.. [23] Genevay, A., Peyré, G., Cuturi, M., `Learning Generative Models with
+    Sinkhorn Divergences <https://arxiv.org/abs/1706.00292>`__, Proceedings
+    of the Twenty-First International Conference on Artificial Intelligence
+    and Statistics, (AISTATS) 21, 2018
+
+.. [24] Vayer, T., Chapel, L., Flamary, R., Tavenard, R. and Courty, N.
+    (2019). `Optimal Transport for structured data with application on
+    graphs <http://proceedings.mlr.press/v97/titouan19a.html>`__ Proceedings
+    of the 36th International Conference on Machine Learning (ICML).
\ No newline at end of file
-- 
cgit v1.2.3


From c4b0aeb20d920ba366a656a9aee7afe78871c9c7 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Tue, 25 Jun 2019 14:26:16 +0200
Subject: add fgw examples in doc

---
 docs/cache_nbrun                                   |   2 +-
 .../source/auto_examples/auto_examples_jupyter.zip | Bin 122957 -> 139016 bytes
 docs/source/auto_examples/auto_examples_python.zip | Bin 81905 -> 93470 bytes
 .../images/sphx_glr_plot_OT_2D_samples_001.png     | Bin 22281 -> 20785 bytes
 .../images/sphx_glr_plot_OT_2D_samples_002.png     | Bin 20743 -> 21134 bytes
 .../images/sphx_glr_plot_OT_2D_samples_005.png     | Bin 9695 -> 9704 bytes
 .../images/sphx_glr_plot_OT_2D_samples_006.png     | Bin 90088 -> 79153 bytes
 .../images/sphx_glr_plot_OT_2D_samples_009.png     | Bin 15036 -> 14611 bytes
 .../images/sphx_glr_plot_OT_2D_samples_010.png     | Bin 103143 -> 97487 bytes
 .../images/sphx_glr_plot_OT_2D_samples_013.png     | Bin 0 -> 10846 bytes
 .../images/sphx_glr_plot_OT_2D_samples_014.png     | Bin 0 -> 20361 bytes
 .../images/sphx_glr_plot_barycenter_fgw_001.png    | Bin 0 -> 131827 bytes
 .../images/sphx_glr_plot_barycenter_fgw_002.png    | Bin 0 -> 29423 bytes
 .../auto_examples/images/sphx_glr_plot_fgw_004.png | Bin 0 -> 19490 bytes
 .../auto_examples/images/sphx_glr_plot_fgw_010.png | Bin 0 -> 44747 bytes
 .../auto_examples/images/sphx_glr_plot_fgw_011.png | Bin 0 -> 21337 bytes
 .../thumb/sphx_glr_plot_OT_2D_samples_thumb.png    | Bin 19155 -> 17987 bytes
 .../thumb/sphx_glr_plot_barycenter_fgw_thumb.png   | Bin 0 -> 28694 bytes
 .../images/thumb/sphx_glr_plot_fgw_thumb.png       | Bin 0 -> 17541 bytes
 docs/source/auto_examples/index.rst                |  80 ++++--
 docs/source/auto_examples/plot_OT_2D_samples.ipynb |  22 +-
 docs/source/auto_examples/plot_OT_2D_samples.py    |  26 ++
 docs/source/auto_examples/plot_OT_2D_samples.rst   |  56 +++-
 .../source/auto_examples/plot_barycenter_fgw.ipynb | 126 +++++++++
 docs/source/auto_examples/plot_barycenter_fgw.py   | 184 +++++++++++++
 docs/source/auto_examples/plot_barycenter_fgw.rst  | 268 +++++++++++++++++++
 docs/source/auto_examples/plot_fgw.ipynb           | 162 +++++++++++
 docs/source/auto_examples/plot_fgw.py              | 173 ++++++++++++
 docs/source/auto_examples/plot_fgw.rst             | 297 +++++++++++++++++++++
 29 files changed, 1372 insertions(+), 24 deletions(-)
 create mode 100644 docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_013.png
 create mode 100644 docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_014.png
 create mode 100644 docs/source/auto_examples/images/sphx_glr_plot_barycenter_fgw_001.png
 create mode 100644 docs/source/auto_examples/images/sphx_glr_plot_barycenter_fgw_002.png
 create mode 100644 docs/source/auto_examples/images/sphx_glr_plot_fgw_004.png
 create mode 100644 docs/source/auto_examples/images/sphx_glr_plot_fgw_010.png
 create mode 100644 docs/source/auto_examples/images/sphx_glr_plot_fgw_011.png
 create mode 100644 docs/source/auto_examples/images/thumb/sphx_glr_plot_barycenter_fgw_thumb.png
 create mode 100644 docs/source/auto_examples/images/thumb/sphx_glr_plot_fgw_thumb.png
 create mode 100644 docs/source/auto_examples/plot_barycenter_fgw.ipynb
 create mode 100644 docs/source/auto_examples/plot_barycenter_fgw.py
 create mode 100644 docs/source/auto_examples/plot_barycenter_fgw.rst
 create mode 100644 docs/source/auto_examples/plot_fgw.ipynb
 create mode 100644 docs/source/auto_examples/plot_fgw.py
 create mode 100644 docs/source/auto_examples/plot_fgw.rst

(limited to 'docs')

diff --git a/docs/cache_nbrun b/docs/cache_nbrun
index 6f10375..04f6fce 100644
--- a/docs/cache_nbrun
+++ b/docs/cache_nbrun
@@ -1 +1 @@
-{"plot_otda_mapping_colors_images.ipynb": "cc8bf9a857f52e4a159fe71dfda19018", "plot_optim_OTreg.ipynb": "481801bb0d133ef350a65179cf8f739a", "plot_otda_color_images.ipynb": "f804d5806c7ac1a0901e4542b1eaa77b", "plot_stochastic.ipynb": "e18253354c8c1d72567a4259eb1094f7", "plot_WDA.ipynb": "27f8de4c6d7db46497076523673eedfb", "plot_otda_linear_mapping.ipynb": "a472c767abe82020e0a58125a528785c", "plot_OT_1D_smooth.ipynb": "3a059103652225a0c78ea53895cf79e5", "plot_OT_L1_vs_L2.ipynb": "5d565b8aaf03be4309eba731127851dc", "plot_barycenter_1D.ipynb": "5f6fb8aebd8e2e91ebc77c923cb112b3", "plot_otda_classes.ipynb": "39087b6e98217851575f2271c22853a4", "plot_otda_d2.ipynb": "e6feae588103f2a8fab942e5f4eff483", "plot_otda_mapping.ipynb": "2f1ebbdc0f855d9e2b7adf9edec24d25", "plot_gromov.ipynb": "24f2aea489714d34779521f46d5e2c47", "plot_compute_emd.ipynb": "f5cd71cad882ec157dc8222721e9820c", "plot_OT_1D.ipynb": "b5348bdc561c07ec168a1622e5af4b93", "plot_gromov_barycenter.ipynb": "953e5047b886ec69ec621ec52f5e21d1", "plot_free_support_barycenter.ipynb": "246dd2feff4b233a4f1a553c5a202fdc", "plot_convolutional_barycenter.ipynb": "a72bb3716a1baaffd81ae267a673f9b6", "plot_otda_semi_supervised.ipynb": "f6dfb02ba2bbd939408ffcd22a3b007c", "plot_OT_2D_samples.ipynb": "07dbc14859fa019a966caa79fa0825bd", "plot_barycenter_lp_vs_entropic.ipynb": "51833e8c76aaedeba9599ac7a30eb357"}
\ No newline at end of file
+{"plot_otda_color_images.ipynb": "f804d5806c7ac1a0901e4542b1eaa77b", "plot_WDA.ipynb": "27f8de4c6d7db46497076523673eedfb", "plot_OT_L1_vs_L2.ipynb": "5d565b8aaf03be4309eba731127851dc", "plot_otda_semi_supervised.ipynb": "f6dfb02ba2bbd939408ffcd22a3b007c", "plot_fgw.ipynb": "2ba3e100e92ecf4dfbeb605de20b40ab", "plot_otda_d2.ipynb": "e6feae588103f2a8fab942e5f4eff483", "plot_compute_emd.ipynb": "f5cd71cad882ec157dc8222721e9820c", "plot_barycenter_fgw.ipynb": "e14100dd276bff3ffdfdf176f1b6b070", "plot_convolutional_barycenter.ipynb": "a72bb3716a1baaffd81ae267a673f9b6", "plot_optim_OTreg.ipynb": "481801bb0d133ef350a65179cf8f739a", "plot_barycenter_lp_vs_entropic.ipynb": "51833e8c76aaedeba9599ac7a30eb357", "plot_OT_1D_smooth.ipynb": "3a059103652225a0c78ea53895cf79e5", "plot_barycenter_1D.ipynb": "5f6fb8aebd8e2e91ebc77c923cb112b3", "plot_otda_mapping.ipynb": "2f1ebbdc0f855d9e2b7adf9edec24d25", "plot_OT_1D.ipynb": "b5348bdc561c07ec168a1622e5af4b93", "plot_gromov_barycenter.ipynb": "953e5047b886ec69ec621ec52f5e21d1", "plot_otda_mapping_colors_images.ipynb": "cc8bf9a857f52e4a159fe71dfda19018", "plot_stochastic.ipynb": "e18253354c8c1d72567a4259eb1094f7", "plot_otda_linear_mapping.ipynb": "a472c767abe82020e0a58125a528785c", "plot_otda_classes.ipynb": "39087b6e98217851575f2271c22853a4", "plot_free_support_barycenter.ipynb": "246dd2feff4b233a4f1a553c5a202fdc", "plot_gromov.ipynb": "24f2aea489714d34779521f46d5e2c47", "plot_OT_2D_samples.ipynb": "912a77c5dd0fc0fafa03fac3d86f1502"}
\ No newline at end of file
diff --git a/docs/source/auto_examples/auto_examples_jupyter.zip b/docs/source/auto_examples/auto_examples_jupyter.zip
index 88e1e9b..a3a7c29 100644
Binary files a/docs/source/auto_examples/auto_examples_jupyter.zip and b/docs/source/auto_examples/auto_examples_jupyter.zip differ
diff --git a/docs/source/auto_examples/auto_examples_python.zip b/docs/source/auto_examples/auto_examples_python.zip
index 120a586..86a6841 100644
Binary files a/docs/source/auto_examples/auto_examples_python.zip and b/docs/source/auto_examples/auto_examples_python.zip differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_001.png b/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_001.png
index 2e93ed1..a5bded7 100644
Binary files a/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_001.png and b/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_001.png differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_002.png b/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_002.png
index d6db0ed..1d90c2d 100644
Binary files a/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_002.png and b/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_002.png differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_005.png b/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_005.png
index 9a215ab..ea6a405 100644
Binary files a/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_005.png and b/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_005.png differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_006.png b/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_006.png
index 81c4ddb..8bc46dc 100644
Binary files a/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_006.png and b/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_006.png differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_009.png b/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_009.png
index 892b2a2..56d18ef 100644
Binary files a/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_009.png and b/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_009.png differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_010.png b/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_010.png
index c53717f..5aef7d2 100644
Binary files a/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_010.png and b/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_010.png differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_013.png b/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_013.png
new file mode 100644
index 0000000..bb8bd7c
Binary files /dev/null and b/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_013.png differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_014.png b/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_014.png
new file mode 100644
index 0000000..30cec7b
Binary files /dev/null and b/docs/source/auto_examples/images/sphx_glr_plot_OT_2D_samples_014.png differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_barycenter_fgw_001.png b/docs/source/auto_examples/images/sphx_glr_plot_barycenter_fgw_001.png
new file mode 100644
index 0000000..77e1282
Binary files /dev/null and b/docs/source/auto_examples/images/sphx_glr_plot_barycenter_fgw_001.png differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_barycenter_fgw_002.png b/docs/source/auto_examples/images/sphx_glr_plot_barycenter_fgw_002.png
new file mode 100644
index 0000000..ca6d7f8
Binary files /dev/null and b/docs/source/auto_examples/images/sphx_glr_plot_barycenter_fgw_002.png differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_fgw_004.png b/docs/source/auto_examples/images/sphx_glr_plot_fgw_004.png
new file mode 100644
index 0000000..4e0df9f
Binary files /dev/null and b/docs/source/auto_examples/images/sphx_glr_plot_fgw_004.png differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_fgw_010.png b/docs/source/auto_examples/images/sphx_glr_plot_fgw_010.png
new file mode 100644
index 0000000..d0e36e8
Binary files /dev/null and b/docs/source/auto_examples/images/sphx_glr_plot_fgw_010.png differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_fgw_011.png b/docs/source/auto_examples/images/sphx_glr_plot_fgw_011.png
new file mode 100644
index 0000000..6d7e630
Binary files /dev/null and b/docs/source/auto_examples/images/sphx_glr_plot_fgw_011.png differ
diff --git a/docs/source/auto_examples/images/thumb/sphx_glr_plot_OT_2D_samples_thumb.png b/docs/source/auto_examples/images/thumb/sphx_glr_plot_OT_2D_samples_thumb.png
index b9135dd..ae33588 100644
Binary files a/docs/source/auto_examples/images/thumb/sphx_glr_plot_OT_2D_samples_thumb.png and b/docs/source/auto_examples/images/thumb/sphx_glr_plot_OT_2D_samples_thumb.png differ
diff --git a/docs/source/auto_examples/images/thumb/sphx_glr_plot_barycenter_fgw_thumb.png b/docs/source/auto_examples/images/thumb/sphx_glr_plot_barycenter_fgw_thumb.png
new file mode 100644
index 0000000..9c3244e
Binary files /dev/null and b/docs/source/auto_examples/images/thumb/sphx_glr_plot_barycenter_fgw_thumb.png differ
diff --git a/docs/source/auto_examples/images/thumb/sphx_glr_plot_fgw_thumb.png b/docs/source/auto_examples/images/thumb/sphx_glr_plot_fgw_thumb.png
new file mode 100644
index 0000000..609339d
Binary files /dev/null and b/docs/source/auto_examples/images/thumb/sphx_glr_plot_fgw_thumb.png differ
diff --git a/docs/source/auto_examples/index.rst b/docs/source/auto_examples/index.rst
index 17a9710..9f02da4 100644
--- a/docs/source/auto_examples/index.rst
+++ b/docs/source/auto_examples/index.rst
@@ -107,26 +107,6 @@ This is a gallery of all the POT example files.
 
    /auto_examples/plot_gromov
 
-.. raw:: html
-
-    <div class="sphx-glr-thumbcontainer" tooltip="Illustration of 2D optimal transport between discributions that are weighted sum of diracs. The...">
-
-.. only:: html
-
-    .. figure:: /auto_examples/images/thumb/sphx_glr_plot_OT_2D_samples_thumb.png
-
-        :ref:`sphx_glr_auto_examples_plot_OT_2D_samples.py`
-
-.. raw:: html
-
-    </div>
-
-
-.. toctree::
-   :hidden:
-
-   /auto_examples/plot_OT_2D_samples
-
 .. raw:: html
 
     <div class="sphx-glr-thumbcontainer" tooltip="Shows how to compute multiple EMD and Sinkhorn with two differnt ground metrics and plot their ...">
@@ -207,6 +187,26 @@ This is a gallery of all the POT example files.
 
    /auto_examples/plot_WDA
 
+.. raw:: html
+
+    <div class="sphx-glr-thumbcontainer" tooltip="Illustration of 2D optimal transport between discributions that are weighted sum of diracs. The...">
+
+.. only:: html
+
+    .. figure:: /auto_examples/images/thumb/sphx_glr_plot_OT_2D_samples_thumb.png
+
+        :ref:`sphx_glr_auto_examples_plot_OT_2D_samples.py`
+
+.. raw:: html
+
+    </div>
+
+
+.. toctree::
+   :hidden:
+
+   /auto_examples/plot_OT_2D_samples
+
 .. raw:: html
 
     <div class="sphx-glr-thumbcontainer" tooltip="This example is designed to show how to use the stochatic optimization algorithms for descrete ...">
@@ -327,6 +327,26 @@ This is a gallery of all the POT example files.
 
    /auto_examples/plot_otda_semi_supervised
 
+.. raw:: html
+
+    <div class="sphx-glr-thumbcontainer" tooltip="This example illustrates the computation of FGW for 1D measures[18].">
+
+.. only:: html
+
+    .. figure:: /auto_examples/images/thumb/sphx_glr_plot_fgw_thumb.png
+
+        :ref:`sphx_glr_auto_examples_plot_fgw.py`
+
+.. raw:: html
+
+    </div>
+
+
+.. toctree::
+   :hidden:
+
+   /auto_examples/plot_fgw
+
 .. raw:: html
 
     <div class="sphx-glr-thumbcontainer" tooltip="This example introduces a domain adaptation in a 2D setting and the 4 OTDA approaches currently...">
@@ -407,6 +427,26 @@ This is a gallery of all the POT example files.
 
    /auto_examples/plot_barycenter_lp_vs_entropic
 
+.. raw:: html
+
+    <div class="sphx-glr-thumbcontainer" tooltip="This example illustrates the computation barycenter of labeled graphs using FGW">
+
+.. only:: html
+
+    .. figure:: /auto_examples/images/thumb/sphx_glr_plot_barycenter_fgw_thumb.png
+
+        :ref:`sphx_glr_auto_examples_plot_barycenter_fgw.py`
+
+.. raw:: html
+
+    </div>
+
+
+.. toctree::
+   :hidden:
+
+   /auto_examples/plot_barycenter_fgw
+
 .. raw:: html
 
     <div class="sphx-glr-thumbcontainer" tooltip="This example is designed to show how to use the Gromov-Wasserstein distance computation in POT....">
diff --git a/docs/source/auto_examples/plot_OT_2D_samples.ipynb b/docs/source/auto_examples/plot_OT_2D_samples.ipynb
index 26831f9..dad138b 100644
--- a/docs/source/auto_examples/plot_OT_2D_samples.ipynb
+++ b/docs/source/auto_examples/plot_OT_2D_samples.ipynb
@@ -26,7 +26,7 @@
       },
       "outputs": [],
       "source": [
-        "# Author: Remi Flamary <remi.flamary@unice.fr>\n#\n# License: MIT License\n\nimport numpy as np\nimport matplotlib.pylab as pl\nimport ot\nimport ot.plot"
+        "# Author: Remi Flamary <remi.flamary@unice.fr>\n#         Kilian Fatras <kilian.fatras@irisa.fr>\n#\n# License: MIT License\n\nimport numpy as np\nimport matplotlib.pylab as pl\nimport ot\nimport ot.plot"
       ]
     },
     {
@@ -100,6 +100,24 @@
       "source": [
         "#%% sinkhorn\n\n# reg term\nlambd = 1e-3\n\nGs = ot.sinkhorn(a, b, M, lambd)\n\npl.figure(5)\npl.imshow(Gs, interpolation='nearest')\npl.title('OT matrix sinkhorn')\n\npl.figure(6)\not.plot.plot2D_samples_mat(xs, xt, Gs, color=[.5, .5, 1])\npl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')\npl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')\npl.legend(loc=0)\npl.title('OT matrix Sinkhorn with samples')\n\npl.show()"
       ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Emprirical Sinkhorn\n----------------\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "#%% sinkhorn\n\n# reg term\nlambd = 1e-3\n\nGes = ot.bregman.empirical_sinkhorn(xs, xt, lambd)\n\npl.figure(7)\npl.imshow(Ges, interpolation='nearest')\npl.title('OT matrix empirical sinkhorn')\n\npl.figure(8)\not.plot.plot2D_samples_mat(xs, xt, Ges, color=[.5, .5, 1])\npl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')\npl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')\npl.legend(loc=0)\npl.title('OT matrix Sinkhorn from samples')\n\npl.show()"
+      ]
     }
   ],
   "metadata": {
@@ -118,7 +136,7 @@
       "name": "python",
       "nbconvert_exporter": "python",
       "pygments_lexer": "ipython3",
-      "version": "3.6.5"
+      "version": "3.6.8"
     }
   },
   "nbformat": 4,
diff --git a/docs/source/auto_examples/plot_OT_2D_samples.py b/docs/source/auto_examples/plot_OT_2D_samples.py
index bb952a0..63126ba 100644
--- a/docs/source/auto_examples/plot_OT_2D_samples.py
+++ b/docs/source/auto_examples/plot_OT_2D_samples.py
@@ -10,6 +10,7 @@ sum of diracs. The OT matrix is plotted with the samples.
 """
 
 # Author: Remi Flamary <remi.flamary@unice.fr>
+#         Kilian Fatras <kilian.fatras@irisa.fr>
 #
 # License: MIT License
 
@@ -100,3 +101,28 @@ pl.legend(loc=0)
 pl.title('OT matrix Sinkhorn with samples')
 
 pl.show()
+
+
+##############################################################################
+# Empirical Sinkhorn
+# ------------------
+
+#%% sinkhorn
+
+# reg term
+lambd = 1e-3
+
+Ges = ot.bregman.empirical_sinkhorn(xs, xt, lambd)
+
+pl.figure(7)
+pl.imshow(Ges, interpolation='nearest')
+pl.title('OT matrix empirical sinkhorn')
+
+pl.figure(8)
+ot.plot.plot2D_samples_mat(xs, xt, Ges, color=[.5, .5, 1])
+pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
+pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')
+pl.legend(loc=0)
+pl.title('OT matrix Sinkhorn from samples')
+
+pl.show()
diff --git a/docs/source/auto_examples/plot_OT_2D_samples.rst b/docs/source/auto_examples/plot_OT_2D_samples.rst
index 624ae3e..1f1d713 100644
--- a/docs/source/auto_examples/plot_OT_2D_samples.rst
+++ b/docs/source/auto_examples/plot_OT_2D_samples.rst
@@ -17,6 +17,7 @@ sum of diracs. The OT matrix is plotted with the samples.
 
 
     # Author: Remi Flamary <remi.flamary@unice.fr>
+    #         Kilian Fatras <kilian.fatras@irisa.fr>
     #
     # License: MIT License
 
@@ -176,6 +177,8 @@ Compute Sinkhorn
 
 
+
+
 .. rst-class:: sphx-glr-horizontal
 
 
@@ -192,7 +195,58 @@ Compute Sinkhorn
 
 
-**Total running time of the script:** ( 0 minutes  3.027 seconds)
+Empirical Sinkhorn
+------------------
+
+
+
+.. code-block:: python
+
+
+    #%% sinkhorn
+
+    # reg term
+    lambd = 1e-3
+
+    Ges = ot.bregman.empirical_sinkhorn(xs, xt, lambd)
+
+    pl.figure(7)
+    pl.imshow(Ges, interpolation='nearest')
+    pl.title('OT matrix empirical sinkhorn')
+
+    pl.figure(8)
+    ot.plot.plot2D_samples_mat(xs, xt, Ges, color=[.5, .5, 1])
+    pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
+    pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples')
+    pl.legend(loc=0)
+    pl.title('OT matrix Sinkhorn from samples')
+
+    pl.show()
+
+
+
+.. rst-class:: sphx-glr-horizontal
+
+
+    *
+
+      .. image:: /auto_examples/images/sphx_glr_plot_OT_2D_samples_013.png
+            :scale: 47
+
+    *
+
+      .. image:: /auto_examples/images/sphx_glr_plot_OT_2D_samples_014.png
+            :scale: 47
+
+
+.. rst-class:: sphx-glr-script-out
+
+ Out::
+
+    Warning: numerical errors at iteration 0
+
+
+**Total running time of the script:** ( 0 minutes  2.616 seconds)
 
 
diff --git a/docs/source/auto_examples/plot_barycenter_fgw.ipynb b/docs/source/auto_examples/plot_barycenter_fgw.ipynb
new file mode 100644
index 0000000..28229b2
--- /dev/null
+++ b/docs/source/auto_examples/plot_barycenter_fgw.ipynb
@@ -0,0 +1,126 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "\n=================================\nPlot graphs' barycenter using FGW\n=================================\n\nThis example illustrates the computation barycenter of labeled graphs using FGW\n\nRequires networkx >=2\n\n.. [18] Vayer Titouan, Chapel Laetitia, Flamary R{'e}mi, Tavenard Romain\n      and Courty Nicolas\n    \"Optimal Transport for structured data with application on graphs\"\n    International Conference on Machine Learning (ICML). 2019.\n\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# Author: Titouan Vayer <titouan.vayer@irisa.fr>\n#\n# License: MIT License\n\n#%% load libraries\nimport numpy as np\nimport matplotlib.pyplot as plt\nimport networkx as nx\nimport math\nfrom scipy.sparse.csgraph import shortest_path\nimport matplotlib.colors as mcol\nfrom matplotlib import cm\nfrom ot.gromov import fgw_barycenters\n#%% Graph functions\n\n\ndef find_thresh(C, inf=0.5, sup=3, step=10):\n    \"\"\" Trick to find the adequate thresholds from where value of the C matrix are considered close enough to say that nodes are connected\n        Tthe threshold is found by a linesearch between values \"inf\" and \"sup\" with \"step\" thresholds tested.\n        The optimal threshold is the one which minimizes the reconstruction error between the shortest_path matrix coming from the thresholded adjency matrix\n        and the original matrix.\n    Parameters\n    ----------\n    C : ndarray, shape (n_nodes,n_nodes)\n            The structure matrix to threshold\n    inf : float\n          The beginning of the linesearch\n    sup : float\n          The end of the linesearch\n    step : integer\n            Number of thresholds tested\n    \"\"\"\n    dist = []\n    search = np.linspace(inf, sup, step)\n    for thresh in search:\n        Cprime = sp_to_adjency(C, 0, thresh)\n        SC = shortest_path(Cprime, method='D')\n        SC[SC == float('inf')] = 100\n        dist.append(np.linalg.norm(SC - C))\n    return search[np.argmin(dist)], dist\n\n\ndef sp_to_adjency(C, threshinf=0.2, threshsup=1.8):\n    \"\"\" Thresholds the structure matrix in order to compute an adjency matrix.\n    All values between threshinf and threshsup are considered representing connected nodes and set to 1. 
Else are set to 0\n    Parameters\n    ----------\n    C : ndarray, shape (n_nodes,n_nodes)\n        The structure matrix to threshold\n    threshinf : float\n        The minimum value of distance from which the new value is set to 1\n    threshsup : float\n        The maximum value of distance from which the new value is set to 1\n    Returns\n    -------\n    C : ndarray, shape (n_nodes,n_nodes)\n        The threshold matrix. Each element is in {0,1}\n    \"\"\"\n    H = np.zeros_like(C)\n    np.fill_diagonal(H, np.diagonal(C))\n    C = C - H\n    C = np.minimum(np.maximum(C, threshinf), threshsup)\n    C[C == threshsup] = 0\n    C[C != 0] = 1\n\n    return C\n\n\ndef build_noisy_circular_graph(N=20, mu=0, sigma=0.3, with_noise=False, structure_noise=False, p=None):\n    \"\"\" Create a noisy circular graph\n    \"\"\"\n    g = nx.Graph()\n    g.add_nodes_from(list(range(N)))\n    for i in range(N):\n        noise = float(np.random.normal(mu, sigma, 1))\n        if with_noise:\n            g.add_node(i, attr_name=math.sin((2 * i * math.pi / N)) + noise)\n        else:\n            g.add_node(i, attr_name=math.sin(2 * i * math.pi / N))\n        g.add_edge(i, i + 1)\n        if structure_noise:\n            randomint = np.random.randint(0, p)\n            if randomint == 0:\n                if i <= N - 3:\n                    g.add_edge(i, i + 2)\n                if i == N - 2:\n                    g.add_edge(i, 0)\n                if i == N - 1:\n                    g.add_edge(i, 1)\n    g.add_edge(N, 0)\n    noise = float(np.random.normal(mu, sigma, 1))\n    if with_noise:\n        g.add_node(N, attr_name=math.sin((2 * N * math.pi / N)) + noise)\n    else:\n        g.add_node(N, attr_name=math.sin(2 * N * math.pi / N))\n    return g\n\n\ndef graph_colors(nx_graph, vmin=0, vmax=7):\n    cnorm = mcol.Normalize(vmin=vmin, vmax=vmax)\n    cpick = cm.ScalarMappable(norm=cnorm, cmap='viridis')\n    cpick.set_array([])\n    val_map = {}\n    for k, v in 
nx.get_node_attributes(nx_graph, 'attr_name').items():\n        val_map[k] = cpick.to_rgba(v)\n    colors = []\n    for node in nx_graph.nodes():\n        colors.append(val_map[node])\n    return colors"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Generate data\n-------------\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "#%% circular dataset\n# We build a dataset of noisy circular graphs.\n# Noise is added on the structures by random connections and on the features by gaussian noise.\n\n\nnp.random.seed(30)\nX0 = []\nfor k in range(9):\n    X0.append(build_noisy_circular_graph(np.random.randint(15, 25), with_noise=True, structure_noise=True, p=3))"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Plot data\n---------\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "#%% Plot graphs\n\nplt.figure(figsize=(8, 10))\nfor i in range(len(X0)):\n    plt.subplot(3, 3, i + 1)\n    g = X0[i]\n    pos = nx.kamada_kawai_layout(g)\n    nx.draw(g, pos=pos, node_color=graph_colors(g, vmin=-1, vmax=1), with_labels=False, node_size=100)\nplt.suptitle('Dataset of noisy graphs. Color indicates the label', fontsize=20)\nplt.show()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Barycenter computation\n----------------------\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "#%% We compute the barycenter using FGW. Structure matrices are computed using the shortest_path distance in the graph\n# Features distances are the euclidean distances\nCs = [shortest_path(nx.adjacency_matrix(x)) for x in X0]\nps = [np.ones(len(x.nodes())) / len(x.nodes()) for x in X0]\nYs = [np.array([v for (k, v) in nx.get_node_attributes(x, 'attr_name').items()]).reshape(-1, 1) for x in X0]\nlambdas = np.array([np.ones(len(Ys)) / len(Ys)]).ravel()\nsizebary = 15  # we choose a barycenter with 15 nodes\n\nA, C, log = fgw_barycenters(sizebary, Ys, Cs, ps, lambdas, alpha=0.95, log=True)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Plot Barycenter\n-------------------------\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "#%% Create the barycenter\nbary = nx.from_numpy_matrix(sp_to_adjency(C, threshinf=0, threshsup=find_thresh(C, sup=100, step=100)[0]))\nfor i, v in enumerate(A.ravel()):\n    bary.add_node(i, attr_name=v)\n\n#%%\npos = nx.kamada_kawai_layout(bary)\nnx.draw(bary, pos=pos, node_color=graph_colors(bary, vmin=-1, vmax=1), with_labels=False)\nplt.suptitle('Barycenter', fontsize=20)\nplt.show()"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.8"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/docs/source/auto_examples/plot_barycenter_fgw.py b/docs/source/auto_examples/plot_barycenter_fgw.py
new file mode 100644
index 0000000..77b0370
--- /dev/null
+++ b/docs/source/auto_examples/plot_barycenter_fgw.py
@@ -0,0 +1,184 @@
+# -*- coding: utf-8 -*-
+"""
+=================================
+Plot graphs' barycenter using FGW
+=================================
+
+This example illustrates the computation of the barycenter of labeled graphs using FGW
+
+Requires networkx >=2
+
+.. [18] Vayer Titouan, Chapel Laetitia, Flamary Rémi, Tavenard Romain
+    and Courty Nicolas
+    "Optimal Transport for structured data with application on graphs"
+    International Conference on Machine Learning (ICML). 2019.
+
+"""
+
+# Author: Titouan Vayer <titouan.vayer@irisa.fr>
+#
+# License: MIT License
+
+#%% load libraries
+import numpy as np
+import matplotlib.pyplot as plt
+import networkx as nx
+import math
+from scipy.sparse.csgraph import shortest_path
+import matplotlib.colors as mcol
+from matplotlib import cm
+from ot.gromov import fgw_barycenters
+#%% Graph functions
+
+
+def find_thresh(C, inf=0.5, sup=3, step=10):
+    """ Trick to find the adequate thresholds from where value of the C matrix are considered close enough to say that nodes are connected
+        The threshold is found by a linesearch between values "inf" and "sup" with "step" thresholds tested.
+        The optimal threshold is the one which minimizes the reconstruction error between the shortest_path matrix coming from the thresholded adjacency matrix
+        and the original matrix.
+    Parameters
+    ----------
+    C : ndarray, shape (n_nodes,n_nodes)
+            The structure matrix to threshold
+    inf : float
+          The beginning of the linesearch
+    sup : float
+          The end of the linesearch
+    step : integer
+            Number of thresholds tested
+    """
+    dist = []
+    search = np.linspace(inf, sup, step)
+    for thresh in search:
+        Cprime = sp_to_adjency(C, 0, thresh)
+        SC = shortest_path(Cprime, method='D')
+        SC[SC == float('inf')] = 100
+        dist.append(np.linalg.norm(SC - C))
+    return search[np.argmin(dist)], dist
+
+
+def sp_to_adjency(C, threshinf=0.2, threshsup=1.8):
+    """ Thresholds the structure matrix in order to compute an adjency matrix.
+    All values between threshinf and threshsup are considered representing connected nodes and set to 1. Else are set to 0
+    Parameters
+    ----------
+    C : ndarray, shape (n_nodes,n_nodes)
+        The structure matrix to threshold
+    threshinf : float
+        The minimum value of distance from which the new value is set to 1
+    threshsup : float
+        The maximum value of distance from which the new value is set to 1
+    Returns
+    -------
+    C : ndarray, shape (n_nodes,n_nodes)
+        The threshold matrix. Each element is in {0,1}
+    """
+    H = np.zeros_like(C)
+    np.fill_diagonal(H, np.diagonal(C))
+    C = C - H
+    C = np.minimum(np.maximum(C, threshinf), threshsup)
+    C[C == threshsup] = 0
+    C[C != 0] = 1
+
+    return C
+
+
+def build_noisy_circular_graph(N=20, mu=0, sigma=0.3, with_noise=False, structure_noise=False, p=None):
+    """ Create a noisy circular graph
+    """
+    g = nx.Graph()
+    g.add_nodes_from(list(range(N)))
+    for i in range(N):
+        noise = float(np.random.normal(mu, sigma, 1))
+        if with_noise:
+            g.add_node(i, attr_name=math.sin((2 * i * math.pi / N)) + noise)
+        else:
+            g.add_node(i, attr_name=math.sin(2 * i * math.pi / N))
+        g.add_edge(i, i + 1)
+        if structure_noise:
+            randomint = np.random.randint(0, p)
+            if randomint == 0:
+                if i <= N - 3:
+                    g.add_edge(i, i + 2)
+                if i == N - 2:
+                    g.add_edge(i, 0)
+                if i == N - 1:
+                    g.add_edge(i, 1)
+    g.add_edge(N, 0)
+    noise = float(np.random.normal(mu, sigma, 1))
+    if with_noise:
+        g.add_node(N, attr_name=math.sin((2 * N * math.pi / N)) + noise)
+    else:
+        g.add_node(N, attr_name=math.sin(2 * N * math.pi / N))
+    return g
+
+
+def graph_colors(nx_graph, vmin=0, vmax=7):
+    cnorm = mcol.Normalize(vmin=vmin, vmax=vmax)
+    cpick = cm.ScalarMappable(norm=cnorm, cmap='viridis')
+    cpick.set_array([])
+    val_map = {}
+    for k, v in nx.get_node_attributes(nx_graph, 'attr_name').items():
+        val_map[k] = cpick.to_rgba(v)
+    colors = []
+    for node in nx_graph.nodes():
+        colors.append(val_map[node])
+    return colors
+
+##############################################################################
+# Generate data
+# -------------
+
+#%% circular dataset
+# We build a dataset of noisy circular graphs.
+# Noise is added on the structures by random connections and on the features by gaussian noise.
+
+
+np.random.seed(30)
+X0 = []
+for k in range(9):
+    X0.append(build_noisy_circular_graph(np.random.randint(15, 25), with_noise=True, structure_noise=True, p=3))
+
+##############################################################################
+# Plot data
+# ---------
+
+#%% Plot graphs
+
+plt.figure(figsize=(8, 10))
+for i in range(len(X0)):
+    plt.subplot(3, 3, i + 1)
+    g = X0[i]
+    pos = nx.kamada_kawai_layout(g)
+    nx.draw(g, pos=pos, node_color=graph_colors(g, vmin=-1, vmax=1), with_labels=False, node_size=100)
+plt.suptitle('Dataset of noisy graphs. Color indicates the label', fontsize=20)
+plt.show()
+
+##############################################################################
+# Barycenter computation
+# ----------------------
+
+#%% We compute the barycenter using FGW. Structure matrices are computed using the shortest_path distance in the graph
+# Features distances are the euclidean distances
+Cs = [shortest_path(nx.adjacency_matrix(x)) for x in X0]
+ps = [np.ones(len(x.nodes())) / len(x.nodes()) for x in X0]
+Ys = [np.array([v for (k, v) in nx.get_node_attributes(x, 'attr_name').items()]).reshape(-1, 1) for x in X0]
+lambdas = np.array([np.ones(len(Ys)) / len(Ys)]).ravel()
+sizebary = 15  # we choose a barycenter with 15 nodes
+
+A, C, log = fgw_barycenters(sizebary, Ys, Cs, ps, lambdas, alpha=0.95, log=True)
+
+##############################################################################
+# Plot Barycenter
+# -------------------------
+
+#%% Create the barycenter
+bary = nx.from_numpy_matrix(sp_to_adjency(C, threshinf=0, threshsup=find_thresh(C, sup=100, step=100)[0]))
+for i, v in enumerate(A.ravel()):
+    bary.add_node(i, attr_name=v)
+
+#%%
+pos = nx.kamada_kawai_layout(bary)
+nx.draw(bary, pos=pos, node_color=graph_colors(bary, vmin=-1, vmax=1), with_labels=False)
+plt.suptitle('Barycenter', fontsize=20)
+plt.show()
diff --git a/docs/source/auto_examples/plot_barycenter_fgw.rst b/docs/source/auto_examples/plot_barycenter_fgw.rst
new file mode 100644
index 0000000..2c44a65
--- /dev/null
+++ b/docs/source/auto_examples/plot_barycenter_fgw.rst
@@ -0,0 +1,268 @@
+
+
+.. _sphx_glr_auto_examples_plot_barycenter_fgw.py:
+
+
+=================================
+Plot graphs' barycenter using FGW
+=================================
+
+This example illustrates the computation of the barycenter of labeled graphs using FGW
+
+Requires networkx >=2
+
+.. [18] Vayer Titouan, Chapel Laetitia, Flamary Rémi, Tavenard Romain
+    and Courty Nicolas
+    "Optimal Transport for structured data with application on graphs"
+    International Conference on Machine Learning (ICML). 2019.
+
+
+
+
+.. code-block:: python
+
+
+    # Author: Titouan Vayer <titouan.vayer@irisa.fr>
+    #
+    # License: MIT License
+
+    #%% load libraries
+    import numpy as np
+    import matplotlib.pyplot as plt
+    import networkx as nx
+    import math
+    from scipy.sparse.csgraph import shortest_path
+    import matplotlib.colors as mcol
+    from matplotlib import cm
+    from ot.gromov import fgw_barycenters
+    #%% Graph functions
+
+
+    def find_thresh(C, inf=0.5, sup=3, step=10):
+        """ Trick to find the adequate thresholds from where value of the C matrix are considered close enough to say that nodes are connected
+            Tthe threshold is found by a linesearch between values "inf" and "sup" with "step" thresholds tested.
+            The optimal threshold is the one which minimizes the reconstruction error between the shortest_path matrix coming from the thresholded adjency matrix
+            and the original matrix.
+        Parameters
+        ----------
+        C : ndarray, shape (n_nodes,n_nodes)
+                The structure matrix to threshold
+        inf : float
+              The beginning of the linesearch
+        sup : float
+              The end of the linesearch
+        step : integer
+                Number of thresholds tested
+        """
+        dist = []
+        search = np.linspace(inf, sup, step)
+        for thresh in search:
+            Cprime = sp_to_adjency(C, 0, thresh)
+            SC = shortest_path(Cprime, method='D')
+            SC[SC == float('inf')] = 100
+            dist.append(np.linalg.norm(SC - C))
+        return search[np.argmin(dist)], dist
+
+
+    def sp_to_adjency(C, threshinf=0.2, threshsup=1.8):
+        """ Thresholds the structure matrix in order to compute an adjency matrix.
+        All values between threshinf and threshsup are considered representing connected nodes and set to 1. Else are set to 0
+        Parameters
+        ----------
+        C : ndarray, shape (n_nodes,n_nodes)
+            The structure matrix to threshold
+        threshinf : float
+            The minimum value of distance from which the new value is set to 1
+        threshsup : float
+            The maximum value of distance from which the new value is set to 1
+        Returns
+        -------
+        C : ndarray, shape (n_nodes,n_nodes)
+            The threshold matrix. Each element is in {0,1}
+        """
+        H = np.zeros_like(C)
+        np.fill_diagonal(H, np.diagonal(C))
+        C = C - H
+        C = np.minimum(np.maximum(C, threshinf), threshsup)
+        C[C == threshsup] = 0
+        C[C != 0] = 1
+
+        return C
+
+
+    def build_noisy_circular_graph(N=20, mu=0, sigma=0.3, with_noise=False, structure_noise=False, p=None):
+        """ Create a noisy circular graph
+        """
+        g = nx.Graph()
+        g.add_nodes_from(list(range(N)))
+        for i in range(N):
+            noise = float(np.random.normal(mu, sigma, 1))
+            if with_noise:
+                g.add_node(i, attr_name=math.sin((2 * i * math.pi / N)) + noise)
+            else:
+                g.add_node(i, attr_name=math.sin(2 * i * math.pi / N))
+            g.add_edge(i, i + 1)
+            if structure_noise:
+                randomint = np.random.randint(0, p)
+                if randomint == 0:
+                    if i <= N - 3:
+                        g.add_edge(i, i + 2)
+                    if i == N - 2:
+                        g.add_edge(i, 0)
+                    if i == N - 1:
+                        g.add_edge(i, 1)
+        g.add_edge(N, 0)
+        noise = float(np.random.normal(mu, sigma, 1))
+        if with_noise:
+            g.add_node(N, attr_name=math.sin((2 * N * math.pi / N)) + noise)
+        else:
+            g.add_node(N, attr_name=math.sin(2 * N * math.pi / N))
+        return g
+
+
+    def graph_colors(nx_graph, vmin=0, vmax=7):
+        cnorm = mcol.Normalize(vmin=vmin, vmax=vmax)
+        cpick = cm.ScalarMappable(norm=cnorm, cmap='viridis')
+        cpick.set_array([])
+        val_map = {}
+        for k, v in nx.get_node_attributes(nx_graph, 'attr_name').items():
+            val_map[k] = cpick.to_rgba(v)
+        colors = []
+        for node in nx_graph.nodes():
+            colors.append(val_map[node])
+        return colors
+
+
+
+
+
+
+
+Generate data
+-------------
+
+
+
+.. code-block:: python
+
+
+    #%% circular dataset
+    # We build a dataset of noisy circular graphs.
+    # Noise is added on the structures by random connections and on the features by gaussian noise.
+
+
+    np.random.seed(30)
+    X0 = []
+    for k in range(9):
+        X0.append(build_noisy_circular_graph(np.random.randint(15, 25), with_noise=True, structure_noise=True, p=3))
+
+
+
+
+
+
+
+Plot data
+---------
+
+
+
+.. code-block:: python
+
+
+    #%% Plot graphs
+
+    plt.figure(figsize=(8, 10))
+    for i in range(len(X0)):
+        plt.subplot(3, 3, i + 1)
+        g = X0[i]
+        pos = nx.kamada_kawai_layout(g)
+        nx.draw(g, pos=pos, node_color=graph_colors(g, vmin=-1, vmax=1), with_labels=False, node_size=100)
+    plt.suptitle('Dataset of noisy graphs. Color indicates the label', fontsize=20)
+    plt.show()
+
+
+
+
+.. image:: /auto_examples/images/sphx_glr_plot_barycenter_fgw_001.png
+    :align: center
+
+
+
+
+Barycenter computation
+----------------------
+
+
+
+.. code-block:: python
+
+
+    #%% We compute the barycenter using FGW. Structure matrices are computed using the shortest_path distance in the graph
+    # Features distances are the euclidean distances
+    Cs = [shortest_path(nx.adjacency_matrix(x)) for x in X0]
+    ps = [np.ones(len(x.nodes())) / len(x.nodes()) for x in X0]
+    Ys = [np.array([v for (k, v) in nx.get_node_attributes(x, 'attr_name').items()]).reshape(-1, 1) for x in X0]
+    lambdas = np.array([np.ones(len(Ys)) / len(Ys)]).ravel()
+    sizebary = 15  # we choose a barycenter with 15 nodes
+
+    A, C, log = fgw_barycenters(sizebary, Ys, Cs, ps, lambdas, alpha=0.95, log=True)
+
+
+
+
+
+
+
+Plot Barycenter
+-------------------------
+
+
+
+.. code-block:: python
+
+
+    #%% Create the barycenter
+    bary = nx.from_numpy_matrix(sp_to_adjency(C, threshinf=0, threshsup=find_thresh(C, sup=100, step=100)[0]))
+    for i, v in enumerate(A.ravel()):
+        bary.add_node(i, attr_name=v)
+
+    #%%
+    pos = nx.kamada_kawai_layout(bary)
+    nx.draw(bary, pos=pos, node_color=graph_colors(bary, vmin=-1, vmax=1), with_labels=False)
+    plt.suptitle('Barycenter', fontsize=20)
+    plt.show()
+
+
+
+.. image:: /auto_examples/images/sphx_glr_plot_barycenter_fgw_002.png
+    :align: center
+
+
+
+
+**Total running time of the script:** ( 0 minutes  2.065 seconds)
+
+
+
+.. only :: html
+
+ .. container:: sphx-glr-footer
+
+
+  .. container:: sphx-glr-download
+
+     :download:`Download Python source code: plot_barycenter_fgw.py <plot_barycenter_fgw.py>`
+
+
+
+  .. container:: sphx-glr-download
+
+     :download:`Download Jupyter notebook: plot_barycenter_fgw.ipynb <plot_barycenter_fgw.ipynb>`
+
+
+.. only:: html
+
+ .. rst-class:: sphx-glr-signature
+
+    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.readthedocs.io>`_
diff --git a/docs/source/auto_examples/plot_fgw.ipynb b/docs/source/auto_examples/plot_fgw.ipynb
new file mode 100644
index 0000000..1b150bd
--- /dev/null
+++ b/docs/source/auto_examples/plot_fgw.ipynb
@@ -0,0 +1,162 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "\n# Plot Fused-gromov-Wasserstein\n\n\nThis example illustrates the computation of FGW for 1D measures[18].\n\n.. [18] Vayer Titouan, Chapel Laetitia, Flamary R{'e}mi, Tavenard Romain\n      and Courty Nicolas\n    \"Optimal Transport for structured data with application on graphs\"\n    International Conference on Machine Learning (ICML). 2019.\n\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# Author: Titouan Vayer <titouan.vayer@irisa.fr>\n#\n# License: MIT License\n\nimport matplotlib.pyplot as pl\nimport numpy as np\nimport ot\nfrom ot.gromov import gromov_wasserstein, fused_gromov_wasserstein"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Generate data\n---------\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "#%% parameters\n# We create two 1D random measures\nn = 20  # number of points in the first distribution\nn2 = 30  # number of points in the second distribution\nsig = 1  # std of first distribution\nsig2 = 0.1  # std of second distribution\n\nnp.random.seed(0)\n\nphi = np.arange(n)[:, None]\nxs = phi + sig * np.random.randn(n, 1)\nys = np.vstack((np.ones((n // 2, 1)), 0 * np.ones((n // 2, 1)))) + sig2 * np.random.randn(n, 1)\n\nphi2 = np.arange(n2)[:, None]\nxt = phi2 + sig * np.random.randn(n2, 1)\nyt = np.vstack((np.ones((n2 // 2, 1)), 0 * np.ones((n2 // 2, 1)))) + sig2 * np.random.randn(n2, 1)\nyt = yt[::-1, :]\n\np = ot.unif(n)\nq = ot.unif(n2)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Plot data\n---------\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "#%% plot the distributions\n\npl.close(10)\npl.figure(10, (7, 7))\n\npl.subplot(2, 1, 1)\n\npl.scatter(ys, xs, c=phi, s=70)\npl.ylabel('Feature value a', fontsize=20)\npl.title('$\\mu=\\sum_i \\delta_{x_i,a_i}$', fontsize=25, usetex=True, y=1)\npl.xticks(())\npl.yticks(())\npl.subplot(2, 1, 2)\npl.scatter(yt, xt, c=phi2, s=70)\npl.xlabel('coordinates x/y', fontsize=25)\npl.ylabel('Feature value b', fontsize=20)\npl.title('$\\\\nu=\\sum_j \\delta_{y_j,b_j}$', fontsize=25, usetex=True, y=1)\npl.yticks(())\npl.tight_layout()\npl.show()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Create structure matrices and across-feature distance matrix\n---------\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "#%% Structure matrices and across-features distance matrix\nC1 = ot.dist(xs)\nC2 = ot.dist(xt)\nM = ot.dist(ys, yt)\nw1 = ot.unif(C1.shape[0])\nw2 = ot.unif(C2.shape[0])\nGot = ot.emd([], [], M)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Plot matrices\n---------\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "#%%\ncmap = 'Reds'\npl.close(10)\npl.figure(10, (5, 5))\nfs = 15\nl_x = [0, 5, 10, 15]\nl_y = [0, 5, 10, 15, 20, 25]\ngs = pl.GridSpec(5, 5)\n\nax1 = pl.subplot(gs[3:, :2])\n\npl.imshow(C1, cmap=cmap, interpolation='nearest')\npl.title(\"$C_1$\", fontsize=fs)\npl.xlabel(\"$k$\", fontsize=fs)\npl.ylabel(\"$i$\", fontsize=fs)\npl.xticks(l_x)\npl.yticks(l_x)\n\nax2 = pl.subplot(gs[:3, 2:])\n\npl.imshow(C2, cmap=cmap, interpolation='nearest')\npl.title(\"$C_2$\", fontsize=fs)\npl.ylabel(\"$l$\", fontsize=fs)\n#pl.ylabel(\"$l$\",fontsize=fs)\npl.xticks(())\npl.yticks(l_y)\nax2.set_aspect('auto')\n\nax3 = pl.subplot(gs[3:, 2:], sharex=ax2, sharey=ax1)\npl.imshow(M, cmap=cmap, interpolation='nearest')\npl.yticks(l_x)\npl.xticks(l_y)\npl.ylabel(\"$i$\", fontsize=fs)\npl.title(\"$M_{AB}$\", fontsize=fs)\npl.xlabel(\"$j$\", fontsize=fs)\npl.tight_layout()\nax3.set_aspect('auto')\npl.show()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Compute FGW/GW\n---------\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "#%% Computing FGW and GW\nalpha = 1e-3\n\not.tic()\nGwg, logw = fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=alpha, verbose=True, log=True)\not.toc()\n\n#%reload_ext WGW\nGg, log = gromov_wasserstein(C1, C2, p, q, loss_fun='square_loss', verbose=True, log=True)"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Visualize transport matrices\n---------\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "#%% visu OT matrix\ncmap = 'Blues'\nfs = 15\npl.figure(2, (13, 5))\npl.clf()\npl.subplot(1, 3, 1)\npl.imshow(Got, cmap=cmap, interpolation='nearest')\n#pl.xlabel(\"$y$\",fontsize=fs)\npl.ylabel(\"$i$\", fontsize=fs)\npl.xticks(())\n\npl.title('Wasserstein ($M$ only)')\n\npl.subplot(1, 3, 2)\npl.imshow(Gg, cmap=cmap, interpolation='nearest')\npl.title('Gromov ($C_1,C_2$ only)')\npl.xticks(())\npl.subplot(1, 3, 3)\npl.imshow(Gwg, cmap=cmap, interpolation='nearest')\npl.title('FGW  ($M+C_1,C_2$)')\n\npl.xlabel(\"$j$\", fontsize=fs)\npl.ylabel(\"$i$\", fontsize=fs)\n\npl.tight_layout()\npl.show()"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.8"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/docs/source/auto_examples/plot_fgw.py b/docs/source/auto_examples/plot_fgw.py
new file mode 100644
index 0000000..43efc94
--- /dev/null
+++ b/docs/source/auto_examples/plot_fgw.py
@@ -0,0 +1,173 @@
+# -*- coding: utf-8 -*-
+"""
+==============================
+Plot Fused-gromov-Wasserstein
+==============================
+
+This example illustrates the computation of FGW for 1D measures[18].
+
+.. [18] Vayer Titouan, Chapel Laetitia, Flamary Rémi, Tavenard Romain
+    and Courty Nicolas
+    "Optimal Transport for structured data with application on graphs"
+    International Conference on Machine Learning (ICML). 2019.
+
+"""
+
+# Author: Titouan Vayer <titouan.vayer@irisa.fr>
+#
+# License: MIT License
+
+import matplotlib.pyplot as pl
+import numpy as np
+import ot
+from ot.gromov import gromov_wasserstein, fused_gromov_wasserstein
+
+##############################################################################
+# Generate data
+# -------------
+
+#%% parameters
+# We create two 1D random measures
+n = 20  # number of points in the first distribution
+n2 = 30  # number of points in the second distribution
+sig = 1  # std of first distribution
+sig2 = 0.1  # std of second distribution
+
+np.random.seed(0)
+
+phi = np.arange(n)[:, None]
+xs = phi + sig * np.random.randn(n, 1)
+ys = np.vstack((np.ones((n // 2, 1)), 0 * np.ones((n // 2, 1)))) + sig2 * np.random.randn(n, 1)
+
+phi2 = np.arange(n2)[:, None]
+xt = phi2 + sig * np.random.randn(n2, 1)
+yt = np.vstack((np.ones((n2 // 2, 1)), 0 * np.ones((n2 // 2, 1)))) + sig2 * np.random.randn(n2, 1)
+yt = yt[::-1, :]
+
+p = ot.unif(n)
+q = ot.unif(n2)
+
+##############################################################################
+# Plot data
+# ---------
+
+#%% plot the distributions
+
+pl.close(10)
+pl.figure(10, (7, 7))
+
+pl.subplot(2, 1, 1)
+
+pl.scatter(ys, xs, c=phi, s=70)
+pl.ylabel('Feature value a', fontsize=20)
+pl.title('$\mu=\sum_i \delta_{x_i,a_i}$', fontsize=25, usetex=True, y=1)
+pl.xticks(())
+pl.yticks(())
+pl.subplot(2, 1, 2)
+pl.scatter(yt, xt, c=phi2, s=70)
+pl.xlabel('coordinates x/y', fontsize=25)
+pl.ylabel('Feature value b', fontsize=20)
+pl.title('$\\nu=\sum_j \delta_{y_j,b_j}$', fontsize=25, usetex=True, y=1)
+pl.yticks(())
+pl.tight_layout()
+pl.show()
+
+##############################################################################
+# Create structure matrices and across-feature distance matrix
+# ------------------------------------------------------------
+
+#%% Structure matrices and across-features distance matrix
+C1 = ot.dist(xs)
+C2 = ot.dist(xt)
+M = ot.dist(ys, yt)
+w1 = ot.unif(C1.shape[0])
+w2 = ot.unif(C2.shape[0])
+Got = ot.emd([], [], M)
+
+##############################################################################
+# Plot matrices
+# -------------
+
+#%%
+cmap = 'Reds'
+pl.close(10)
+pl.figure(10, (5, 5))
+fs = 15
+l_x = [0, 5, 10, 15]
+l_y = [0, 5, 10, 15, 20, 25]
+gs = pl.GridSpec(5, 5)
+
+ax1 = pl.subplot(gs[3:, :2])
+
+pl.imshow(C1, cmap=cmap, interpolation='nearest')
+pl.title("$C_1$", fontsize=fs)
+pl.xlabel("$k$", fontsize=fs)
+pl.ylabel("$i$", fontsize=fs)
+pl.xticks(l_x)
+pl.yticks(l_x)
+
+ax2 = pl.subplot(gs[:3, 2:])
+
+pl.imshow(C2, cmap=cmap, interpolation='nearest')
+pl.title("$C_2$", fontsize=fs)
+pl.ylabel("$l$", fontsize=fs)
+#pl.ylabel("$l$",fontsize=fs)
+pl.xticks(())
+pl.yticks(l_y)
+ax2.set_aspect('auto')
+
+ax3 = pl.subplot(gs[3:, 2:], sharex=ax2, sharey=ax1)
+pl.imshow(M, cmap=cmap, interpolation='nearest')
+pl.yticks(l_x)
+pl.xticks(l_y)
+pl.ylabel("$i$", fontsize=fs)
+pl.title("$M_{AB}$", fontsize=fs)
+pl.xlabel("$j$", fontsize=fs)
+pl.tight_layout()
+ax3.set_aspect('auto')
+pl.show()
+
+##############################################################################
+# Compute FGW/GW
+# --------------
+
+#%% Computing FGW and GW
+alpha = 1e-3
+
+ot.tic()
+Gwg, logw = fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=alpha, verbose=True, log=True)
+ot.toc()
+
+#%reload_ext WGW
+Gg, log = gromov_wasserstein(C1, C2, p, q, loss_fun='square_loss', verbose=True, log=True)
+
+##############################################################################
+# Visualize transport matrices
+# ----------------------------
+
+#%% visu OT matrix
+cmap = 'Blues'
+fs = 15
+pl.figure(2, (13, 5))
+pl.clf()
+pl.subplot(1, 3, 1)
+pl.imshow(Got, cmap=cmap, interpolation='nearest')
+#pl.xlabel("$y$",fontsize=fs)
+pl.ylabel("$i$", fontsize=fs)
+pl.xticks(())
+
+pl.title('Wasserstein ($M$ only)')
+
+pl.subplot(1, 3, 2)
+pl.imshow(Gg, cmap=cmap, interpolation='nearest')
+pl.title('Gromov ($C_1,C_2$ only)')
+pl.xticks(())
+pl.subplot(1, 3, 3)
+pl.imshow(Gwg, cmap=cmap, interpolation='nearest')
+pl.title('FGW  ($M+C_1,C_2$)')
+
+pl.xlabel("$j$", fontsize=fs)
+pl.ylabel("$i$", fontsize=fs)
+
+pl.tight_layout()
+pl.show()
diff --git a/docs/source/auto_examples/plot_fgw.rst b/docs/source/auto_examples/plot_fgw.rst
new file mode 100644
index 0000000..aec725d
--- /dev/null
+++ b/docs/source/auto_examples/plot_fgw.rst
@@ -0,0 +1,297 @@
+
+
+.. _sphx_glr_auto_examples_plot_fgw.py:
+
+
+==============================
+Plot Fused-gromov-Wasserstein
+==============================
+
+This example illustrates the computation of FGW for 1D measures[18].
+
+.. [18] Vayer Titouan, Chapel Laetitia, Flamary Rémi, Tavenard Romain
+    and Courty Nicolas
+    "Optimal Transport for structured data with application on graphs"
+    International Conference on Machine Learning (ICML). 2019.
+
+
+
+
+.. code-block:: python
+
+
+    # Author: Titouan Vayer <titouan.vayer@irisa.fr>
+    #
+    # License: MIT License
+
+    import matplotlib.pyplot as pl
+    import numpy as np
+    import ot
+    from ot.gromov import gromov_wasserstein, fused_gromov_wasserstein
+
+
+
+
+
+
+
+Generate data
+-------------
+
+
+
+.. code-block:: python
+
+
+    #%% parameters
+    # We create two 1D random measures
+    n = 20  # number of points in the first distribution
+    n2 = 30  # number of points in the second distribution
+    sig = 1  # std of first distribution
+    sig2 = 0.1  # std of second distribution
+
+    np.random.seed(0)
+
+    phi = np.arange(n)[:, None]
+    xs = phi + sig * np.random.randn(n, 1)
+    ys = np.vstack((np.ones((n // 2, 1)), 0 * np.ones((n // 2, 1)))) + sig2 * np.random.randn(n, 1)
+
+    phi2 = np.arange(n2)[:, None]
+    xt = phi2 + sig * np.random.randn(n2, 1)
+    yt = np.vstack((np.ones((n2 // 2, 1)), 0 * np.ones((n2 // 2, 1)))) + sig2 * np.random.randn(n2, 1)
+    yt = yt[::-1, :]
+
+    p = ot.unif(n)
+    q = ot.unif(n2)
+
+
+
+
+
+
+
+Plot data
+---------
+
+
+
+.. code-block:: python
+
+
+    #%% plot the distributions
+
+    pl.close(10)
+    pl.figure(10, (7, 7))
+
+    pl.subplot(2, 1, 1)
+
+    pl.scatter(ys, xs, c=phi, s=70)
+    pl.ylabel('Feature value a', fontsize=20)
+    pl.title('$\mu=\sum_i \delta_{x_i,a_i}$', fontsize=25, usetex=True, y=1)
+    pl.xticks(())
+    pl.yticks(())
+    pl.subplot(2, 1, 2)
+    pl.scatter(yt, xt, c=phi2, s=70)
+    pl.xlabel('coordinates x/y', fontsize=25)
+    pl.ylabel('Feature value b', fontsize=20)
+    pl.title('$\\nu=\sum_j \delta_{y_j,b_j}$', fontsize=25, usetex=True, y=1)
+    pl.yticks(())
+    pl.tight_layout()
+    pl.show()
+
+
+
+
+.. image:: /auto_examples/images/sphx_glr_plot_fgw_010.png
+    :align: center
+
+
+
+
+Create structure matrices and across-feature distance matrix
+------------------------------------------------------------
+
+
+
+.. code-block:: python
+
+
+    #%% Structure matrices and across-features distance matrix
+    C1 = ot.dist(xs)
+    C2 = ot.dist(xt)
+    M = ot.dist(ys, yt)
+    w1 = ot.unif(C1.shape[0])
+    w2 = ot.unif(C2.shape[0])
+    Got = ot.emd([], [], M)
+
+
+
+
+
+
+
+Plot matrices
+-------------
+
+
+
+.. code-block:: python
+
+
+    #%%
+    cmap = 'Reds'
+    pl.close(10)
+    pl.figure(10, (5, 5))
+    fs = 15
+    l_x = [0, 5, 10, 15]
+    l_y = [0, 5, 10, 15, 20, 25]
+    gs = pl.GridSpec(5, 5)
+
+    ax1 = pl.subplot(gs[3:, :2])
+
+    pl.imshow(C1, cmap=cmap, interpolation='nearest')
+    pl.title("$C_1$", fontsize=fs)
+    pl.xlabel("$k$", fontsize=fs)
+    pl.ylabel("$i$", fontsize=fs)
+    pl.xticks(l_x)
+    pl.yticks(l_x)
+
+    ax2 = pl.subplot(gs[:3, 2:])
+
+    pl.imshow(C2, cmap=cmap, interpolation='nearest')
+    pl.title("$C_2$", fontsize=fs)
+    pl.ylabel("$l$", fontsize=fs)
+    #pl.ylabel("$l$",fontsize=fs)
+    pl.xticks(())
+    pl.yticks(l_y)
+    ax2.set_aspect('auto')
+
+    ax3 = pl.subplot(gs[3:, 2:], sharex=ax2, sharey=ax1)
+    pl.imshow(M, cmap=cmap, interpolation='nearest')
+    pl.yticks(l_x)
+    pl.xticks(l_y)
+    pl.ylabel("$i$", fontsize=fs)
+    pl.title("$M_{AB}$", fontsize=fs)
+    pl.xlabel("$j$", fontsize=fs)
+    pl.tight_layout()
+    ax3.set_aspect('auto')
+    pl.show()
+
+
+
+
+.. image:: /auto_examples/images/sphx_glr_plot_fgw_011.png
+    :align: center
+
+
+
+
+Compute FGW/GW
+--------------
+
+
+
+.. code-block:: python
+
+
+    #%% Computing FGW and GW
+    alpha = 1e-3
+
+    ot.tic()
+    Gwg, logw = fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=alpha, verbose=True, log=True)
+    ot.toc()
+
+    #%reload_ext WGW
+    Gg, log = gromov_wasserstein(C1, C2, p, q, loss_fun='square_loss', verbose=True, log=True)
+
+
+
+
+
+.. rst-class:: sphx-glr-script-out
+
+ Out::
+
+    It.  |Loss        |Relative loss|Absolute loss
+    ------------------------------------------------
+        0|4.734462e+01|0.000000e+00|0.000000e+00
+        1|2.508258e+01|8.875498e-01|2.226204e+01
+        2|2.189329e+01|1.456747e-01|3.189297e+00
+        3|2.189329e+01|0.000000e+00|0.000000e+00
+    Elapsed time : 0.0016989707946777344 s
+    It.  |Loss        |Relative loss|Absolute loss
+    ------------------------------------------------
+        0|4.683978e+04|0.000000e+00|0.000000e+00
+        1|3.860061e+04|2.134468e-01|8.239175e+03
+        2|2.182948e+04|7.682787e-01|1.677113e+04
+        3|2.182948e+04|0.000000e+00|0.000000e+00
+
+
+Visualize transport matrices
+----------------------------
+
+
+
+.. code-block:: python
+
+
+    #%% visu OT matrix
+    cmap = 'Blues'
+    fs = 15
+    pl.figure(2, (13, 5))
+    pl.clf()
+    pl.subplot(1, 3, 1)
+    pl.imshow(Got, cmap=cmap, interpolation='nearest')
+    #pl.xlabel("$y$",fontsize=fs)
+    pl.ylabel("$i$", fontsize=fs)
+    pl.xticks(())
+
+    pl.title('Wasserstein ($M$ only)')
+
+    pl.subplot(1, 3, 2)
+    pl.imshow(Gg, cmap=cmap, interpolation='nearest')
+    pl.title('Gromov ($C_1,C_2$ only)')
+    pl.xticks(())
+    pl.subplot(1, 3, 3)
+    pl.imshow(Gwg, cmap=cmap, interpolation='nearest')
+    pl.title('FGW  ($M+C_1,C_2$)')
+
+    pl.xlabel("$j$", fontsize=fs)
+    pl.ylabel("$i$", fontsize=fs)
+
+    pl.tight_layout()
+    pl.show()
+
+
+
+.. image:: /auto_examples/images/sphx_glr_plot_fgw_004.png
+    :align: center
+
+
+
+
+**Total running time of the script:** ( 0 minutes  1.468 seconds)
+
+
+
+.. only :: html
+
+ .. container:: sphx-glr-footer
+
+
+  .. container:: sphx-glr-download
+
+     :download:`Download Python source code: plot_fgw.py <plot_fgw.py>`
+
+
+
+  .. container:: sphx-glr-download
+
+     :download:`Download Jupyter notebook: plot_fgw.ipynb <plot_fgw.ipynb>`
+
+
+.. only:: html
+
+ .. rst-class:: sphx-glr-signature
+
+    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.readthedocs.io>`_
-- 
cgit v1.2.3


From 5e7c6ab04be3dc2035ca2a7f9deab3bb3bfb8faa Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Tue, 25 Jun 2019 14:43:39 +0200
Subject: doc add examples unbalanced

---
 docs/cache_nbrun                                   |   2 +-
 .../source/auto_examples/auto_examples_jupyter.zip | Bin 139016 -> 148147 bytes
 docs/source/auto_examples/auto_examples_python.zip | Bin 93470 -> 99229 bytes
 .../images/sphx_glr_plot_UOT_1D_001.png            | Bin 0 -> 21239 bytes
 .../images/sphx_glr_plot_UOT_1D_002.png            | Bin 0 -> 22051 bytes
 .../images/sphx_glr_plot_UOT_1D_006.png            | Bin 0 -> 21288 bytes
 .../images/sphx_glr_plot_UOT_barycenter_1D_001.png | Bin 0 -> 22177 bytes
 .../images/sphx_glr_plot_UOT_barycenter_1D_003.png | Bin 0 -> 42539 bytes
 .../images/sphx_glr_plot_UOT_barycenter_1D_005.png | Bin 0 -> 105997 bytes
 .../images/sphx_glr_plot_UOT_barycenter_1D_006.png | Bin 0 -> 103234 bytes
 .../images/thumb/sphx_glr_plot_UOT_1D_thumb.png    | Bin 0 -> 14761 bytes
 .../sphx_glr_plot_UOT_barycenter_1D_thumb.png      | Bin 0 -> 15099 bytes
 docs/source/auto_examples/index.rst                |  40 ++++
 docs/source/auto_examples/plot_UOT_1D.ipynb        | 108 +++++++++
 docs/source/auto_examples/plot_UOT_1D.py           |  76 ++++++
 docs/source/auto_examples/plot_UOT_1D.rst          | 173 ++++++++++++++
 .../auto_examples/plot_UOT_barycenter_1D.ipynb     | 126 ++++++++++
 .../source/auto_examples/plot_UOT_barycenter_1D.py | 164 +++++++++++++
 .../auto_examples/plot_UOT_barycenter_1D.rst       | 261 +++++++++++++++++++++
 19 files changed, 949 insertions(+), 1 deletion(-)
 create mode 100644 docs/source/auto_examples/images/sphx_glr_plot_UOT_1D_001.png
 create mode 100644 docs/source/auto_examples/images/sphx_glr_plot_UOT_1D_002.png
 create mode 100644 docs/source/auto_examples/images/sphx_glr_plot_UOT_1D_006.png
 create mode 100644 docs/source/auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_001.png
 create mode 100644 docs/source/auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_003.png
 create mode 100644 docs/source/auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_005.png
 create mode 100644 docs/source/auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_006.png
 create mode 100644 docs/source/auto_examples/images/thumb/sphx_glr_plot_UOT_1D_thumb.png
 create mode 100644 docs/source/auto_examples/images/thumb/sphx_glr_plot_UOT_barycenter_1D_thumb.png
 create mode 100644 docs/source/auto_examples/plot_UOT_1D.ipynb
 create mode 100644 docs/source/auto_examples/plot_UOT_1D.py
 create mode 100644 docs/source/auto_examples/plot_UOT_1D.rst
 create mode 100644 docs/source/auto_examples/plot_UOT_barycenter_1D.ipynb
 create mode 100644 docs/source/auto_examples/plot_UOT_barycenter_1D.py
 create mode 100644 docs/source/auto_examples/plot_UOT_barycenter_1D.rst

(limited to 'docs')

diff --git a/docs/cache_nbrun b/docs/cache_nbrun
index 04f6fce..8a95023 100644
--- a/docs/cache_nbrun
+++ b/docs/cache_nbrun
@@ -1 +1 @@
-{"plot_otda_color_images.ipynb": "f804d5806c7ac1a0901e4542b1eaa77b", "plot_WDA.ipynb": "27f8de4c6d7db46497076523673eedfb", "plot_OT_L1_vs_L2.ipynb": "5d565b8aaf03be4309eba731127851dc", "plot_otda_semi_supervised.ipynb": "f6dfb02ba2bbd939408ffcd22a3b007c", "plot_fgw.ipynb": "2ba3e100e92ecf4dfbeb605de20b40ab", "plot_otda_d2.ipynb": "e6feae588103f2a8fab942e5f4eff483", "plot_compute_emd.ipynb": "f5cd71cad882ec157dc8222721e9820c", "plot_barycenter_fgw.ipynb": "e14100dd276bff3ffdfdf176f1b6b070", "plot_convolutional_barycenter.ipynb": "a72bb3716a1baaffd81ae267a673f9b6", "plot_optim_OTreg.ipynb": "481801bb0d133ef350a65179cf8f739a", "plot_barycenter_lp_vs_entropic.ipynb": "51833e8c76aaedeba9599ac7a30eb357", "plot_OT_1D_smooth.ipynb": "3a059103652225a0c78ea53895cf79e5", "plot_barycenter_1D.ipynb": "5f6fb8aebd8e2e91ebc77c923cb112b3", "plot_otda_mapping.ipynb": "2f1ebbdc0f855d9e2b7adf9edec24d25", "plot_OT_1D.ipynb": "b5348bdc561c07ec168a1622e5af4b93", "plot_gromov_barycenter.ipynb": "953e5047b886ec69ec621ec52f5e21d1", "plot_otda_mapping_colors_images.ipynb": "cc8bf9a857f52e4a159fe71dfda19018", "plot_stochastic.ipynb": "e18253354c8c1d72567a4259eb1094f7", "plot_otda_linear_mapping.ipynb": "a472c767abe82020e0a58125a528785c", "plot_otda_classes.ipynb": "39087b6e98217851575f2271c22853a4", "plot_free_support_barycenter.ipynb": "246dd2feff4b233a4f1a553c5a202fdc", "plot_gromov.ipynb": "24f2aea489714d34779521f46d5e2c47", "plot_OT_2D_samples.ipynb": "912a77c5dd0fc0fafa03fac3d86f1502"}
\ No newline at end of file
+{"plot_otda_semi_supervised.ipynb": "f6dfb02ba2bbd939408ffcd22a3b007c", "plot_WDA.ipynb": "27f8de4c6d7db46497076523673eedfb", "plot_UOT_1D.ipynb": "fc7dd383e625597bd59fff03a8430c91", "plot_OT_L1_vs_L2.ipynb": "5d565b8aaf03be4309eba731127851dc", "plot_otda_color_images.ipynb": "f804d5806c7ac1a0901e4542b1eaa77b", "plot_fgw.ipynb": "2ba3e100e92ecf4dfbeb605de20b40ab", "plot_otda_d2.ipynb": "e6feae588103f2a8fab942e5f4eff483", "plot_compute_emd.ipynb": "f5cd71cad882ec157dc8222721e9820c", "plot_barycenter_fgw.ipynb": "e14100dd276bff3ffdfdf176f1b6b070", "plot_convolutional_barycenter.ipynb": "a72bb3716a1baaffd81ae267a673f9b6", "plot_optim_OTreg.ipynb": "481801bb0d133ef350a65179cf8f739a", "plot_barycenter_lp_vs_entropic.ipynb": "51833e8c76aaedeba9599ac7a30eb357", "plot_OT_1D_smooth.ipynb": "3a059103652225a0c78ea53895cf79e5", "plot_barycenter_1D.ipynb": "5f6fb8aebd8e2e91ebc77c923cb112b3", "plot_otda_mapping.ipynb": "2f1ebbdc0f855d9e2b7adf9edec24d25", "plot_OT_1D.ipynb": "b5348bdc561c07ec168a1622e5af4b93", "plot_gromov_barycenter.ipynb": "953e5047b886ec69ec621ec52f5e21d1", "plot_UOT_barycenter_1D.ipynb": "c72f0bfb6e1a79710dad3fef9f5c557c", "plot_otda_mapping_colors_images.ipynb": "cc8bf9a857f52e4a159fe71dfda19018", "plot_stochastic.ipynb": "e18253354c8c1d72567a4259eb1094f7", "plot_otda_linear_mapping.ipynb": "a472c767abe82020e0a58125a528785c", "plot_otda_classes.ipynb": "39087b6e98217851575f2271c22853a4", "plot_free_support_barycenter.ipynb": "246dd2feff4b233a4f1a553c5a202fdc", "plot_gromov.ipynb": "24f2aea489714d34779521f46d5e2c47", "plot_OT_2D_samples.ipynb": "912a77c5dd0fc0fafa03fac3d86f1502"}
\ No newline at end of file
diff --git a/docs/source/auto_examples/auto_examples_jupyter.zip b/docs/source/auto_examples/auto_examples_jupyter.zip
index a3a7c29..901195a 100644
Binary files a/docs/source/auto_examples/auto_examples_jupyter.zip and b/docs/source/auto_examples/auto_examples_jupyter.zip differ
diff --git a/docs/source/auto_examples/auto_examples_python.zip b/docs/source/auto_examples/auto_examples_python.zip
index 86a6841..ded2613 100644
Binary files a/docs/source/auto_examples/auto_examples_python.zip and b/docs/source/auto_examples/auto_examples_python.zip differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_UOT_1D_001.png b/docs/source/auto_examples/images/sphx_glr_plot_UOT_1D_001.png
new file mode 100644
index 0000000..69ef5b7
Binary files /dev/null and b/docs/source/auto_examples/images/sphx_glr_plot_UOT_1D_001.png differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_UOT_1D_002.png b/docs/source/auto_examples/images/sphx_glr_plot_UOT_1D_002.png
new file mode 100644
index 0000000..0407e44
Binary files /dev/null and b/docs/source/auto_examples/images/sphx_glr_plot_UOT_1D_002.png differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_UOT_1D_006.png b/docs/source/auto_examples/images/sphx_glr_plot_UOT_1D_006.png
new file mode 100644
index 0000000..f58d383
Binary files /dev/null and b/docs/source/auto_examples/images/sphx_glr_plot_UOT_1D_006.png differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_001.png b/docs/source/auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_001.png
new file mode 100644
index 0000000..ec8c51e
Binary files /dev/null and b/docs/source/auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_001.png differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_003.png b/docs/source/auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_003.png
new file mode 100644
index 0000000..89ab265
Binary files /dev/null and b/docs/source/auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_003.png differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_005.png b/docs/source/auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_005.png
new file mode 100644
index 0000000..c6c49cb
Binary files /dev/null and b/docs/source/auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_005.png differ
diff --git a/docs/source/auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_006.png b/docs/source/auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_006.png
new file mode 100644
index 0000000..8870b10
Binary files /dev/null and b/docs/source/auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_006.png differ
diff --git a/docs/source/auto_examples/images/thumb/sphx_glr_plot_UOT_1D_thumb.png b/docs/source/auto_examples/images/thumb/sphx_glr_plot_UOT_1D_thumb.png
new file mode 100644
index 0000000..1d048f2
Binary files /dev/null and b/docs/source/auto_examples/images/thumb/sphx_glr_plot_UOT_1D_thumb.png differ
diff --git a/docs/source/auto_examples/images/thumb/sphx_glr_plot_UOT_barycenter_1D_thumb.png b/docs/source/auto_examples/images/thumb/sphx_glr_plot_UOT_barycenter_1D_thumb.png
new file mode 100644
index 0000000..999f175
Binary files /dev/null and b/docs/source/auto_examples/images/thumb/sphx_glr_plot_UOT_barycenter_1D_thumb.png differ
diff --git a/docs/source/auto_examples/index.rst b/docs/source/auto_examples/index.rst
index 9f02da4..fe6702d 100644
--- a/docs/source/auto_examples/index.rst
+++ b/docs/source/auto_examples/index.rst
@@ -27,6 +27,26 @@ This is a gallery of all the POT example files.
 
    /auto_examples/plot_OT_1D
 
+.. raw:: html
+
+    <div class="sphx-glr-thumbcontainer" tooltip="This example illustrates the computation of Unbalanced Optimal transport using a Kullback-Leibl...">
+
+.. only:: html
+
+    .. figure:: /auto_examples/images/thumb/sphx_glr_plot_UOT_1D_thumb.png
+
+        :ref:`sphx_glr_auto_examples_plot_UOT_1D.py`
+
+.. raw:: html
+
+    </div>
+
+
+.. toctree::
+   :hidden:
+
+   /auto_examples/plot_UOT_1D
+
 .. raw:: html
 
     <div class="sphx-glr-thumbcontainer" tooltip="Illustrates the use of the generic solver for regularized OT with user-designed regularization ...">
@@ -287,6 +307,26 @@ This is a gallery of all the POT example files.
 
    /auto_examples/plot_otda_mapping_colors_images
 
+.. raw:: html
+
+    <div class="sphx-glr-thumbcontainer" tooltip="This example illustrates the computation of regularized Wasserstein Barycenter as proposed in [...">
+
+.. only:: html
+
+    .. figure:: /auto_examples/images/thumb/sphx_glr_plot_UOT_barycenter_1D_thumb.png
+
+        :ref:`sphx_glr_auto_examples_plot_UOT_barycenter_1D.py`
+
+.. raw:: html
+
+    </div>
+
+
+.. toctree::
+   :hidden:
+
+   /auto_examples/plot_UOT_barycenter_1D
+
 .. raw:: html
 
     <div class="sphx-glr-thumbcontainer" tooltip="This example presents how to use MappingTransport to estimate at the same time both the couplin...">
diff --git a/docs/source/auto_examples/plot_UOT_1D.ipynb b/docs/source/auto_examples/plot_UOT_1D.ipynb
new file mode 100644
index 0000000..c695306
--- /dev/null
+++ b/docs/source/auto_examples/plot_UOT_1D.ipynb
@@ -0,0 +1,108 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "\n# 1D Unbalanced optimal transport\n\n\nThis example illustrates the computation of Unbalanced Optimal transport\nusing a Kullback-Leibler relaxation.\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# Author: Hicham Janati <hicham.janati@inria.fr>\n#\n# License: MIT License\n\nimport numpy as np\nimport matplotlib.pylab as pl\nimport ot\nimport ot.plot\nfrom ot.datasets import make_1D_gauss as gauss"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Generate data\n-------------\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "#%% parameters\n\nn = 100  # nb bins\n\n# bin positions\nx = np.arange(n, dtype=np.float64)\n\n# Gaussian distributions\na = gauss(n, m=20, s=5)  # m= mean, s= std\nb = gauss(n, m=60, s=10)\n\n# make distributions unbalanced\nb *= 5.\n\n# loss matrix\nM = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))\nM /= M.max()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Plot distributions and loss matrix\n----------------------------------\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "#%% plot the distributions\n\npl.figure(1, figsize=(6.4, 3))\npl.plot(x, a, 'b', label='Source distribution')\npl.plot(x, b, 'r', label='Target distribution')\npl.legend()\n\n# plot distributions and loss matrix\n\npl.figure(2, figsize=(5, 5))\not.plot.plot1D_mat(a, b, M, 'Cost matrix M')"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Solve Unbalanced Sinkhorn\n--------------\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# Sinkhorn\n\nepsilon = 0.1  # entropy parameter\nalpha = 1.  # Unbalanced KL relaxation parameter\nGs = ot.unbalanced.sinkhorn_unbalanced(a, b, M, epsilon, alpha, verbose=True)\n\npl.figure(4, figsize=(5, 5))\not.plot.plot1D_mat(a, b, Gs, 'UOT matrix Sinkhorn')\n\npl.show()"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.8"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/docs/source/auto_examples/plot_UOT_1D.py b/docs/source/auto_examples/plot_UOT_1D.py
new file mode 100644
index 0000000..2ea8b05
--- /dev/null
+++ b/docs/source/auto_examples/plot_UOT_1D.py
@@ -0,0 +1,76 @@
+# -*- coding: utf-8 -*-
+"""
+===============================
+1D Unbalanced optimal transport
+===============================
+
+This example illustrates the computation of Unbalanced Optimal transport
+using a Kullback-Leibler relaxation.
+"""
+
+# Author: Hicham Janati <hicham.janati@inria.fr>
+#
+# License: MIT License
+
+import numpy as np
+import matplotlib.pylab as pl
+import ot
+import ot.plot
+from ot.datasets import make_1D_gauss as gauss
+
+##############################################################################
+# Generate data
+# -------------
+
+
+#%% parameters
+
+n = 100  # nb bins
+
+# bin positions
+x = np.arange(n, dtype=np.float64)
+
+# Gaussian distributions
+a = gauss(n, m=20, s=5)  # m= mean, s= std
+b = gauss(n, m=60, s=10)
+
+# make distributions unbalanced
+b *= 5.
+
+# loss matrix
+M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
+M /= M.max()
+
+
+##############################################################################
+# Plot distributions and loss matrix
+# ----------------------------------
+
+#%% plot the distributions
+
+pl.figure(1, figsize=(6.4, 3))
+pl.plot(x, a, 'b', label='Source distribution')
+pl.plot(x, b, 'r', label='Target distribution')
+pl.legend()
+
+# plot distributions and loss matrix
+
+pl.figure(2, figsize=(5, 5))
+ot.plot.plot1D_mat(a, b, M, 'Cost matrix M')
+
+
+##############################################################################
+# Solve Unbalanced Sinkhorn
+# -------------------------
+
+
+# Sinkhorn
+
+epsilon = 0.1  # entropy parameter
+alpha = 1.  # Unbalanced KL relaxation parameter
+Gs = ot.unbalanced.sinkhorn_unbalanced(a, b, M, epsilon, alpha, verbose=True)
+
+pl.figure(4, figsize=(5, 5))
+ot.plot.plot1D_mat(a, b, Gs, 'UOT matrix Sinkhorn')
+
+pl.show()
diff --git a/docs/source/auto_examples/plot_UOT_1D.rst b/docs/source/auto_examples/plot_UOT_1D.rst
new file mode 100644
index 0000000..8e618b4
--- /dev/null
+++ b/docs/source/auto_examples/plot_UOT_1D.rst
@@ -0,0 +1,173 @@
+
+
+.. _sphx_glr_auto_examples_plot_UOT_1D.py:
+
+
+===============================
+1D Unbalanced optimal transport
+===============================
+
+This example illustrates the computation of Unbalanced Optimal transport
+using a Kullback-Leibler relaxation.
+
+
+
+.. code-block:: python
+
+
+    # Author: Hicham Janati <hicham.janati@inria.fr>
+    #
+    # License: MIT License
+
+    import numpy as np
+    import matplotlib.pylab as pl
+    import ot
+    import ot.plot
+    from ot.datasets import make_1D_gauss as gauss
+
+
+
+
+
+
+
+Generate data
+-------------
+
+
+
+.. code-block:: python
+
+
+
+    #%% parameters
+
+    n = 100  # nb bins
+
+    # bin positions
+    x = np.arange(n, dtype=np.float64)
+
+    # Gaussian distributions
+    a = gauss(n, m=20, s=5)  # m= mean, s= std
+    b = gauss(n, m=60, s=10)
+
+    # make distributions unbalanced
+    b *= 5.
+
+    # loss matrix
+    M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
+    M /= M.max()
+
+
+
+
+
+
+
+
+Plot distributions and loss matrix
+----------------------------------
+
+
+
+.. code-block:: python
+
+
+    #%% plot the distributions
+
+    pl.figure(1, figsize=(6.4, 3))
+    pl.plot(x, a, 'b', label='Source distribution')
+    pl.plot(x, b, 'r', label='Target distribution')
+    pl.legend()
+
+    # plot distributions and loss matrix
+
+    pl.figure(2, figsize=(5, 5))
+    ot.plot.plot1D_mat(a, b, M, 'Cost matrix M')
+
+
+
+
+
+.. rst-class:: sphx-glr-horizontal
+
+
+    *
+
+      .. image:: /auto_examples/images/sphx_glr_plot_UOT_1D_001.png
+            :scale: 47
+
+    *
+
+      .. image:: /auto_examples/images/sphx_glr_plot_UOT_1D_002.png
+            :scale: 47
+
+
+
+
+Solve Unbalanced Sinkhorn
+-------------------------
+
+
+
+.. code-block:: python
+
+
+
+    # Sinkhorn
+
+    epsilon = 0.1  # entropy parameter
+    alpha = 1.  # Unbalanced KL relaxation parameter
+    Gs = ot.unbalanced.sinkhorn_unbalanced(a, b, M, epsilon, alpha, verbose=True)
+
+    pl.figure(4, figsize=(5, 5))
+    ot.plot.plot1D_mat(a, b, Gs, 'UOT matrix Sinkhorn')
+
+    pl.show()
+
+
+
+.. image:: /auto_examples/images/sphx_glr_plot_UOT_1D_006.png
+    :align: center
+
+
+.. rst-class:: sphx-glr-script-out
+
+ Out::
+
+    It.  |Err         
+    -------------------
+        0|1.838786e+00|
+       10|1.242379e-01|
+       20|2.581314e-03|
+       30|5.674552e-05|
+       40|1.252959e-06|
+       50|2.768136e-08|
+       60|6.116090e-10|
+
+
+**Total running time of the script:** ( 0 minutes  0.259 seconds)
+
+
+
+.. only :: html
+
+ .. container:: sphx-glr-footer
+
+
+  .. container:: sphx-glr-download
+
+     :download:`Download Python source code: plot_UOT_1D.py <plot_UOT_1D.py>`
+
+
+
+  .. container:: sphx-glr-download
+
+     :download:`Download Jupyter notebook: plot_UOT_1D.ipynb <plot_UOT_1D.ipynb>`
+
+
+.. only:: html
+
+ .. rst-class:: sphx-glr-signature
+
+    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.readthedocs.io>`_
diff --git a/docs/source/auto_examples/plot_UOT_barycenter_1D.ipynb b/docs/source/auto_examples/plot_UOT_barycenter_1D.ipynb
new file mode 100644
index 0000000..e59cdc2
--- /dev/null
+++ b/docs/source/auto_examples/plot_UOT_barycenter_1D.ipynb
@@ -0,0 +1,126 @@
+{
+  "cells": [
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "%matplotlib inline"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "\n# 1D Wasserstein barycenter demo for Unbalanced distributions\n\n\nThis example illustrates the computation of regularized Wasserstein Barycenter\nas proposed in [10] for Unbalanced inputs.\n\n\n[10] Chizat, L., Peyr\u00e9, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.\n\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# Author: Hicham Janati <hicham.janati@inria.fr>\n#\n# License: MIT License\n\nimport numpy as np\nimport matplotlib.pylab as pl\nimport ot\n# necessary for 3d plot even if not used\nfrom mpl_toolkits.mplot3d import Axes3D  # noqa\nfrom matplotlib.collections import PolyCollection"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Generate data\n-------------\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# parameters\n\nn = 100  # nb bins\n\n# bin positions\nx = np.arange(n, dtype=np.float64)\n\n# Gaussian distributions\na1 = ot.datasets.make_1D_gauss(n, m=20, s=5)  # m= mean, s= std\na2 = ot.datasets.make_1D_gauss(n, m=60, s=8)\n\n# make unbalanced dists\na2 *= 3.\n\n# creating matrix A containing all distributions\nA = np.vstack((a1, a2)).T\nn_distributions = A.shape[1]\n\n# loss matrix + normalization\nM = ot.utils.dist0(n)\nM /= M.max()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Plot data\n---------\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# plot the distributions\n\npl.figure(1, figsize=(6.4, 3))\nfor i in range(n_distributions):\n    pl.plot(x, A[:, i])\npl.title('Distributions')\npl.tight_layout()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Barycenter computation\n----------------------\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# non weighted barycenter computation\n\nweight = 0.5  # 0<=weight<=1\nweights = np.array([1 - weight, weight])\n\n# l2bary\nbary_l2 = A.dot(weights)\n\n# wasserstein\nreg = 1e-3\nalpha = 1.\n\nbary_wass = ot.unbalanced.barycenter_unbalanced(A, M, reg, alpha, weights)\n\npl.figure(2)\npl.clf()\npl.subplot(2, 1, 1)\nfor i in range(n_distributions):\n    pl.plot(x, A[:, i])\npl.title('Distributions')\n\npl.subplot(2, 1, 2)\npl.plot(x, bary_l2, 'r', label='l2')\npl.plot(x, bary_wass, 'g', label='Wasserstein')\npl.legend()\npl.title('Barycenters')\npl.tight_layout()"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {},
+      "source": [
+        "Barycentric interpolation\n-------------------------\n\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "collapsed": false
+      },
+      "outputs": [],
+      "source": [
+        "# barycenter interpolation\n\nn_weight = 11\nweight_list = np.linspace(0, 1, n_weight)\n\n\nB_l2 = np.zeros((n, n_weight))\n\nB_wass = np.copy(B_l2)\n\nfor i in range(0, n_weight):\n    weight = weight_list[i]\n    weights = np.array([1 - weight, weight])\n    B_l2[:, i] = A.dot(weights)\n    B_wass[:, i] = ot.unbalanced.barycenter_unbalanced(A, M, reg, alpha, weights)\n\n\n# plot interpolation\n\npl.figure(3)\n\ncmap = pl.cm.get_cmap('viridis')\nverts = []\nzs = weight_list\nfor i, z in enumerate(zs):\n    ys = B_l2[:, i]\n    verts.append(list(zip(x, ys)))\n\nax = pl.gcf().gca(projection='3d')\n\npoly = PolyCollection(verts, facecolors=[cmap(a) for a in weight_list])\npoly.set_alpha(0.7)\nax.add_collection3d(poly, zs=zs, zdir='y')\nax.set_xlabel('x')\nax.set_xlim3d(0, n)\nax.set_ylabel(r'$\\alpha$')\nax.set_ylim3d(0, 1)\nax.set_zlabel('')\nax.set_zlim3d(0, B_l2.max() * 1.01)\npl.title('Barycenter interpolation with l2')\npl.tight_layout()\n\npl.figure(4)\ncmap = pl.cm.get_cmap('viridis')\nverts = []\nzs = weight_list\nfor i, z in enumerate(zs):\n    ys = B_wass[:, i]\n    verts.append(list(zip(x, ys)))\n\nax = pl.gcf().gca(projection='3d')\n\npoly = PolyCollection(verts, facecolors=[cmap(a) for a in weight_list])\npoly.set_alpha(0.7)\nax.add_collection3d(poly, zs=zs, zdir='y')\nax.set_xlabel('x')\nax.set_xlim3d(0, n)\nax.set_ylabel(r'$\\alpha$')\nax.set_ylim3d(0, 1)\nax.set_zlabel('')\nax.set_zlim3d(0, B_l2.max() * 1.01)\npl.title('Barycenter interpolation with Wasserstein')\npl.tight_layout()\n\npl.show()"
+      ]
+    }
+  ],
+  "metadata": {
+    "kernelspec": {
+      "display_name": "Python 3",
+      "language": "python",
+      "name": "python3"
+    },
+    "language_info": {
+      "codemirror_mode": {
+        "name": "ipython",
+        "version": 3
+      },
+      "file_extension": ".py",
+      "mimetype": "text/x-python",
+      "name": "python",
+      "nbconvert_exporter": "python",
+      "pygments_lexer": "ipython3",
+      "version": "3.6.8"
+    }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
+}
\ No newline at end of file
diff --git a/docs/source/auto_examples/plot_UOT_barycenter_1D.py b/docs/source/auto_examples/plot_UOT_barycenter_1D.py
new file mode 100644
index 0000000..c8d9d3b
--- /dev/null
+++ b/docs/source/auto_examples/plot_UOT_barycenter_1D.py
@@ -0,0 +1,164 @@
+# -*- coding: utf-8 -*-
+"""
+===========================================================
+1D Wasserstein barycenter demo for Unbalanced distributions
+===========================================================
+
+This example illustrates the computation of regularized Wasserstein Barycenter
+as proposed in [10] for Unbalanced inputs.
+
+
+[10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
+
+"""
+
+# Author: Hicham Janati <hicham.janati@inria.fr>
+#
+# License: MIT License
+
+import numpy as np
+import matplotlib.pylab as pl
+import ot
+# necessary for 3d plot even if not used
+from mpl_toolkits.mplot3d import Axes3D  # noqa
+from matplotlib.collections import PolyCollection
+
+##############################################################################
+# Generate data
+# -------------
+
+# parameters
+
+n = 100  # nb bins
+
+# bin positions
+x = np.arange(n, dtype=np.float64)
+
+# Gaussian distributions
+a1 = ot.datasets.make_1D_gauss(n, m=20, s=5)  # m= mean, s= std
+a2 = ot.datasets.make_1D_gauss(n, m=60, s=8)
+
+# make unbalanced dists
+a2 *= 3.
+
+# creating matrix A containing all distributions
+A = np.vstack((a1, a2)).T
+n_distributions = A.shape[1]
+
+# loss matrix + normalization
+M = ot.utils.dist0(n)
+M /= M.max()
+
+##############################################################################
+# Plot data
+# ---------
+
+# plot the distributions
+
+pl.figure(1, figsize=(6.4, 3))
+for i in range(n_distributions):
+    pl.plot(x, A[:, i])
+pl.title('Distributions')
+pl.tight_layout()
+
+##############################################################################
+# Barycenter computation
+# ----------------------
+
+# non weighted barycenter computation
+
+weight = 0.5  # 0<=weight<=1
+weights = np.array([1 - weight, weight])
+
+# l2bary
+bary_l2 = A.dot(weights)
+
+# wasserstein
+reg = 1e-3
+alpha = 1.
+
+bary_wass = ot.unbalanced.barycenter_unbalanced(A, M, reg, alpha, weights)
+
+pl.figure(2)
+pl.clf()
+pl.subplot(2, 1, 1)
+for i in range(n_distributions):
+    pl.plot(x, A[:, i])
+pl.title('Distributions')
+
+pl.subplot(2, 1, 2)
+pl.plot(x, bary_l2, 'r', label='l2')
+pl.plot(x, bary_wass, 'g', label='Wasserstein')
+pl.legend()
+pl.title('Barycenters')
+pl.tight_layout()
+
+##############################################################################
+# Barycentric interpolation
+# -------------------------
+
+# barycenter interpolation
+
+n_weight = 11
+weight_list = np.linspace(0, 1, n_weight)
+
+
+B_l2 = np.zeros((n, n_weight))
+
+B_wass = np.copy(B_l2)
+
+for i in range(0, n_weight):
+    weight = weight_list[i]
+    weights = np.array([1 - weight, weight])
+    B_l2[:, i] = A.dot(weights)
+    B_wass[:, i] = ot.unbalanced.barycenter_unbalanced(A, M, reg, alpha, weights)
+
+
+# plot interpolation
+
+pl.figure(3)
+
+cmap = pl.cm.get_cmap('viridis')
+verts = []
+zs = weight_list
+for i, z in enumerate(zs):
+    ys = B_l2[:, i]
+    verts.append(list(zip(x, ys)))
+
+ax = pl.gcf().gca(projection='3d')
+
+poly = PolyCollection(verts, facecolors=[cmap(a) for a in weight_list])
+poly.set_alpha(0.7)
+ax.add_collection3d(poly, zs=zs, zdir='y')
+ax.set_xlabel('x')
+ax.set_xlim3d(0, n)
+ax.set_ylabel(r'$\alpha$')
+ax.set_ylim3d(0, 1)
+ax.set_zlabel('')
+ax.set_zlim3d(0, B_l2.max() * 1.01)
+pl.title('Barycenter interpolation with l2')
+pl.tight_layout()
+
+pl.figure(4)
+cmap = pl.cm.get_cmap('viridis')
+verts = []
+zs = weight_list
+for i, z in enumerate(zs):
+    ys = B_wass[:, i]
+    verts.append(list(zip(x, ys)))
+
+ax = pl.gcf().gca(projection='3d')
+
+poly = PolyCollection(verts, facecolors=[cmap(a) for a in weight_list])
+poly.set_alpha(0.7)
+ax.add_collection3d(poly, zs=zs, zdir='y')
+ax.set_xlabel('x')
+ax.set_xlim3d(0, n)
+ax.set_ylabel(r'$\alpha$')
+ax.set_ylim3d(0, 1)
+ax.set_zlabel('')
+ax.set_zlim3d(0, B_l2.max() * 1.01)
+pl.title('Barycenter interpolation with Wasserstein')
+pl.tight_layout()
+
+pl.show()
diff --git a/docs/source/auto_examples/plot_UOT_barycenter_1D.rst b/docs/source/auto_examples/plot_UOT_barycenter_1D.rst
new file mode 100644
index 0000000..ac17587
--- /dev/null
+++ b/docs/source/auto_examples/plot_UOT_barycenter_1D.rst
@@ -0,0 +1,261 @@
+
+
+.. _sphx_glr_auto_examples_plot_UOT_barycenter_1D.py:
+
+
+===========================================================
+1D Wasserstein barycenter demo for Unbalanced distributions
+===========================================================
+
+This example illustrates the computation of regularized Wasserstein Barycenter
+as proposed in [10] for Unbalanced inputs.
+
+
+[10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
+
+
+
+
+.. code-block:: python
+
+
+    # Author: Hicham Janati <hicham.janati@inria.fr>
+    #
+    # License: MIT License
+
+    import numpy as np
+    import matplotlib.pylab as pl
+    import ot
+    # necessary for 3d plot even if not used
+    from mpl_toolkits.mplot3d import Axes3D  # noqa
+    from matplotlib.collections import PolyCollection
+
+
+
+
+
+
+
+Generate data
+-------------
+
+
+
+.. code-block:: python
+
+
+    # parameters
+
+    n = 100  # nb bins
+
+    # bin positions
+    x = np.arange(n, dtype=np.float64)
+
+    # Gaussian distributions
+    a1 = ot.datasets.make_1D_gauss(n, m=20, s=5)  # m= mean, s= std
+    a2 = ot.datasets.make_1D_gauss(n, m=60, s=8)
+
+    # make unbalanced dists
+    a2 *= 3.
+
+    # creating matrix A containing all distributions
+    A = np.vstack((a1, a2)).T
+    n_distributions = A.shape[1]
+
+    # loss matrix + normalization
+    M = ot.utils.dist0(n)
+    M /= M.max()
+
+
+
+
+
+
+
+Plot data
+---------
+
+
+
+.. code-block:: python
+
+
+    # plot the distributions
+
+    pl.figure(1, figsize=(6.4, 3))
+    for i in range(n_distributions):
+        pl.plot(x, A[:, i])
+    pl.title('Distributions')
+    pl.tight_layout()
+
+
+
+
+.. image:: /auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_001.png
+    :align: center
+
+
+
+
+Barycenter computation
+----------------------
+
+
+
+.. code-block:: python
+
+
+    # non weighted barycenter computation
+
+    weight = 0.5  # 0<=weight<=1
+    weights = np.array([1 - weight, weight])
+
+    # l2bary
+    bary_l2 = A.dot(weights)
+
+    # wasserstein
+    reg = 1e-3
+    alpha = 1.
+
+    bary_wass = ot.unbalanced.barycenter_unbalanced(A, M, reg, alpha, weights)
+
+    pl.figure(2)
+    pl.clf()
+    pl.subplot(2, 1, 1)
+    for i in range(n_distributions):
+        pl.plot(x, A[:, i])
+    pl.title('Distributions')
+
+    pl.subplot(2, 1, 2)
+    pl.plot(x, bary_l2, 'r', label='l2')
+    pl.plot(x, bary_wass, 'g', label='Wasserstein')
+    pl.legend()
+    pl.title('Barycenters')
+    pl.tight_layout()
+
+
+
+
+.. image:: /auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_003.png
+    :align: center
+
+
+
+
+Barycentric interpolation
+-------------------------
+
+
+
+.. code-block:: python
+
+
+    # barycenter interpolation
+
+    n_weight = 11
+    weight_list = np.linspace(0, 1, n_weight)
+
+
+    B_l2 = np.zeros((n, n_weight))
+
+    B_wass = np.copy(B_l2)
+
+    for i in range(0, n_weight):
+        weight = weight_list[i]
+        weights = np.array([1 - weight, weight])
+        B_l2[:, i] = A.dot(weights)
+        B_wass[:, i] = ot.unbalanced.barycenter_unbalanced(A, M, reg, alpha, weights)
+
+
+    # plot interpolation
+
+    pl.figure(3)
+
+    cmap = pl.cm.get_cmap('viridis')
+    verts = []
+    zs = weight_list
+    for i, z in enumerate(zs):
+        ys = B_l2[:, i]
+        verts.append(list(zip(x, ys)))
+
+    ax = pl.gcf().gca(projection='3d')
+
+    poly = PolyCollection(verts, facecolors=[cmap(a) for a in weight_list])
+    poly.set_alpha(0.7)
+    ax.add_collection3d(poly, zs=zs, zdir='y')
+    ax.set_xlabel('x')
+    ax.set_xlim3d(0, n)
+    ax.set_ylabel(r'$\alpha$')
+    ax.set_ylim3d(0, 1)
+    ax.set_zlabel('')
+    ax.set_zlim3d(0, B_l2.max() * 1.01)
+    pl.title('Barycenter interpolation with l2')
+    pl.tight_layout()
+
+    pl.figure(4)
+    cmap = pl.cm.get_cmap('viridis')
+    verts = []
+    zs = weight_list
+    for i, z in enumerate(zs):
+        ys = B_wass[:, i]
+        verts.append(list(zip(x, ys)))
+
+    ax = pl.gcf().gca(projection='3d')
+
+    poly = PolyCollection(verts, facecolors=[cmap(a) for a in weight_list])
+    poly.set_alpha(0.7)
+    ax.add_collection3d(poly, zs=zs, zdir='y')
+    ax.set_xlabel('x')
+    ax.set_xlim3d(0, n)
+    ax.set_ylabel(r'$\alpha$')
+    ax.set_ylim3d(0, 1)
+    ax.set_zlabel('')
+    ax.set_zlim3d(0, B_l2.max() * 1.01)
+    pl.title('Barycenter interpolation with Wasserstein')
+    pl.tight_layout()
+
+    pl.show()
+
+
+
+.. rst-class:: sphx-glr-horizontal
+
+
+    *
+
+      .. image:: /auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_005.png
+            :scale: 47
+
+    *
+
+      .. image:: /auto_examples/images/sphx_glr_plot_UOT_barycenter_1D_006.png
+            :scale: 47
+
+
+
+
+**Total running time of the script:** ( 0 minutes  0.344 seconds)
+
+
+
+.. only :: html
+
+ .. container:: sphx-glr-footer
+
+
+  .. container:: sphx-glr-download
+
+     :download:`Download Python source code: plot_UOT_barycenter_1D.py <plot_UOT_barycenter_1D.py>`
+
+
+
+  .. container:: sphx-glr-download
+
+     :download:`Download Jupyter notebook: plot_UOT_barycenter_1D.ipynb <plot_UOT_barycenter_1D.ipynb>`
+
+
+.. only:: html
+
+ .. rst-class:: sphx-glr-signature
+
+    `Gallery generated by Sphinx-Gallery <https://sphinx-gallery.readthedocs.io>`_
-- 
cgit v1.2.3


From d20d471a1806bde43c23e67c1f805aa3c8908ec3 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Thu, 27 Jun 2019 14:34:23 +0200
Subject: update part 1

---
 docs/source/quickstart.rst | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

(limited to 'docs')

diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index d8d4838..a14358c 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -83,6 +83,29 @@ properties. It can computed from an already estimated OT matrix with
 Regularized Optimal Transport
 -----------------------------
 
+Recent developments have shown the interest of regularized OT both in terms of
+computational and statistical properties.
+
+We address in this section the regularized OT problem that can be expressed as
+
+.. math::
+    \gamma^* = arg\min_\gamma <\gamma,M>_F + reg*\Omega(\gamma)
+
+    s.t. \gamma 1 = a
+
+            \gamma^T 1= b
+
+            \gamma\geq 0
+where :
+
+- :math:`M\in\mathbb{R}_+^{m\times n}` is the metric cost matrix defining the cost to move mass from bin :math:`a_i` to bin :math:`b_j`.
+- :math:`a` and :math:`b` are histograms (positive, sum to 1) that represent the weights of each samples in the source an target distributions.
+- :math:`\Omega` is the regularization term.
+
+We disvuss in the following specific algorithms 
+
+
+
 Entropic regularized OT
 ^^^^^^^^^^^^^^^^^^^^^^^
 
-- 
cgit v1.2.3


From 2d7db0ed112b9349dc0b0c4cc7a9f3ea8da4ebed Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Thu, 27 Jun 2019 15:01:13 +0200
Subject: update readme

---
 docs/source/readme.rst | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

(limited to 'docs')

diff --git a/docs/source/readme.rst b/docs/source/readme.rst
index b7828d3..320ddd5 100644
--- a/docs/source/readme.rst
+++ b/docs/source/readme.rst
@@ -35,6 +35,7 @@ It provides the following solvers:
 -  Stochastic Optimization for Large-scale Optimal Transport (semi-dual
    problem [18] and dual problem [19])
 -  Non regularized free support Wasserstein barycenters [20].
+-  Unbalanced OT with KL relaxation distance and barycenter [10, 25].
 
 Some demonstrations (both in Python and Jupyter Notebook format) are
 available in the examples folder.
@@ -69,6 +70,13 @@ modules:
 Pip installation
 ^^^^^^^^^^^^^^^^
 
+Note that due to a limitation of pip, ``cython`` and ``numpy`` need to
+be installed prior to installing POT. This can be done easily with
+
+::
+
+    pip install numpy cython
+
 You can install the toolbox through PyPI with:
 
 ::
@@ -229,6 +237,8 @@ The contributors to this library are
 -  `Alain
    Rakotomamonjy <https://sites.google.com/site/alainrakotomamonjy/home>`__
 -  `Vayer Titouan <https://tvayer.github.io/>`__
+-  `Hicham Janati <https://hichamjanati.github.io/>`__ (Unbalanced OT)
+-  `Romain Tavenard <https://rtavenar.github.io/>`__ (1d Wasserstein)
 
 This toolbox benefit a lot from open source research and we would like
 to thank the following persons for providing some code (in various
@@ -379,6 +389,10 @@ and Statistics, (AISTATS) 21, 2018
 graphs <http://proceedings.mlr.press/v97/titouan19a.html>`__ Proceedings
 of the 36th International Conference on Machine Learning (ICML).
 
+[25] Frogner C., Zhang C., Mobahi H., Araya-Polo M., Poggio T. (2015).
+`Learning with a Wasserstein Loss <http://cbcl.mit.edu/wasserstein/>`__
+Advances in Neural Information Processing Systems (NIPS).
+
 .. |PyPI version| image:: https://badge.fury.io/py/POT.svg
    :target: https://badge.fury.io/py/POT
 .. |Anaconda Cloud| image:: https://anaconda.org/conda-forge/pot/badges/version.svg
-- 
cgit v1.2.3


From 982ee8345d491d76ac9ba49c6b9a7f5418ed966d Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Thu, 27 Jun 2019 16:40:38 +0200
Subject: start section entropic

---
 docs/source/quickstart.rst | 90 +++++++++++++++++++++++++++++++++++++++-------
 1 file changed, 77 insertions(+), 13 deletions(-)

(limited to 'docs')

diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index a14358c..c122d17 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -21,7 +21,7 @@ Solving optimal transport
 The optimal transport problem between discrete distributions is often expressed
 as
     .. math::
-        \gamma^* = arg\min_\gamma \sum_{i,j}\gamma_{i,j}M_{i,j}
+        \gamma^* = arg\min_\gamma \quad \sum_{i,j}\gamma_{i,j}M_{i,j}
 
         s.t. \gamma 1 = a; \gamma^T 1= b; \gamma\geq 0
 
@@ -56,15 +56,12 @@ Computing Wasserstein distance
 The value of the OT solution is often more of interest that the OT matrix :
 
     .. math::
-        W(a,b)=\min_\gamma \sum_{i,j}\gamma_{i,j}M_{i,j}
+        OT(a,b)=\min_\gamma \quad \sum_{i,j}\gamma_{i,j}M_{i,j}
 
         s.t. \gamma 1 = a; \gamma^T 1= b; \gamma\geq 0
 
 
-where :math:`W(a,b)` is the  `Wasserstein distance
-<https://en.wikipedia.org/wiki/Wasserstein_metric>`_ between distributions a and b
-It is a metrix that has nice statistical
-properties. It can computed from an already estimated OT matrix with
+It can be computed from an already estimated OT matrix with
 :code:`np.sum(T*M)` or directly with the function :any:`ot.emd2`.
 
 .. code:: python
@@ -73,6 +70,25 @@ properties. It can computed from an already estimated OT matrix with
     # M is the ground cost matrix
     W=ot.emd2(a,b,M) # Wasserstein distance / EMD value
 
+Note that the well known  `Wasserstein distance
+<https://en.wikipedia.org/wiki/Wasserstein_metric>`_ between distributions a and
+b is defined as
+
+
+    .. math::
+
+        W_p(a,b)=(\min_\gamma \sum_{i,j}\gamma_{i,j}\|x_i-y_j\|^p)^\frac{1}{p}
+
+        s.t. \gamma 1 = a; \gamma^T 1= b; \gamma\geq 0
+
+This means that if you want to compute the :math:`W_2` you need to compute the
+square root of :any:`ot.emd2` when providing
+:code:`M=ot.dist(xs,xt)` that uses the squared euclidean distance by default. Computing
+the :math:`W_1` Wasserstein distance can be done directly with  :any:`ot.emd2`
+when providing :code:`M=ot.dist(xs,xt, metric='euclidean')` to use the euclidean
+distance.
+
+ 
 
 .. hint::
     Examples of use for :any:`ot.emd2` are available in the following examples:
@@ -80,6 +96,32 @@ properties. It can computed from an already estimated OT matrix with
     - :any:`auto_examples/plot_compute_emd`
  
 
+Special cases
+^^^^^^^^^^^^^
+
+Note that the OT problem and the corresponding Wasserstein distance can in some
+special cases be computed very efficiently. 
+
+For instance when the samples are in 1D, then the OT problem can be solved in
+:math:`O(n\log(n))` by using a simple sorting. In this case we provide the
+functions :any:`ot.emd_1d` and :any:`ot.emd2_1d` to return respectively the OT
+matrix and value. Note that since the solution is very sparse the :code:`sparse`
+parameter of :any:`ot.emd_1d` allows for solving and returning the solution for
+very large problems. Note that in order to compute directly the :math:`W_p`
+Wasserstein distance in 1D we provide the function :any:`ot.wasserstein_1d` that
+takes :code:`p` as a parameter.
+
+Another special case for estimating OT and Monge mapping is between Gaussian
+distributions. In this case there exists a closed-form solution given in Remark
+2.29 in [15]_ and the Monge mapping is an affine function and can be
+also computed from the covariances and means of the source and target
+distributions. In this case when the finite sample dataset is supposed gaussian, we provide 
+:any:`ot.da.OT_mapping_linear` that returns the parameters for the Monge
+mapping.
+
+
+
+
 Regularized Optimal Transport
 -----------------------------
 
@@ -89,31 +131,53 @@ computational and statistical properties.
 We address in this section the regularized OT problem that can be expressed as
 
 .. math::
-    \gamma^* = arg\min_\gamma <\gamma,M>_F + reg*\Omega(\gamma)
+    \gamma^* = arg\min_\gamma \quad \sum_{i,j}\gamma_{i,j}M_{i,j} + \lambda\Omega(\gamma)
 
-    s.t. \gamma 1 = a
+        s.t. \gamma 1 = a; \gamma^T 1= b; \gamma\geq 0
 
-            \gamma^T 1= b
 
-            \gamma\geq 0
 where :
 
 - :math:`M\in\mathbb{R}_+^{m\times n}` is the metric cost matrix defining the cost to move mass from bin :math:`a_i` to bin :math:`b_j`.
 - :math:`a` and :math:`b` are histograms (positive, sum to 1) that represent the weights of each samples in the source an target distributions.
 - :math:`\Omega` is the regularization term.
 
-We disvuss in the following specific algorithms 
-
+We discuss in the following specific algorithms that can be used depending on
+the regularization term.
 
 
 Entropic regularized OT
 ^^^^^^^^^^^^^^^^^^^^^^^
 
+This is the most common regularization used for optimal transport. It has been
+proposed in the ML community by Marco Cuturi in his seminal paper [2]_. This
+regularization has the following expression
+
+.. math::
+    \Omega(\gamma)=\sum_{i,j}\gamma_{i,j}\log(\gamma_{i,j})
+
+
+The use of the regularization term above in the optimization problem has a very
+strong impact. First it makes the problem smooth which leads to new optimization
+procedures such as L-BFGS (see :any:`ot.smooth` ). Next it makes the problem
+strictly convex meaning that there will be a unique solution. Finally the
+solution of the resulting optimization problem can be expressed as:
+
+.. math::
+
+    \gamma_\lambda^*=\text{diag}(u)K\text{diag}(v)
+
+where :math:`u` and :math:`v` are vectors and :math:`K=\exp(-M/\lambda)` where
+the :math:`\exp` is taken component-wise.    
+
+
+
+
 
 Other regularization
 ^^^^^^^^^^^^^^^^^^^^
 
-Stochastic gradient decsent
+Stochastic gradient descent
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
 Wasserstein Barycenters
-- 
cgit v1.2.3


From 7dcfebbef19e1f94928fc71face612a2f71372b4 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Fri, 28 Jun 2019 08:33:36 +0200
Subject: entropic mostly done, starting general regularization

---
 docs/source/quickstart.rst | 144 +++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 133 insertions(+), 11 deletions(-)

(limited to 'docs')

diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index c122d17..4f2d9bb 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -5,6 +5,9 @@ Quick start guide
 In the following we provide some pointers about which functions and classes 
 to use for different problems related to optimal transport (OT).
 
+This document is not a tutorial on numerical optimal transport. For this we strongly
+recommend reading the very nice book [15]_ .
+
 
 Optimal transport and Wasserstein distance
 ------------------------------------------
@@ -20,10 +23,11 @@ Solving optimal transport
 
 The optimal transport problem between discrete distributions is often expressed
 as
-    .. math::
-        \gamma^* = arg\min_\gamma \quad \sum_{i,j}\gamma_{i,j}M_{i,j}
 
-        s.t. \gamma 1 = a; \gamma^T 1= b; \gamma\geq 0
+.. math::
+    \gamma^* = arg\min_\gamma \quad \sum_{i,j}\gamma_{i,j}M_{i,j}
+
+    s.t. \gamma 1 = a; \gamma^T 1= b; \gamma\geq 0
 
 where :
 
@@ -120,8 +124,6 @@ distributions. In this case when the finite sample dataset is supposed gaussian,
 mapping.
 
 
-
-
 Regularized Optimal Transport
 -----------------------------
 
@@ -146,6 +148,7 @@ We discuss in the following specific algorithms that can be used depending on
 the regularization term.
 
 
+
 Entropic regularized OT
 ^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -168,23 +171,107 @@ solution of the resulting optimization problem can be expressed as:
     \gamma_\lambda^*=\text{diag}(u)K\text{diag}(v)
 
 where :math:`u` and :math:`v` are vectors and :math:`K=\exp(-M/\lambda)` where
-the :math:`\exp` is taken component-wise.    
+the :math:`\exp` is taken component-wise. In order to solve the optimization
+problem, one can use an alternating projection algorithm that can be very
+efficient for large values of regularization.
+
+The main functions in POT are :any:`ot.sinkhorn` and
+:any:`ot.sinkhorn2` that return respectively the OT matrix and the value of the
+linear term. Note that the regularization parameter :math:`\lambda` in the
+equation above is given to those functions with the parameter :code:`reg`.
 
+    >>> import ot
+    >>> a=[.5,.5]
+    >>> b=[.5,.5]
+    >>> M=[[0.,1.],[1.,0.]]
+    >>> ot.sinkhorn(a,b,M,1)
+    array([[ 0.36552929,  0.13447071],
+        [ 0.13447071,  0.36552929]])
 
 
+More details about the algorithm used is given in the following note.
+
+
+.. note::
+    The main function to solve entropic regularized OT is :any:`ot.sinkhorn`.
+    This function is a wrapper and the parameter :code:`method` helps you select
+    the actual algorithm used to solve the problem:
+
+    + :code:`method='sinkhorn'` calls :any:`ot.bregman.sinkhorn_knopp`  the
+      classic algorithm [2]_.
+    + :code:`method='sinkhorn_stabilized'` calls :any:`ot.bregman.sinkhorn_stabilized`  the
+      log stabilized version of the algorithm [9]_.    
+    + :code:`method='sinkhorn_epsilon_scaling'` calls
+      :any:`ot.bregman.sinkhorn_epsilon_scaling`  the epsilon scaling version
+      of the algorithm [9]_.   
+    + :code:`method='greenkhorn'` calls :any:`ot.bregman.greenkhorn`  the
+      greedy sinkhorn version of the algorithm [22]_.
+
+    In addition to all those variants of sinkhorn, we have another
+    implementation solving the problem in the smooth dual or semi-dual in
+    :any:`ot.smooth`. This solver use the :any:`scipy.optimize.minimize`
+    function to solve the smooth problem with :code:`L-BFGS` algorithm. Tu use
+    this solver, use functions :any:`ot.smooth.smooth_ot_dual` or
+    :any:`ot.smooth.smooth_ot_semi_dual` with parameter :code:`reg_type='kl'` to
+    choose entropic/Kullbach Leibler regularization.
+
+.. hint::
+    Examples of use for :any:`ot.sinkhorn` are available in the following examples:
+
+    - :any:`auto_examples/plot_OT_2D_samples`
+    - :any:`auto_examples/plot_OT_1D` 
+    - :any:`auto_examples/plot_OT_1D_smooth`
+    - :any:`auto_examples/plot_stochastic`
+
+Finally note that we also provide in :any:`ot.stochastic` several implementation
+of stochastic solvers for entropic regularized OT [18]_ [19]_.  
 
 Other regularization
 ^^^^^^^^^^^^^^^^^^^^
 
-Stochastic gradient descent
-^^^^^^^^^^^^^^^^^^^^^^^^^^^
+While entropic OT is the most common and favored in practice, there exist other
+kinds of regularization. We provide in POT two specific solvers for other
+regularization terms: namely quadratic regularization and group lasso
+regularization. But we also provide in :any:`ot.optim`  two generic solvers that allows solving any
+smooth regularization in practice. 
+
+The first general regularization term we can solve is the quadratic
+regularization of the form 
+
+.. math::
+    \Omega(\gamma)=\sum_{i,j} \gamma_{i,j}^2
+
+This regularization term has a similar effect to entropic regularization in
+densifying the OT matrix but it keeps some sort of sparsity that is lost with
+entropic regularization as soon as :math:`\lambda>0` [17]_. This problem can be
+solved with POT using solvers from :any:`ot.smooth`, more specifically
+functions :any:`ot.smooth.smooth_ot_dual` or
+:any:`ot.smooth.smooth_ot_semi_dual` with parameter :code:`reg_type='l2'` to 
+choose the quadratic regularization.
+
+Another regularization that has been used in recent years is the group lasso
+regularization
+
+.. math::
+    \Omega(\gamma)=\sum_{j,G\in\mathcal{G}} \|\gamma_{G,j}\|_p^q
+
+where :math:`\mathcal{G}` contains non overlapping groups of lines in the OT
+matrix. This regularization proposed in [5]_ will promote sparsity at the group level and for
+instance will force target samples to get mass from a small number of groups.
+Note that the exact OT solution is already sparse so this regularization does
+not make sens if it is not combined with others such as entropic. 
+
+
+
+
+
 
 Wasserstein Barycenters
 -----------------------
 
 Monge mapping and Domain adaptation with Optimal transport
-----------------------------------------
+----------------------------------------------------------
 
 
 Other applications
@@ -207,7 +294,6 @@ FAQ
     the OT transport matrix. If you want to solve a regularized OT you can 
     use :py:mod:`ot.sinkhorn`.
 
-    
 
     Here is a simple use case:
 
@@ -222,7 +308,43 @@ FAQ
     :doc:`auto_examples/plot_OT_2D_samples`
     
 
-2. **Compute a Wasserstein distance**
+2. **pip install POT fails with error : ImportError: No module named Cython.Build**
+
+    As discussed briefly in the README file, POT requires :code:`numpy`
+    and :code:`cython` to be installed in order to build. This corner case is not yet handled
+    by :code:`pip` and for now you need to install both libraries prior to
+    installing POT.
+
+    Note that this problem does not occur when using conda-forge since the packages
+    there are pre-compiled.
+
+    See `Issue #59 <https://github.com/rflamary/POT/issues/59>`__ for more
+    details.
+
+3. **Why is Sinkhorn slower than EMD ?**
+
+    This might come from the choice of the regularization term. The speed of
+    convergence of sinkhorn depends directly on this term [22]_ and when the
+    regularization gets very small the problem tries to approximate the exact OT
+    which leads to slow convergence in addition to numerical problems. In other
+    words, for large regularization sinkhorn will be very fast to converge, for
+    small regularization (when you need an OT matrix close to the true OT), it
+    might be quicker to use the EMD solver.
+
+    Also note that the numpy implementation of the sinkhorn can use parallel
+    computation depending on the configuration of your system but very important
+    speedup can be obtained by using a GPU implementation since all operations
+    are matrix/vector products.
+
+4. **Using GPU fails with error:  module 'ot' has no attribute 'gpu'**
+
+    In order to limit import time and hard dependencies in POT, we do not import
+    some sub-modules automatically with :code:`import ot`. In order to use the
+    acceleration in :any:`ot.gpu` you need first to import it with
+    :code:`import ot.gpu`.
+
+    See `Issue #85 <https://github.com/rflamary/POT/issues/85>`__ and :any:`ot.gpu`
+    for more details.
 
 
 References
-- 
cgit v1.2.3


From 56deee6e1a69a087022bf81279419305452f5177 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Fri, 28 Jun 2019 09:39:23 +0200
Subject: update reg OT

---
 docs/source/quickstart.rst | 38 +++++++++++++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 7 deletions(-)

(limited to 'docs')

diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index 4f2d9bb..62688bc 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -210,7 +210,7 @@ More details about the algorithm used is given in the following note.
 
     In addition to all those variants of sinkhorn, we have another
     implementation solving the problem in the smooth dual or semi-dual in
-    :any:`ot.smooth`. This solver use the :any:`scipy.optimize.minimize`
+    :any:`ot.smooth`. This solver uses the :any:`scipy.optimize.minimize`
     function to solve the smooth problem with :code:`L-BFGS` algorithm. Tu use
     this solver, use functions :any:`ot.smooth.smooth_ot_dual` or
     :any:`ot.smooth.smooth_ot_semi_dual` with parameter :code:`reg_type='kl'` to
@@ -224,6 +224,13 @@ More details about the algorithm used is given in the following note.
     - :any:`auto_examples/plot_OT_1D_smooth`
     - :any:`auto_examples/plot_stochastic`
 
+
+Recently [23]_ introduced the sinkhorn divergence that build from entropic
+regularization to compute fast and differentiable geometric diveregnce between
+empirical distributions.  
+
+
+
 Finally note that we also provide in :any:`ot.stochastic` several implementation
 of stochastic solvers for entropic regularized OT [18]_ [19]_.  
 
@@ -254,33 +261,50 @@ Another regularization that has been used in recent years is the group lasso
 regularization
 
 .. math::
-    \Omega(\gamma)=\sum_{j,G\in\mathcal{G}} \|\gamma_{G,j}\|_p^q
+    \Omega(\gamma)=\sum_{j,G\in\mathcal{G}} \|\gamma_{G,j}\|_q^p
 
 where :math:`\mathcal{G}` contains non overlapping groups of lines in the OT
 matrix. This regularization proposed in [5]_ will promote sparsity at the group level and for
 instance will force target samples to get mass from a small number of groups.
 Note that the exact OT solution is already sparse so this regularization does
-not make sens if it is not combined with others such as entropic. 
+not make sense if it is not combined with others such as entropic. Depending on
+the choice of :code:`p` and :code:`q`, the problem can be solved with different
+approaches.  When :code:`q=1` and :code:`p<1` the problem is non convex but can
+be solved using an efficient majorization-minimization approach with
+:any:`ot.da.sinkhorn_lpl1_mm`. When :code:`q=2` and :code:`p=1` we recover the
+convex group lasso and we provide a solver using generalized conditional
+gradient algorithm [7]_ in function
+:any:`ot.da.sinkhorn_l1l2_gl`.
 
 
+Wasserstein Barycenters
+-----------------------
 
 
-Wasserstein Barycenters
------------------------
 
-Monge mapping and Domain adaptation with Optimal transport
-----------------------------------------------------------
+Monge mapping and Domain adaptation
+-----------------------------------
 
 
 Other applications
 ------------------
 
+Wasserstein Discriminant Analysis
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+Gromov-Wasserstein
+^^^^^^^^^^^^^^^^^^
+
 
 GPU acceleration
 ----------------
 
+We provide several implementation of our OT solvers in :any:`ot.gpu`. Those
+implementation use the :code:`cupy` toolbox.   
+
 
 
 FAQ
-- 
cgit v1.2.3


From bed755904e0fd1d66004877c96127a56aa7e0983 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Tue, 2 Jul 2019 09:42:52 +0200
Subject: regularized OT done

---
 docs/source/quickstart.rst | 58 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

(limited to 'docs')

diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index 62688bc..a005c64 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -243,6 +243,9 @@ regularization terms: namely quadratic regularization and group lasso
 regularization. But we also provide in :any:`ot.optim`  two generic solvers that allows solving any
 smooth regularization in practice. 
 
+Quadratic regularization
+""""""""""""""""""""""""
+
 The first general regularization term we can solve is the quadratic
 regularization of the form 
 
@@ -257,6 +260,17 @@ functions :any:`ot.smooth.smooth_ot_dual` or
 :any:`ot.smooth.smooth_ot_semi_dual` with parameter :code:`reg_type='l2'` to 
 choose the quadratic regularization.
 
+.. hint::
+    Examples of quadratic regularization are available in the following examples:
+
+    - :any:`auto_examples/plot_OT_1D_smooth`
+    - :any:`auto_examples/plot_optim_OTreg`
+
+
+
+Group Lasso regularization
+""""""""""""""""""""""""""
+
 Another regularization that has been used in recent years is the group lasso
 regularization
 
@@ -276,6 +290,50 @@ convex gourp lasso and we provide a solver using generalized conditional
 gradient algorithm [7]_ in function
 :any:`ot.da.sinkhorn_l1l2_gl`.
 
+.. hint::
+    Examples of group Lasso regularization are available in the following examples:
+
+    - :any:`auto_examples/plot_otda_classes` 
+    - :any:`auto_examples/plot_otda_d2`
+
+
+Generic solvers
+"""""""""""""""
+
+Finally we propose in POT generic solvers that can be used to solve any
+regularization as long as you can provide a function computing the
+regularization and a function computing its gradient.
+
+In order to solve 
+
+.. math::
+    \gamma^* = arg\min_\gamma \quad \sum_{i,j}\gamma_{i,j}M_{i,j} + \lambda\Omega(\gamma)
+
+        s.t. \gamma 1 = a; \gamma^T 1= b; \gamma\geq 0
+
+you can use function :any:`ot.optim.cg` that will use a conditional gradient as
+proposed in [6]_ . You need to provide the regularization function as parameter
+``f`` and its gradient as parameter  ``df``. Note that the conditional gradient relies on
+iterative solving of a linearization of the problem using the exact
+:any:`ot.emd` so it can be  slow in practice. Still it always returns a
+transport matrix that does not violate the marginals.
+
+Another solver is proposed to solve the problem
+
+.. math::
+    \gamma^* = arg\min_\gamma \quad \sum_{i,j}\gamma_{i,j}M_{i,j}+ \lambda_e\Omega_e(\gamma) + \lambda\Omega(\gamma)
+
+        s.t. \gamma 1 = a; \gamma^T 1= b; \gamma\geq 0
+
+where :math:`\Omega_e` is the entropic regularization. In this case we use a
+generalized conditional gradient [7]_ implemented in :any:`ot.optim.gcg`  that does not linearize the entropic term and
+relies on :any:`ot.sinkhorn` for its iterations. 
+
+.. hint::
+    Example of generic solvers are available in the following example:
+
+    - :any:`auto_examples/plot_optim_OTreg` 
+
 
 
 Wasserstein Barycenters
-- 
cgit v1.2.3


From 82d10f780fc296b9f5548e1fe1da9b20349b1e10 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Tue, 2 Jul 2019 10:55:05 +0200
Subject: wasserstein barycenetr with fixed support

---
 docs/source/quickstart.rst | 86 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 79 insertions(+), 7 deletions(-)

(limited to 'docs')

diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index a005c64..94bc8cd 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -216,13 +216,7 @@ More details about the algorithm used is given in the following note.
     :any:`ot.smooth.smooth_ot_semi_dual` with parameter :code:`reg_type='kl'` to
     choose entropic/Kullbach Leibler regularization.
 
-.. hint::
-    Examples of use for :any:`ot.sinkhorn` are available in the following examples:
 
-    - :any:`auto_examples/plot_OT_2D_samples`
-    - :any:`auto_examples/plot_OT_1D` 
-    - :any:`auto_examples/plot_OT_1D_smooth`
-    - :any:`auto_examples/plot_stochastic`
 
 
 Recently [23]_ introduced the sinkhorn divergence that build from entropic
@@ -234,6 +228,15 @@ empirical distributions.
 Finally note that we also provide in :any:`ot.stochastic` several implementation
 of stochastic solvers for entropic regularized OT [18]_ [19]_.  
 
+.. hint::
+    Examples of use for :any:`ot.sinkhorn` are available in the following examples:
+
+    - :any:`auto_examples/plot_OT_2D_samples`
+    - :any:`auto_examples/plot_OT_1D` 
+    - :any:`auto_examples/plot_OT_1D_smooth`
+    - :any:`auto_examples/plot_stochastic`
+
+
 Other regularization
 ^^^^^^^^^^^^^^^^^^^^
 
@@ -335,10 +338,79 @@ relies on :any:`ot.sinkhorn` for its iterations.
     - :any:`auto_examples/plot_optim_OTreg` 
 
 
-
 Wasserstein Barycenters
 -----------------------
 
+A Wasserstein barycenter is a distribution that minimizes its Wasserstein
+distance with respect to other distributions [16]_. It corresponds to minimizing the
+following problem by searching for a distribution :math:`\mu`
+
+.. math::
+    \min_\mu \quad \sum_{k} w_kW(\mu,\mu_k)
+
+
+In practice we model a distribution with a finite number of support positions:
+
+.. math::
+    \mu=\sum_{i=1}^n a_i\delta_{x_i}
+
+where :math:`a` is a histogram on the simplex and the :math:`\{x_i\}` are the
+positions of the support. We can clearly see here that optimizing :math:`\mu` can
+be done by searching for optimal weights :math:`a` or optimal support
+:math:`\{x_i\}` (optimizing both is also an option).
+We provide in POT solvers to estimate a discrete
+Wasserstein barycenter in both cases.
+
+Barycenters with fixed support
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+When optimizing a barycenter with a fixed support, the optimization problem can
+be expressed as
+
+
+.. math::
+    \min_a \quad \sum_{k} w_k W(a,b_k)
+
+where :math:`b_k` are also weights in the simplex. In the non-regularized case,
+the problem above is a classical linear program. In this case we propose a
+solver :any:`ot.lp.barycenter` that relies on generic LP solvers. By default the
+function uses :any:`scipy.optimize.linprog`, but more efficient LP solvers from
+cvxopt can be also used by changing parameter :code:`solver`. Note that these
+solvers require solving a very large linear program and can be very slow in
+practice.
+
+Similarly to the OT problem, OT barycenters can be computed in the regularized
+case. When using entropic regularization the problem can be solved with a
+generalization of the sinkhorn algorithm based on bregman projections [3]_. This
+algorithm is provided in function :any:`ot.bregman.barycenter` also available as
+:any:`ot.barycenter`. In this case, the algorithm scales better to large
+distributions and relies only on matrix multiplications that can be performed in
+parallel.
+
+In addition to teh speedup brought by regularization, one can also greatly
+accelerate the estimation of Wasserstein barycenter when the support has a
+separable structure [21]_. In teh case of 2D images for instance one can replace
+the matrix vector production in teh bregman projections by convolution
+operators. We provide an implementation of this algorithm in function
+:any:`ot.bregman.convolutional_barycenter2d`.
+
+.. hint::
+    Example of Wasserstein (:any:`ot.lp.barycenter`) and regularized wassrestein
+    barycenter (:any:`ot.bregman.barycenter`) computation are available in the following examples:
+
+    - :any:`auto_examples/plot_barycenter_1D` 
+    - :any:`auto_examples/plot_barycenter_lp_vs_entropic` 
+
+    Example of convolutional barycenter (:any:`ot.bregman.convolutional_barycenter2d`) computation for 2D images is available
+    in the following example:
+
+    - :any:`auto_examples/plot_convolutional_barycenter`
+
+
+
+Barycenters with free support
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
 
 
-- 
cgit v1.2.3


From b250212448ed3c1d023a6412abf4a3395d5585fb Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Tue, 2 Jul 2019 11:01:53 +0200
Subject: wasserstein barycenetr with fixed support

---
 docs/source/quickstart.rst | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

(limited to 'docs')

diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index 94bc8cd..7cbc962 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -220,8 +220,14 @@ More details about the algorithm used is given in the following note.
 
 
 Recently [23]_ introduced the sinkhorn divergence that build from entropic
-regularization to compute fast and differentiable geometric diveregnce between
-empirical distributions.  
+regularization to compute fast and differentiable geometric divergence between
+empirical distributions.  Note that we provide a function that compute directly
+(with no need to pre compute the :code:`M` matrix)
+the sinkhorn divergence for empirical distributions in
+:any:`ot.bregman.empirical_sinkhorn_divergence`. Similarly one can compute the
+OT matrix and loss for empirical distributions with respectively
+:any:`ot.bregman.empirical_sinkhorn` and :any:`ot.bregman.empirical_sinkhorn2`.
+
 
 
@@ -389,19 +395,21 @@ parallel.
 
 In addition to teh speedup brought by regularization, one can also greatly
 accelerate the estimation of Wasserstein barycenter when the support has a
-separable structure [21]_. In teh case of 2D images for instance one can replace
+separable structure [21]_. In the case of 2D images for instance one can replace
 the matrix vector production in teh bregman projections by convolution
 operators. We provide an implementation of this algorithm in function
 :any:`ot.bregman.convolutional_barycenter2d`.
 
 .. hint::
-    Example of Wasserstein (:any:`ot.lp.barycenter`) and regularized wassrestein
+    Example of Wasserstein (:any:`ot.lp.barycenter`) and regularized Wasserstein
     barycenter (:any:`ot.bregman.barycenter`) computation are available in the following examples:
 
     - :any:`auto_examples/plot_barycenter_1D` 
     - :any:`auto_examples/plot_barycenter_lp_vs_entropic` 
 
-    Example of convolutional barycenter (:any:`ot.bregman.convolutional_barycenter2d`) computation for 2D images is available
+    Example of convolutional barycenter
+    (:any:`ot.bregman.convolutional_barycenter2d`) computation 
+    for 2D images is available
     in the following example:
 
     - :any:`auto_examples/plot_convolutional_barycenter`
-- 
cgit v1.2.3


From 64693f98c22775048222f61f5e495849844e0135 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Tue, 2 Jul 2019 11:09:50 +0200
Subject: quickstart wasserstein barycenter done

---
 docs/source/quickstart.rst | 26 ++++++++++++++++++++++----
 1 file changed, 22 insertions(+), 4 deletions(-)

(limited to 'docs')

diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index 7cbc962..8cce1c9 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -217,8 +217,6 @@ More details about the algorithm used is given in the following note.
     choose entropic/Kullbach Leibler regularization.
 
 
-
-
 Recently [23]_ introduced the sinkhorn divergence that build from entropic
 regularization to compute fast and differentiable geometric divergence between
 empirical distributions.  Note that we provide a function that compute directly
@@ -417,7 +415,27 @@ operators. We provide an implementation of this algorithm in function
 
 
 Barycenters with free support
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Estimating the Wassresein barycenter with free support but fixed weights
+corresponds to  solving the following optimization problem:
+
+.. math::
+    \min_\{x_i\} \quad \sum_{k} w_kW(\mu,\mu_k)
+
+    s.t. \quad \mu=\sum_{i=1}^n a_i\delta_{x_i}
+
+WE provide an alternating solver based on [20]_ in
+:any:`ot.lp.free_support_barycenter`. This function minimize the problem and
+return an optimal support :math:`\{x_i\}` for uniform or given weights
+:math:`a`.
+
+ .. hint::
+
+    Example of the free support barycenter estimation is available
+    in the following example:
+
+    - :any:`auto_examples/plot_free_support_barycenter`
 
 
@@ -438,7 +456,7 @@ Gromov-Wasserstein
 
 
 GPU acceleration
-----------------
+^^^^^^^^^^^^^^^^
 
 We provide several implementation of our OT solvers in :any:`ot.gpu`. Those
 implementation use the :code:`cupy` toolbox.   
-- 
cgit v1.2.3


From 6fdce8f75000ec6e609371ae39484f7edbb19b2c Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Tue, 2 Jul 2019 13:38:20 +0200
Subject: quickstart wda + start unbalanced

---
 docs/source/quickstart.rst | 148 +++++++++++++++++++++++++++++++++++++++++++--
 docs/source/readme.rst     |   2 -
 2 files changed, 144 insertions(+), 6 deletions(-)

(limited to 'docs')

diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index 8cce1c9..8f4a24e 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -278,7 +278,7 @@ choose the quadratic regularization.
 Group Lasso regularization
 """"""""""""""""""""""""""
 
-Another regularization that has been used in recent years is the group lasso
+Another regularization that has been used in recent years [5]_  is the group lasso
 regularization
 
 .. math::
@@ -333,7 +333,7 @@ Another solver is proposed to solve the problem
         s.t. \gamma 1 = a; \gamma^T 1= b; \gamma\geq 0
 
 where :math:`\Omega_e` is the entropic regularization. In this case we use a
-generalized conditional gradient [7]_ implemented in :any:`ot.opim.gcg`  that does not linearize the entropic term and
+generalized conditional gradient [7]_ implemented in :any:`ot.optim.gcg`  that does not linearize the entropic term and
 relies on :any:`ot.sinkhorn` for its iterations. 
 
 .. hint::
@@ -421,11 +421,11 @@ Estimating the Wassresein barycenter with free support but fixed weights
 corresponds to  solving the following optimization problem:
 
 .. math::
-    \min_\{x_i\} \quad \sum_{k} w_kW(\mu,\mu_k)
+    \min_{\{x_i\}} \quad \sum_{k} w_kW(\mu,\mu_k)
 
     s.t. \quad \mu=\sum_{i=1}^n a_i\delta_{x_i}
 
-WE provide an alternating solver based on [20]_ in
+We provide an alternating solver based on [20]_ in
 :any:`ot.lp.free_support_barycenter`. This function minimize the problem and
 return an optimal support :math:`\{x_i\}` for uniform or given weights
 :math:`a`.
@@ -443,13 +443,149 @@ return an optimal support :math:`\{x_i\}` for uniform or given weights
 Monge mapping and Domain adaptation
 -----------------------------------
 
+The original transport problem investigated by Gaspard Monge  was seeking for a
+mapping function that maps (or transports) between a source and target
+distribution but that minimizes the transport loss. The existence and uniqueness of this
+optimal mapping is still an open problem in the general case but has been proven
+for smooth distributions by Brenier in his eponym `theorem
+<https://who.rocq.inria.fr/Jean-David.Benamou/demiheure.pdf>`__. We provide in
+:any:`ot.da` several solvers for Monge mapping estimation and domain adaptation. 
+
+Monge Mapping estimation
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+We now discuss several approaches that are implemented in POT to estimate or
+approximate a Monge mapping from finite distributions. 
+
+First note that when the source and target distributions are supposed to be Gaussian
+distributions, there exists a closed form solution for the mapping and it is an
+affine function [14]_ of the form :math:`T(x)=Ax+b` . In this case we provide the function
+:any:`ot.da.OT_mapping_linear` that returns the operator :math:`A` and vector
+:math:`b`. Note that if the number of samples is too small there is a parameter
+:code:`reg` that provides a regularization for the covariance matrix estimation.
+
+For a more general mapping estimation we also provide the barycentric mapping
+proposed in [6]_ . It is implemented in the class :any:`ot.da.EMDTransport` and
+other transport based classes in :any:`ot.da` . Those classes are discussed more
+in the following but follow an interface similar to sklearn classes. Finally a
+method proposed in [8]_ that estimate a continuous mapping approximating the
+barycentric mapping is provided in :any:`ot.da.joint_OT_mapping_linear` for
+linear mapping and :any:`ot.da.joint_OT_mapping_kernel` for non linear mapping.
+
+ .. hint::
+
+    Example of the linear Monge mapping estimation is available
+    in the following example:
+
+    - :any:`auto_examples/plot_otda_linear_mapping`
+
+Domain adaptation classes
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The use of OT for domain adaptation (OTDA) has been first proposed in [5]_ that also
+introduced the group Lasso regularization. The main idea of OTDA is to estimate
+a mapping of the samples between source and target distributions which allows to
+transport labeled source samples onto the target distribution with no labels.
+
+We provide several classes based on :any:`ot.da.BaseTransport` that provide
+several OT and mapping estimations. The interface of those classes is similar to
+classifiers in sklearn toolbox. At initialization several parameters (for
+instance regularization parameter) can be set. Then one needs to estimate the
+mapping with function :any:`ot.da.BaseTransport.fit`. Finally one can map the
+samples from source to target with  :any:`ot.da.BaseTransport.transform` and
+from target to source with :any:`ot.da.BaseTransport.inverse_transform`. Here is
+an example for class :any:`ot.da.EMDTransport` 
+
+.. code::
+
+    ot_emd = ot.da.EMDTransport()
+    ot_emd.fit(Xs=Xs, Xt=Xt)
+
+    Mapped_Xs= ot_emd.transform(Xs=Xs)
+
+A list
+of the provided implementation is given in the following note.
+
+.. note::
+
+    Here is a list of the mapping classes inheriting from
+    :any:`ot.da.BaseTransport`
+    
+    * :any:`ot.da.EMDTransport` : Barycentric mapping with EMD transport
+    * :any:`ot.da.SinkhornTransport` : Barycentric mapping with Sinkhorn transport
+    * :any:`ot.da.SinkhornL1l2Transport` : Barycentric mapping with Sinkhorn +
+      group Lasso regularization [5]_
+    * :any:`ot.da.SinkhornLpl1Transport` : Barycentric mapping with Sinkhorn +
+      non convex group Lasso regularization [5]_      
+    * :any:`ot.da.LinearTransport` : Linear mapping estimation  between Gaussians
+      [14]_
+    * :any:`ot.da.MappingTransport` : Nonlinear mapping estimation [8]_ 
+
+.. hint::
+
+    Examples of the use of OTDA classes are available in the following examples:
+
+    - :any:`auto_examples/plot_otda_color_images`
+    - :any:`auto_examples/plot_otda_mapping`
+    - :any:`auto_examples/plot_otda_mapping_colors_images`
+    - :any:`auto_examples/plot_otda_semi_supervised`
 
 Other applications
 ------------------
 
+We discuss in the following several implementations that have been used and
+proposed in the OT and machine learning community.
+
 Wasserstein Discriminant Analysis
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
+Wasserstein Discriminant Analysis [11]_ is a generalization of `Fisher Linear Discriminant
+Analysis <https://en.wikipedia.org/wiki/Linear_discriminant_analysis>`__ that
+allows discrimination between classes that are not linearly separable. It
+consists in finding a linear projector optimizing the following criterion
+
+.. math::
+    P = \text{arg}\min_P \frac{\sum_i OT_e(\mu_i\#P,\mu_i\#P)}{\sum_{i,j\neq i}
+    OT_e(\mu_i\#P,\mu_j\#P)}
+    
+where :math:`\#` is the push-forward operator, :math:`OT_e` is the entropic OT
+loss  and :math:`\mu_i` is the
+distribution of samples from class :math:`i`.  :math:`P` is also constrained to
+be in the Stiefel manifold. WDA can be solved in pot using function
+:any:`ot.dr.wda`. It requires to have installed :code:`pymanopt` and
+:code:`autograd` for manifold optimization and automatic differentiation
+respectively. Note that we also provide the Fisher discriminant estimator in
+:any:`ot.dr.wda` for easy comparison.
+
+.. warning::
+    Note that due to the hard dependency on  :code:`pymanopt` and
+    :code:`autograd`, :any:`ot.dr` is not imported by default. If you want to
+    use it you have to specifically import it with :code:`import ot.dr` .
+
+.. hint::
+
+    An example of the use of WDA is available in the following example:
+
+    - :any:`auto_examples/plot_WDA`
+
+
+Unbalanced optimal transport
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Unbalanced OT is a relaxation of the original OT problem where the violation of
+the constraint on the marginals is added to the objective of the optimization
+problem:
+ 
+.. math::
+    \min_\gamma \quad \sum_{i,j}\gamma_{i,j}M_{i,j} + reg\cdot\Omega(\gamma) + \alpha KL(\gamma 1, a) + \alpha KL(\gamma^T 1, b)
+
+    s.t. \quad  \gamma\geq 0
+
+
+where KL is the Kullback-Leibler divergence. This formulation allwos for
+computing approximate mapping between distributions that do not have the same
+amount of mass. Interestingly the problem can be solved with a generalization of
+the Bregman projections algorithm [10]_.
 
 Gromov-Wasserstein
 ^^^^^^^^^^^^^^^^^^
@@ -461,6 +597,10 @@ GPU acceleration
 We provide several implementation of our OT solvers in :any:`ot.gpu`. Those
 implementation use the :code:`cupy` toolbox.   
 
+.. warning::
+    Note that due to the hard dependency on  :code:`cupy`, :any:`ot.gpu` is not
+    imported by default. If you want to
+    use it you have to specifically import it with :code:`import ot.gpu` .
 
 
 FAQ
diff --git a/docs/source/readme.rst b/docs/source/readme.rst
index 320ddd5..0871779 100644
--- a/docs/source/readme.rst
+++ b/docs/source/readme.rst
@@ -221,8 +221,6 @@ This toolbox has been created and is maintained by
 
 The contributors to this library are
 
--  `Rémi Flamary <http://remi.flamary.com/>`__
--  `Nicolas Courty <http://people.irisa.fr/Nicolas.Courty/>`__
 -  `Alexandre Gramfort <http://alexandre.gramfort.net/>`__
 -  `Laetitia Chapel <http://people.irisa.fr/Laetitia.Chapel/>`__
 -  `Michael Perrot <http://perso.univ-st-etienne.fr/pem82055/>`__
-- 
cgit v1.2.3


From 85cc12bc7731077846bb77346797165c098fc4ec Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Tue, 2 Jul 2019 16:05:31 +0200
Subject: quickstart gfirst shot done!

---
 docs/source/quickstart.rst | 97 +++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 92 insertions(+), 5 deletions(-)

(limited to 'docs')

diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index 8f4a24e..0dcd7ff 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -417,7 +417,7 @@ operators. We provide an implementation of this algorithm in function
 Barycenters with free support
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Estimating the Wassresein barycenter with free support but fixed weights
+Estimating the Wasserstein barycenter with free support but fixed weights
 corresponds to  solving the following optimization problem:
 
 .. math::
@@ -555,7 +555,7 @@ be in the Stiefel manifold. WDA can be solved in pot using function
 :any:`ot.dr.wda`. It requires to have installed :code:`pymanopt` and
 :code:`autograd` for manifold optimization and automatic differentiation
 respectively. Note that we also provide the Fisher discriminant estimator in
-:any:`ot.dr.wda` for easy comparison.
+:any:`ot.dr.fda` for easy comparison.
 
 .. warning::
     Note that due to the hard dependency on  :code:`pymanopt` and
@@ -585,17 +585,104 @@ problem:
 where KL is the Kullback-Leibler divergence. This formulation allwos for
 computing approximate mapping between distributions that do not have the same
 amount of mass. Interestingly the problem can be solved with a generalization of
-the Bregman projections algorithm [10]_.
+the Bregman projections algorithm [10]_. We provide a solver for unbalanced OT
+in :any:`ot.unbalanced` and more specifically  
+in function :any:`ot.sinkhorn_unbalanced`. A solver for unbalanced OT barycenter
+is available in :any:`ot.barycenter_unbalanced`.
+
+
+.. hint::
+
+    Examples of the use of :any:`ot.sinkhorn_unbalanced` and
+    :any:`ot.barycenter_unbalanced`  are available in:
+
+    - :any:`auto_examples/plot_UOT_1D`
+    - :any:`auto_examples/plot_UOT_barycenter_1D`
+
 
 Gromov-Wasserstein
 ^^^^^^^^^^^^^^^^^^
 
+Gromov Wasserstein (GW) is a generalization of OT to distributions that do not lie in
+the same space [13]_. In this case one cannot compute distance between samples
+from the two distributions. [13]_ proposed instead to realign the metric spaces
+by computing a transport between distance matrices. The Gromov-Wasserstein
+alignment between two distributions can be expressed as the one minimizing:
+
+
+.. math::
+    GW = \min_\gamma \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*\gamma_{i,j}*\gamma_{k,l}
+
+    s.t. \gamma 1 = a; \gamma^T 1= b; \gamma\geq 0
+
+where :math:`C1` is the distance matrix between samples in the source
+distribution and :math:`C2` the one between samples in the target,   :math:`L(C1_{i,k},C2_{j,l})` is a measure of similarity between
+:math:`C1_{i,k}` and :math:`C2_{j,l}` often chosen as
+:math:`L(C1_{i,k},C2_{j,l})=\|C1_{i,k}-C2_{j,l}\|^2`. The optimization problem
+above is a non-convex quadratic program but we provide a solver that finds a
+local minimum using conditional gradient in :any:`ot.gromov.gromov_wasserstein`.
+There also exist an entropic regularized variant of GW that has been proposed in
+[12]_ and we provide an implementation of their algorithm in
+:any:`ot.gromov.entropic_gromov_wasserstein`.
+
+Note that similarly to Wasserstein distance GW allows for the definition of GW
+barycenters that can be expressed as
+
+.. math::
+    \min_{C\geq 0} \quad \sum_{k} w_k GW(C,Ck)
+
+where :math:`Ck` is the distance matrix between samples in distribution
+:math:`k`. Note that interestingly the barycenter is defined as a symmetric
+positive matrix. We provide a block coordinate optimization procedure in 
+:any:`ot.gromov.gromov_barycenters` and
+:any:`ot.gromov.entropic_gromov_barycenters` for non-regularized and regularized
+barycenters respectively.
+
+Finally note that recently a fusion between Wasserstein and GW, coined Fused
+Gromov-Wasserstein (FGW) has been proposed
+in [24]_. It allows to compute a similarity between objects that are only partly in
+the same space. As such it can be used to measure similarity between labeled
+graphs for instance and also provide computable barycenters.
+The implementations of FGW is provided in functions
+:any:`ot.gromov.fused_gromov_wasserstein` and :any:`ot.gromov.fgw_barycenters`.
+
+.. hint::
+
+    Examples of computation of GW, regularized GW and FGW are provided in :
+
+    - :any:`auto_examples/plot_gromov`
+    - :any:`auto_examples/plot_fgw`   
+
+    Examples of GW, regularized GW and FGW barycenters are available in :
+
+    - :any:`auto_examples/plot_gromov_barycenter`
+    - :any:`auto_examples/plot_barycenter_fgw`   
+
 
 GPU acceleration
 ^^^^^^^^^^^^^^^^
 
 We provide several implementation of our OT solvers in :any:`ot.gpu`. Those
-implementation use the :code:`cupy` toolbox.   
+implementation use the :code:`cupy` toolbox that obviously needs to be installed.   
+
+
+.. note::
+
+    Several implementations of POT functions (mainly those relying on linear
+    algebra) have been implemented in :any:`ot.gpu`. Here is a short list on the
+    main entries:
+
+    -  :any:`ot.gpu.dist` : computation of distance matrix
+    -  :any:`ot.gpu.sinkhorn` : computation of sinkhorn
+    -  :any:`ot.gpu.sinkhorn_lpl1_mm` : computation of sinkhorn + group lasso
+
+Note that while the :any:`ot.gpu` module has been designed to be compatible with
+POT,  calling its function with numpy array will incur a large overhead due to
+the memory copy of the array on GPU prior to computation and conversion of the
+array after computation. To avoid this overhead, we provide functions
+:any:`ot.gpu.to_gpu` and :any:`ot.gpu.to_np` that perform the conversion
+explicitly.
+
 
 .. warning::
     Note that due to the hard dependency on  :code:`cupy`, :any:`ot.gpu` is not
@@ -735,7 +822,7 @@ References
     matching <https://media.adelaide.edu.au/acvt/Publications/2011/2011-Gromov%E2%80%93Wasserstein%20Distances%20and%20the%20Metric%20Approach%20to%20Object%20Matching.pdf>`__.
     Foundations of computational mathematics 11.4 : 417-487.
 
-.. [14] Knott, M. and Smith, C. S. (1984).`On the optimal mapping of
+.. [14] Knott, M. and Smith, C. S. (1984). `On the optimal mapping of
     distributions <https://link.springer.com/article/10.1007/BF00934745>`__,
     Journal of Optimization Theory and Applications Vol 43.
 
-- 
cgit v1.2.3


From ef00ce42616fe7adf747c23a5590a83b62171a36 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Tue, 2 Jul 2019 16:40:30 +0200
Subject: quickstart proof reading

---
 docs/source/quickstart.rst | 173 +++++++++++++++++++++++----------------------
 1 file changed, 88 insertions(+), 85 deletions(-)

(limited to 'docs')

diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index 0dcd7ff..b726149 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -3,7 +3,9 @@ Quick start guide
 =================
 
 In the following we provide some pointers about which functions and classes 
-to use for different problems related to optimal transport (OT).
+to use for different problems related to optimal transport (OT) and machine
+learning. We refer when we can to concrete examples in the documentation that
+are also available as notebooks on the POT Github.
 
 This document is not a tutorial on numerical optimal transport. For this we strongly
 recommend to read the very nice book [15]_ . 
@@ -16,7 +18,8 @@ Optimal transport and Wasserstein distance
     In POT, most functions that solve OT or regularized OT problems have two
     versions that return the OT matrix or the value of the optimal solution. For
     instance :any:`ot.emd` return the OT matrix and :any:`ot.emd2` return the
-    Wassertsein distance.
+    Wasserstein distance. This approach has been implemented in practice for all
+    solvers that return an OT matrix (even Gromov-Wasserstein).
 
 Solving optimal transport
 ^^^^^^^^^^^^^^^^^^^^^^^^^
@@ -32,7 +35,8 @@ as
 where :
 
 - :math:`M\in\mathbb{R}_+^{m\times n}` is the metric cost matrix defining the cost to move mass from bin :math:`a_i` to bin :math:`b_j`.
-- :math:`a` and :math:`b` are histograms (positive, sum to 1) that represent the weights of each samples in the source an target distributions.
+- :math:`a` and :math:`b` are histograms on the simplex (positive, sum to 1) that represent the
+  weights of each sample in the source and target distributions.
 
 Solving the linear program above can be done using the function :any:`ot.emd`
 that will return the optimal transport matrix :math:`\gamma^*`:
@@ -43,7 +47,7 @@ that will return the optimal transport matrix :math:`\gamma^*`:
     # M is the ground cost matrix
     T=ot.emd(a,b,M) # exact linear program
 
-The method used for solving the OT problem is the network simplex, it is
+The method implemented for solving the OT problem is the network simplex, it is
 implemented in C from  [1]_. It has a complexity of :math:`O(n^3)` but the
 solver is quite efficient and uses sparsity of the solution.
 
@@ -54,15 +58,16 @@ solver is quite efficient and uses sparsity of the solution.
     - :any:`auto_examples/plot_OT_1D` 
     - :any:`auto_examples/plot_OT_L1_vs_L2` 
 
+
 Computing Wasserstein distance
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-The value of the OT solution is often more of interest that the OT matrix :
+The value of the OT solution is often more of interest than the OT matrix :
 
-    .. math::
-        OT(a,b)=\min_\gamma \quad \sum_{i,j}\gamma_{i,j}M_{i,j}
+.. math::
+    OT(a,b)=\min_\gamma \quad \sum_{i,j}\gamma_{i,j}M_{i,j}
 
-        s.t. \gamma 1 = a; \gamma^T 1= b; \gamma\geq 0
+    s.t. \gamma 1 = a; \gamma^T 1= b; \gamma\geq 0
 
 
 It can computed from an already estimated OT matrix with
@@ -92,7 +97,6 @@ the :math:`W_1` wasserstein distance can be done directly with  :any:`ot.emd2`
 when providing :code:`M=ot.dist(xs,xt, metric='euclidean')` to use the euclidean
 distance.
 
- 
 
 .. hint::
     Examples of use for :any:`ot.emd2` are available in the following examples:
@@ -111,15 +115,15 @@ For instance when the samples are in 1D, then the OT problem can be solved in
 function :any:`ot.emd_1d` and   :any:`ot.emd2_1d` to return respectively the OT
 matrix and value. Note that since the solution is very sparse the :code:`sparse`
 parameter of :any:`ot.emd_1d` allows for solving and returning the solution for
-very large problems. Note that in order to computed directly the :math:`W_p`
+very large problems. Note that in order to compute directly the :math:`W_p`
 Wasserstein distance in 1D we provide the function :any:`ot.wasserstein_1d` that
 takes :code:`p` as a parameter. 
 
-Another specials for estimating OT and Monge mapping is between Gaussian
+Another special case for estimating OT and Monge mapping is between Gaussian
 distributions. In this case there exists a close form solution given in Remark
 2.29 in [15]_ and the Monge mapping is an affine function and can be
 also computed from the covariances and means of the source and target
-distributions. In this case when the finite sample dataset is supposed gaussian, we provide 
+distributions. In the case when the finite sample dataset is supposed gaussian, we provide 
 :any:`ot.da.OT_mapping_linear` that returns the parameters for the Monge
 mapping.
 
@@ -129,8 +133,7 @@ Regularized Optimal Transport
 
 Recent developments have shown the interest of regularized OT both in terms of
 computational and statistical properties.
-
-We address in this section the regularized OT problem that can be expressed as
+We address in this section the regularized OT problems that can be expressed as
 
 .. math::
     \gamma^* = arg\min_\gamma \quad \sum_{i,j}\gamma_{i,j}M_{i,j} + \lambda\Omega(\gamma)
@@ -148,7 +151,6 @@ We discuss in the following specific algorithms that can be used depending on
 the regularization term.
 
 
-
 Entropic regularized OT
 ^^^^^^^^^^^^^^^^^^^^^^^
 
@@ -162,7 +164,8 @@ regularization has the following expression
 
 The use of the regularization term above in the optimization problem has a very
 strong impact. First it makes the problem smooth which leads to new optimization
-procedures such as L-BFGS (see :any:`ot.smooth` ). Next it makes the problem
+procedures such as the well known Sinkhorn algorithm [2]_ or L-BFGS (see
+:any:`ot.smooth` ). Next it makes the problem
 strictly convex meaning that there will be a unique solution. Finally the
 solution of the resulting optimization problem can be expressed as:
 
@@ -172,13 +175,13 @@ solution of the resulting optimization problem can be expressed as:
 
 where :math:`u` and :math:`v` are vectors and :math:`K=\exp(-M/\lambda)` where
 the :math:`\exp` is taken component-wise. In order to solve the optimization
-problem, on can use an alternative projection algorithm that can be very
+problem, one can use an alternative projection algorithm called Sinkhorn-Knopp that can be very
 efficient for large values if regularization. 
 
-The main function is POT are  :any:`ot.sinkhorn` and
+The Sinkhorn-Knopp algorithm is implemented in :any:`ot.sinkhorn` and
 :any:`ot.sinkhorn2` that return respectively the OT matrix and the value of the
 linear term. Note that the regularization parameter :math:`\lambda` in the
-equation above is given to those function with the parameter :code:`reg`.
+equation above is given to those functions with the parameter :code:`reg`.
 
     >>> import ot
     >>> a=[.5,.5]
@@ -188,10 +191,7 @@ equation above is given to those function with the parameter :code:`reg`.
     array([[ 0.36552929,  0.13447071],
         [ 0.13447071,  0.36552929]])
 
-
-
-More details about the algorithm used is given in the following note.
-
+More details about the algorithms used are given in the following note.
 
 .. note::
     The main function to solve entropic regularized OT is :any:`ot.sinkhorn`.
@@ -211,7 +211,7 @@ More details about the algorithm used is given in the following note.
     In addition to all those variants of sinkhorn, we have another
     implementation solving the problem in the smooth dual or semi-dual in
     :any:`ot.smooth`. This solver uses the :any:`scipy.optimize.minimize`
-    function to solve the smooth problem with :code:`L-BFGS` algorithm. Tu use
+    function to solve the smooth problem with :code:`L-BFGS-B` algorithm. To use
     this solver, use functions :any:`ot.smooth.smooth_ot_dual` or
     :any:`ot.smooth.smooth_ot_semi_dual` with parameter :code:`reg_type='kl'` to
     choose entropic/Kullbach Leibler regularization.
@@ -227,13 +227,13 @@ OT matrix and loss for empirical distributions with respectively
 :any:`ot.bregman.empirical_sinkhorn` and :any:`ot.bregman.empirical_sinkhorn2`.
 
 
-
-
 Finally note that we also provide in :any:`ot.stochastic` several implementation
-of stochastic solvers for entropic regularized OT [18]_ [19]_.  
+of stochastic solvers for entropic regularized OT [18]_ [19]_.  Those pure Python
+implementations are not optimized for speed but provide a robust implementation
+of algorithms in [18]_ [19]_.
 
 .. hint::
-    Examples of use for :any:`ot.sinkhorn` are available in the following examples:
+    Examples of use for :any:`ot.sinkhorn` are available in :
 
     - :any:`auto_examples/plot_OT_2D_samples`
     - :any:`auto_examples/plot_OT_1D` 
@@ -246,7 +246,7 @@ Other regularization
 
 While entropic OT is the most common and favored in practice, there exist other
 kind of regularization. We provide in POT two specific solvers for other
-regularization terms: namely quadratic regularization and group lasso
+regularization terms, namely quadratic regularization and group lasso
 regularization. But we also provide in :any:`ot.optim`  two generic solvers that allows solving any
 smooth regularization in practice. 
 
@@ -261,14 +261,14 @@ regularization of the form
 
 this regularization term has a similar effect to entropic regularization in
 densifying the OT matrix but it keeps some sort of sparsity that is lost with
-entropic regularization as soon as :math:`\lambda>0` [17]_. This problem cen be
+entropic regularization as soon as :math:`\lambda>0` [17]_. This problem can be
 solved with POT using solvers from :any:`ot.smooth`, more specifically
 functions :any:`ot.smooth.smooth_ot_dual` or
 :any:`ot.smooth.smooth_ot_semi_dual` with parameter :code:`reg_type='l2'` to 
 choose the quadratic regularization.
 
 .. hint::
-    Examples of quadratic regularization are available in the following examples:
+    Examples of quadratic regularization are available in :
 
     - :any:`auto_examples/plot_OT_1D_smooth`
     - :any:`auto_examples/plot_optim_OTreg`
@@ -288,17 +288,17 @@ where :math:`\mathcal{G}` contains non overlapping groups of lines in the OT
 matrix. This regularization proposed in [5]_ will promote sparsity at the group level and for
 instance will force target samples to get mass from a small number of groups.
 Note that the exact OT solution is already sparse so this regularization does
-not make sens if it is not combined with others such as entropic. Depending on
+not make sense if it is not combined with entropic regularization. Depending on
 the choice of :code:`p` and :code:`q`, the problem can be solved with different
 approaches.  When :code:`q=1` and :code:`p<1` the problem is non convex but can
-be solved using an efficient majoration minimization approach  with
+be solved using an efficient majoration minimization approach with
 :any:`ot.sinkhorn_lpl1_mm`. When :code:`q=2` and :code:`p=1` we recover the
-convex gourp lasso and we provide a solver using generalized conditional
+convex group lasso and we provide a solver using generalized conditional
 gradient algorithm [7]_ in function
 :any:`ot.da.sinkhorn_l1l2_gl`.
 
 .. hint::
-    Examples of group Lasso regularization are available in the following examples:
+    Examples of group Lasso regularization are available in :
 
     - :any:`auto_examples/plot_otda_classes` 
     - :any:`auto_examples/plot_otda_d2`
@@ -309,7 +309,7 @@ Generic solvers
 
 Finally we propose in POT generic solvers that can be used to solve any
 regularization as long as you can provide a function computing the
-regularization and a function computing its gradient.
+regularization and a function computing its gradient (or sub-gradient).
 
 In order to solve 
 
@@ -319,13 +319,14 @@ In order to solve
         s.t. \gamma 1 = a; \gamma^T 1= b; \gamma\geq 0
 
 you can use function :any:`ot.optim.cg` that will use a conditional gradient as
-proposed in [6]_ . you need to provide the regularization function as parameter
+proposed in [6]_ . You need to provide the regularization function as parameter
 ``f`` and its gradient as parameter  ``df``. Note that the conditional gradient relies on
 iterative solving of a linearization of the problem using the exact
-:any:`ot.emd` so it can be  slow in practice. Still it always returns a
+:any:`ot.emd` so it can be  slow in practice. But, being an interior point
+algorithm,  it always returns a
 transport matrix that does not violates the marginals.
 
-Another solver is proposed to solve the problem
+Another generic solver is proposed to solve the problem
 
 .. math::
     \gamma^* = arg\min_\gamma \quad \sum_{i,j}\gamma_{i,j}M_{i,j}+ \lambda_e\Omega_e(\gamma) + \lambda\Omega(\gamma)
@@ -333,11 +334,12 @@ Another solver is proposed to solve the problem
         s.t. \gamma 1 = a; \gamma^T 1= b; \gamma\geq 0
 
 where :math:`\Omega_e` is the entropic regularization. In this case we use a
-generalized conditional gradient [7]_ implemented in :any:`ot.optim.gcg`  that does not linearize the entropic term and
+generalized conditional gradient [7]_ implemented in :any:`ot.optim.gcg`  that
+does not linearize the entropic term but
 relies on :any:`ot.sinkhorn` for its iterations. 
 
 .. hint::
-    Example of generic solvers are available in the following example:
+    An example of generic solvers is available in :
 
     - :any:`auto_examples/plot_optim_OTreg` 
 
@@ -347,7 +349,7 @@ Wasserstein Barycenters
 
 A Wasserstein barycenter is a distribution that minimize its Wasserstein
 distance with respect to other distributions [16]_. It corresponds to minimizing the
-following problem by seaching a distribution :math:`\mu` 
+following problem by searching a distribution :math:`\mu` such that
 
 .. math::
     \min_\mu \quad \sum_{k} w_kW(\mu,\mu_k)
@@ -371,7 +373,6 @@ Barycenters with fixed support
 When optimizing a barycenter with a fixed support, the optimization problem can
 be expressed as
 
-
 .. math::
     \min_a \quad \sum_{k} w_k W(a,b_k)
 
@@ -379,36 +380,36 @@ where :math:`b_k` are also weights in the simplex. In the non-regularized case,
 the problem above is a classical linear program. In this case we propose a
 solver :any:`ot.lp.barycenter` that rely on generic LP solvers. By default the
 function uses :any:`scipy.optimize.linprog`, but more efficient LP solvers from
-cvxopt can be also used by changing parameter :code:`solver`. Note that these
-solver require to solve a very large linear program and can be very slow in
+cvxopt can be also used by changing parameter :code:`solver`. Note that this approach
+requires solving a very large linear program and can be very slow in
 practice. 
 
 Similarly to the OT problem, OT barycenters can be computed in the regularized
-case. When using entropic regularization the problem can be solved with a
+case. When entropic regularization is used, the problem can be solved with a
 generalization of the sinkhorn algorithm based on bregman projections [3]_. This
 algorithm is provided in function :any:`ot.bregman.barycenter` also available as
 :any:`ot.barycenter`. In this case, the algorithm scales better to large
 distributions and rely only on matrix multiplications that can be performed in
 parallel.
 
-In addition to teh speedup brought by regularization, one can also greatly
+In addition to the speedup brought by regularization, one can also greatly
 accelerate the estimation of Wasserstein barycenter when the support has a
 separable structure [21]_. In the case of 2D images for instance one can replace
-the matrix vector production in teh bregman projections by convolution
+the matrix vector production in the Bregman projections by convolution
 operators. We provide an implementation of this algorithm in function
 :any:`ot.bregman.convolutional_barycenter2d`.
 
 .. hint::
-    Example of Wasserstein (:any:`ot.lp.barycenter`) and regularized Wasserstein
-    barycenter (:any:`ot.bregman.barycenter`) computation are available in the following examples:
+    Examples of Wasserstein (:any:`ot.lp.barycenter`) and regularized Wasserstein
+    barycenter (:any:`ot.bregman.barycenter`) computation are available in :
 
     - :any:`auto_examples/plot_barycenter_1D` 
     - :any:`auto_examples/plot_barycenter_lp_vs_entropic` 
 
-    Example of convolutional barycenter
+    An example of convolutional barycenter
     (:any:`ot.bregman.convolutional_barycenter2d`) computation 
     for 2D images is available
-    in the following example:
+    in :
 
     - :any:`auto_examples/plot_convolutional_barycenter`
 
@@ -425,15 +426,15 @@ corresponds to  solving the following optimization problem:
 
     s.t. \quad \mu=\sum_{i=1}^n a_i\delta_{x_i}
 
-We provide an alternating solver based on [20]_ in
+We provide a solver based on [20]_ in
 :any:`ot.lp.free_support_barycenter`. This function minimize the problem and
-return an optimal support :math:`\{x_i\}` for uniform or given weights
+return a locally optimal support :math:`\{x_i\}` for uniform or given weights
 :math:`a`.
 
  .. hint::
 
-    Example of the fee support barycenter estimation is available
-    in the following example:
+    An example of the free support barycenter estimation is available
+    in :
 
     - :any:`auto_examples/plot_free_support_barycenter`
 
@@ -449,7 +450,8 @@ distribution but that minimizes the transport loss. The existence and uniqueness
 optimal mapping is still an open problem in the general case but has been proven
 for smooth distributions by Brenier in his eponym `theorem
 <https://who.rocq.inria.fr/Jean-David.Benamou/demiheure.pdf>`__. We provide in
-:any:`ot.da` several solvers for Monge mapping estimation and domain adaptation. 
+:any:`ot.da` several solvers for smooth Monge mapping estimation and domain
+adaptation from discrete distributions. 
 
 Monge Mapping estimation
 ^^^^^^^^^^^^^^^^^^^^^^^^
@@ -468,14 +470,14 @@ For a more general mapping estimation we also provide the barycentric mapping
 proposed in [6]_ . It is implemented in the class :any:`ot.da.EMDTransport` and
 other transport based classes in :any:`ot.da` . Those classes are discussed more
 in the following but follow an interface similar to sklearn classes. Finally a
-method proposed in [8]_ that estimate a continuous mapping approximating the
+method proposed in [8]_ that estimates a continuous mapping approximating the
 barycentric mapping is provided in :any:`ot.da.joint_OT_mapping_linear` for
 linear mapping and :any:`ot.da.joint_OT_mapping_kernel` for non linear mapping.
 
  .. hint::
 
-    Example of the linear Monge mapping estimation is available
-    in the following example:
+    An example of the linear Monge mapping estimation is available
+    in :
 
     - :any:`auto_examples/plot_otda_linear_mapping`
 
@@ -489,12 +491,14 @@ transport labeled source samples onto the target distribution with no labels.
 
 We provide several classes based on :any:`ot.da.BaseTransport` that provide
 several OT and mapping estimations. The interface of those classes is similar to
-classifiers in sklearn toolbox. At initialization several parameters (for
-instance regularization parameter) can be set. Then one needs to estimate the
+classifiers in sklearn toolbox. At initialization, several parameters such as
+the regularization parameter value can be set. Then one needs to estimate the
 mapping with function :any:`ot.da.BaseTransport.fit`. Finally one can map the
 samples from source to target with  :any:`ot.da.BaseTransport.transform` and
-from target to source with :any:`ot.da.BaseTransport.inverse_transform`. Here is
-an example for class :any:`ot.da.EMDTransport` 
+from target to source with :any:`ot.da.BaseTransport.inverse_transform`.
+
+Here is
+an example for class :any:`ot.da.EMDTransport` :
 
 .. code::
 
@@ -503,12 +507,11 @@ an example for class :any:`ot.da.EMDTransport`
 
     Mapped_Xs= ot_emd.transform(Xs=Xs)
 
-A list
-of the provided implementation is given in the following note.
+A list of the provided implementation is given in the following note.
 
 .. note::
 
-    Here is a list of the mapping classes inheriting from
+    Here is a list of the OT mapping classes inheriting from
     :any:`ot.da.BaseTransport`
     
     * :any:`ot.da.EMDTransport` : Barycentric mapping with EMD transport
@@ -523,7 +526,7 @@ of the provided implementation is given in the following note.
 
 .. hint::
 
-    Example of the use of OTDA classes are available in the following exmaples:
+    Examples of the use of OTDA classes are available in :
 
     - :any:`auto_examples/plot_otda_color_images`
     - :any:`auto_examples/plot_otda_mapping`
@@ -533,7 +536,7 @@ of the provided implementation is given in the following note.
 Other applications
 ------------------
 
-We discuss in the following several implementations that has been used and
+We discuss in the following several OT related problems and tools that have been
 proposed in the OT and machine learning community.
 
 Wasserstein Discriminant Analysis
@@ -551,7 +554,7 @@ consist in finding a linear projector optimizing the following criterion
 where :math:`\#` is the push-forward operator, :math:`OT_e` is the entropic OT
 loss  and :math:`\mu_i` is the
 distribution of samples from class :math:`i`.  :math:`P` is also constrained to
-be in the Stiefel manifold. WDA can be solved in pot using function
+be in the Stiefel manifold. WDA can be solved in POT using function
 :any:`ot.dr.wda`. It requires to have installed :code:`pymanopt` and
 :code:`autograd` for manifold optimization and automatic differentiation
 respectively. Note that we also provide the Fisher discriminant estimator in
@@ -564,7 +567,7 @@ respectively. Note that we also provide the Fisher discriminant estimator in
 
 .. hint::
 
-    An example of the use of WDA is available in the following example:
+    An example of the use of WDA is available in :
 
     - :any:`auto_examples/plot_WDA`
 
@@ -582,7 +585,7 @@ problem:
     s.t. \quad  \gamma\geq 0
 
 
-where KL is the Kullback-Leibler divergence. This formulation allwos for
+where KL is the Kullback-Leibler divergence. This formulation allows for
 computing approximate mapping between distributions that do not have the same
 amount of mass. Interestingly the problem can be solved with a generalization of
 the Bregman projections algorithm [10]_. We provide a solver for unbalanced OT
@@ -594,7 +597,7 @@ is available in :any:`ot.barycenter_unbalanced`.
 .. hint::
 
     Examples of the use of :any:`ot.sinkhorn_unbalanced` and
-    :any:`ot.barycenter_unbalanced`  are available in:
+    :any:`ot.barycenter_unbalanced` are available in :
 
     - :any:`auto_examples/plot_UOT_1D`
     - :any:`auto_examples/plot_UOT_barycenter_1D`
@@ -609,46 +612,46 @@ from the two distributions. [13]_ proposed instead to realign the metric spaces
 by computing a transport between distance matrices. The Gromow Wasserstein
 alignement between two distributions can be expressed as the one minimizing:
 
-
 .. math::
     GW = \min_\gamma \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*\gamma_{i,j}*\gamma_{k,l}
 
     s.t. \gamma 1 = a; \gamma^T 1= b; \gamma\geq 0
 
 where ::math:`C1` is the distance matrix between samples in the source
-distribution and :math:`C2` the one between samples in the target,   :math:`L(C1_{i,k},C2_{j,l})` is a measure of similarity between
+distribution and :math:`C2` the one between samples in the target,
+:math:`L(C1_{i,k},C2_{j,l})` is a measure of similarity between
 :math:`C1_{i,k}` and :math:`C2_{j,l}` often chosen as
 :math:`L(C1_{i,k},C2_{j,l})=\|C1_{i,k}-C2_{j,l}\|^2`. The optimization problem
 above is a non-convex quadratic program but we provide a solver that finds a
 local minimum using conditional gradient in :any:`ot.gromov.gromov_wasserstein`.
-There also exist an entropic regularized variant of GW that has been proposed in
+There also exists an entropic regularized variant of GW that has been proposed in
 [12]_ and we provide an implementation of their algorithm in
 :any:`ot.gromov.entropic_gromov_wasserstein`.
 
 Note that similarly to Wasserstein distance GW allows for the definition of GW
-barycenters that cen be expressed as
+barycenters that can be expressed as
 
 .. math::
     \min_{C\geq 0} \quad \sum_{k} w_k GW(C,Ck)
 
 where :math:`Ck` is the distance matrix between samples in distribution
-:math:`k`. Note that interestingly the barycenter is defined a a symmetric
+:math:`k`. Note that interestingly the barycenter is defined as a symmetric
 positive matrix. We provide a block coordinate optimization procedure in 
 :any:`ot.gromov.gromov_barycenters` and
 :any:`ot.gromov.entropic_gromov_barycenters` for non-regularized and regularized
 barycenters respectively.
 
 Finally note that recently a fusion between Wasserstein and GW, coined Fused
-Groimov-Wasserstein (FGW) has been proposed
+Gromov-Wasserstein (FGW) has been proposed
 in [24]_. It allows to compute a similarity between objects that are only partly in
 the same space. As such it can be used to measure similarity between labeled
 graphs for instance and also provide computable barycenters.
-The implementations of FGW is provided in functions
+The implementations of FGW and FGW barycenter are provided in functions
 :any:`ot.gromov.fused_gromov_wasserstein` and :any:`ot.gromov.fgw_barycenters`.
 
 .. hint::
 
-    Examples of computation of GW, regularized G and FGW are provided in :
+    Examples of computation of GW, regularized GW and FGW are available in :
 
     - :any:`auto_examples/plot_gromov`
     - :any:`auto_examples/plot_fgw`   
@@ -663,7 +666,7 @@ GPU acceleration
 ^^^^^^^^^^^^^^^^
 
 We provide several implementation of our OT solvers in :any:`ot.gpu`. Those
-implementation use the :code:`cupy` toolbox that obviously need to be installed.   
+implementations use the :code:`cupy` toolbox that obviously needs to be installed.
 
 
 .. note::
@@ -677,7 +680,7 @@ implementation use the :code:`cupy` toolbox that obviously need to be installed.
     -  :any:`ot.gpu.sinkhorn_lpl1_mm` : computation of sinkhorn + group lasso
 
 Note that while the :any:`ot.gpu` module has been designed to be compatible with
-POT,  calling its function with numpy array will incur a large overhead due to
+POT,  calling its function with :any:`numpy`  arrays will incur a large overhead due to
 the memory copy of the array on GPU prior to computation and conversion of the
 array after computation. To avoid this overhead, we provide functions
 :any:`ot.gpu.to_gpu` and :any:`ot.gpu.to_np` that perform the conversion
@@ -697,7 +700,7 @@ FAQ
 
 1. **How to solve a discrete optimal transport problem ?**
 
-    The solver for discrete  is the function :py:mod:`ot.emd` that returns
+    The solver for discrete OT is the function :py:mod:`ot.emd` that returns
     the OT transport matrix. If you want to solve a regularized OT you can 
     use :py:mod:`ot.sinkhorn`.
 
@@ -711,7 +714,7 @@ FAQ
        T=ot.emd(a,b,M) # exact linear program
        T_reg=ot.sinkhorn(a,b,M,reg) # entropic regularized OT
 
-    More detailed examples can be seen on this
+    A more detailed example can be found in:
     :doc:`auto_examples/plot_OT_2D_samples`
     
 
-- 
cgit v1.2.3


From e26aa8ee4498f19248f8dcc9868ec55b62eb35e5 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Tue, 2 Jul 2019 16:52:24 +0200
Subject: typo exmaples

---
 docs/source/quickstart.rst | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'docs')

diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst
index b726149..1640d6a 100644
--- a/docs/source/quickstart.rst
+++ b/docs/source/quickstart.rst
@@ -52,7 +52,7 @@ implemented in C from  [1]_. It has a complexity of :math:`O(n^3)` but the
 solver is quite efficient and uses sparsity of the solution.
 
 .. hint::
-    Examples of use for :any:`ot.emd` are available in the following examples:
+    Examples of use for :any:`ot.emd` are available in :
 
     - :any:`auto_examples/plot_OT_2D_samples`
     - :any:`auto_examples/plot_OT_1D` 
@@ -99,7 +99,7 @@ distance.
 
 
 .. hint::
-    Examples of use for :any:`ot.emd2` are available in the following examples:
+    An example of use for :any:`ot.emd2` is available in :
 
     - :any:`auto_examples/plot_compute_emd`
  
-- 
cgit v1.2.3