From 3fff90eb437dce30fd83012f4c0e24f3fca041b2 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Fri, 14 Jan 2022 17:47:27 +0100 Subject: [WIP] Set dev version and add minigallery to quick start guide (#334) * change version and add minigallery in quickstart guide * remove ot.gpu from documentation because it is deprecated and bacckends should be used * start 0.8.2dev and description in releases.md * typo for gallery sinkhorn2 * test better doc update for files in .githib folder --- docs/source/all.rst | 1 - docs/source/auto_examples/images/bak.png | Bin 304669 -> 0 bytes docs/source/auto_examples/images/sinkhorn.png | Bin 37204 -> 0 bytes docs/source/conf.py | 1 - docs/source/quickstart.rst | 219 ++++++++++---------------- 5 files changed, 86 insertions(+), 135 deletions(-) delete mode 100644 docs/source/auto_examples/images/bak.png delete mode 100644 docs/source/auto_examples/images/sinkhorn.png (limited to 'docs/source') diff --git a/docs/source/all.rst b/docs/source/all.rst index 6a07599..7f85a91 100644 --- a/docs/source/all.rst +++ b/docs/source/all.rst @@ -20,7 +20,6 @@ API and modules gromov optim da - gpu dr utils datasets diff --git a/docs/source/auto_examples/images/bak.png b/docs/source/auto_examples/images/bak.png deleted file mode 100644 index 25e7e8e..0000000 Binary files a/docs/source/auto_examples/images/bak.png and /dev/null differ diff --git a/docs/source/auto_examples/images/sinkhorn.png b/docs/source/auto_examples/images/sinkhorn.png deleted file mode 100644 index e003e13..0000000 Binary files a/docs/source/auto_examples/images/sinkhorn.png and /dev/null differ diff --git a/docs/source/conf.py b/docs/source/conf.py index 849e97c..163851f 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -74,7 +74,6 @@ extensions = [ autosummary_generate = True - napoleon_numpy_docstring = True # Add any paths that contain templates here, relative to this directory. diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst index 232df7b..e74b019 100644 --- a/docs/source/quickstart.rst +++ b/docs/source/quickstart.rst @@ -207,13 +207,12 @@ The method implemented for solving the OT problem is the network simplex. It is implemented in C from [1]_. It has a complexity of :math:`O(n^3)` but the solver is quite efficient and uses sparsity of the solution. -.. hint:: - Examples of use for :any:`ot.emd` are available in : - - :any:`auto_examples/plot_OT_2D_samples` - - :any:`auto_examples/plot_OT_1D` - - :any:`auto_examples/plot_OT_L1_vs_L2` +.. minigallery:: ot.emd + :add-heading: Examples of use for :any:`ot.emd` + :heading-level: " + Computing Wasserstein distance @@ -255,11 +254,9 @@ the :math:`W_1` Wasserstein distance can be done directly with :any:`ot.emd2` when providing :code:`M = ot.dist(xs, xt, metric='euclidean')` to use the Euclidean distance. -.. hint:: - - An example of use for :any:`ot.emd2` is available in : - - - :any:`auto_examples/plot_compute_emd` +.. minigallery:: ot.emd2 + :add-heading: Examples of use for :any:`ot.emd2` + :heading-level: " Special cases @@ -416,17 +413,18 @@ of stochastic solvers for entropic regularized OT [18]_ [19]_. Those pure Pytho implementations are not optimized for speed but provide a robust implementation of algorithms in [18]_ [19]_. -.. hint:: - Examples of use for :any:`ot.sinkhorn` are available in : - - :any:`auto_examples/plot_OT_2D_samples` - - :any:`auto_examples/plot_OT_1D` - - :any:`auto_examples/plot_OT_1D_smooth` - - :any:`auto_examples/plot_stochastic` +.. minigallery:: ot.sinkhorn + :add-heading: Examples of use for :any:`ot.sinkhorn` + :heading-level: " +.. minigallery:: ot.sinkhorn2 + :add-heading: Examples of use for :any:`ot.sinkhorn2` + :heading-level: " -Other regularization -^^^^^^^^^^^^^^^^^^^^ + +Other regularizations +^^^^^^^^^^^^^^^^^^^^^ While entropic OT is the most common and favored in practice, there exists other kinds of regularizations. We provide in POT two specific solvers for other @@ -451,12 +449,9 @@ functions :any:`ot.smooth.smooth_ot_dual` or :any:`ot.smooth.smooth_ot_semi_dual` with parameter :code:`reg_type='l2'` to choose the quadratic regularization. -.. hint:: - Examples of quadratic regularization are available in : - - - :any:`auto_examples/plot_OT_1D_smooth` - - :any:`auto_examples/plot_optim_OTreg` - +.. minigallery:: ot.smooth.smooth_ot_dual ot.smooth.smooth_ot_semi_dual ot.optim.cg + :add-heading: Examples of use of quadratic regularization + :heading-level: " Group Lasso regularization @@ -480,11 +475,9 @@ be solved using an efficient majoration minimization approach with convex group lasso and we provide a solver using generalized conditional gradient algorithm [7]_ in function :any:`ot.da.sinkhorn_l1l2_gl`. -.. hint:: - Examples of group Lasso regularization are available in: - - - :any:`auto_examples/domain-adaptation/plot_otda_classes` - - :any:`auto_examples/domain-adaptation/plot_otda_d2` +.. minigallery:: ot.da.SinkhornLpl1Transport ot.da.SinkhornL1l2Transport ot.da.sinkhorn_l1l2_gl ot.da.sinkhorn_lpl1_mm + :add-heading: Examples of group Lasso regularization + :heading-level: " Generic solvers @@ -520,10 +513,9 @@ generalized conditional gradient [7]_ implemented in :any:`ot.optim.gcg` that does not linearize the entropic term but relies on :any:`ot.sinkhorn` for its iterations. -.. hint:: - An example of generic solvers are available in : - - - :any:`auto_examples/plot_optim_OTreg` +.. minigallery:: ot.optim.cg ot.optim.gcg + :add-heading: Examples of the generic solvers + :heading-level: " Wasserstein Barycenters @@ -581,19 +573,15 @@ the matrix vector production in the Bregman projections by convolution operators. We provide an implementation of this algorithm in function :any:`ot.bregman.convolutional_barycenter2d`. -.. hint:: - Examples of Wasserstein (:meth:`ot.lp.barycenter`) and regularized Wasserstein - barycenter (:any:`ot.bregman.barycenter`) computation are available in : - - :any:`auto_examples/barycenters/plot_barycenter_1D` - - :any:`auto_examples/barycenters/plot_barycenter_lp_vs_entropic` - An example of convolutional barycenter - (:any:`ot.bregman.convolutional_barycenter2d`) computation - for 2D images is available - in : +.. minigallery:: ot.lp.barycenter ot.bregman.barycenter ot.barycenter + :add-heading: Examples of Wasserstein and regularized Wasserstein barycenters + :heading-level: " - - :any:`auto_examples/barycenters/plot_convolutional_barycenter` +.. minigallery:: ot.bregman.convolutional_barycenter2d + :add-heading: An example of convolutional barycenter (:any:`ot.bregman.convolutional_barycenter2d`) computation + :heading-level: " @@ -613,13 +601,9 @@ We provide a solver based on [20]_ in return a locally optimal support :math:`\{x_i\}` for uniform or given weights :math:`a`. - .. hint:: - - An example of the free support barycenter estimation is available - in : - - - :any:`auto_examples/barycenters/plot_free_support_barycenter` - +.. minigallery:: ot.lp.free_support_barycenter + :add-heading: Examples of free support barycenter estimation + :heading-level: " @@ -656,12 +640,10 @@ method proposed in [8]_ that estimates a continuous mapping approximating the barycentric mapping is provided in :any:`ot.da.joint_OT_mapping_linear` for linear mapping and :any:`ot.da.joint_OT_mapping_kernel` for non-linear mapping. - .. hint:: - - An example of the linear Monge mapping estimation is available - in : +.. minigallery:: ot.da.joint_OT_mapping_linear ot.da.joint_OT_mapping_linear ot.da.OT_mapping_linear + :add-heading: Examples of Monge mapping estimation + :heading-level: " - - :any:`auto_examples/domain-adaptation/plot_otda_linear_mapping` Domain adaptation classes ^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -704,14 +686,11 @@ A list of the provided implementation is given in the following note. [14]_ * :any:`ot.da.MappingTransport`: Nonlinear mapping estimation [8]_ -.. hint:: - Examples of the use of OTDA classes are available in: +.. minigallery:: ot.da.SinkhornTransport ot.da.LinearTransport + :add-heading: Examples of the use of OTDA classes + :heading-level: " - - :any:`auto_examples/domain-adaptation/plot_otda_color_images` - - :any:`auto_examples/domain-adaptation/plot_otda_mapping` - - :any:`auto_examples/domain-adaptation/plot_otda_mapping_colors_images` - - :any:`auto_examples/domain-adaptation/plot_otda_semi_supervised` Other applications ------------------ @@ -746,11 +725,10 @@ respectively. Note that we also provide the Fisher discriminant estimator in :code:`autograd`, :any:`ot.dr` is not imported by default. If you want to use it you have to specifically import it with :code:`import ot.dr` . -.. hint:: +.. minigallery:: ot.dr.wda + :add-heading: Examples of the use of WDA + :heading-level: " - An example of the use of WDA is available in : - - - :any:`auto_examples/others/plot_WDA` Unbalanced optimal transport @@ -787,11 +765,9 @@ linear term. the log stabilized version of the algorithm [10]_. -.. hint:: - - Examples of the use of :any:`ot.sinkhorn_unbalanced` are available in : - - - :any:`auto_examples/unbalanced-partial/plot_UOT_1D` +.. minigallery:: ot.sinkhorn_unbalanced ot.sinkhorn_unbalanced2 ot.unbalanced.sinkhorn_unbalanced + :add-heading: Examples of Unbalanced OT + :heading-level: " Unbalanced Barycenters @@ -819,11 +795,10 @@ implemented the main function :any:`ot.barycenter_unbalanced`. the log stabilized version of the algorithm [10]_. -.. hint:: +.. minigallery:: ot.barycenter_unbalanced ot.unbalanced.barycenter_unbalanced + :add-heading: Examples of Unbalanced OT barycenters + :heading-level: " - Examples of the use of :any:`ot.barycenter_unbalanced` are available in : - - - :any:`auto_examples/unbalanced-partial/plot_UOT_barycenter_1D` Partial optimal transport @@ -865,11 +840,10 @@ is computed in :any:`ot.partial.partial_gromov_wasserstein` and in regularization of the problem. -.. hint:: - - Examples of the use of :any:`ot.partial` are available in: +.. minigallery:: ot.partial.partial_wasserstein ot.partial.partial_gromov_wasserstein + :add-heading: Examples of Partial OT + :heading-level: " - - :any:`auto_examples/unbalanced-partial/plot_partial_wass_and_gromov` @@ -898,6 +872,12 @@ There also exists an entropic regularized variant of GW that has been proposed i [12]_ and we provide an implementation of their algorithm in :any:`ot.gromov.entropic_gromov_wasserstein`. + +.. minigallery:: ot.gromov.gromov_wasserstein ot.gromov.entropic_gromov_wasserstein ot.gromov.fused_gromov_wasserstein ot.gromov.gromov_wasserstein2 + :add-heading: Examples of computation of GW, regularized G and FGW + :heading-level: " + + Note that similarly to Wasserstein distance GW allows for the definition of GW barycenters that can be expressed as @@ -919,59 +899,15 @@ graphs for instance and also provide computable barycenters. The implementations of FGW and FGW barycenter is provided in functions :any:`ot.gromov.fused_gromov_wasserstein` and :any:`ot.gromov.fgw_barycenters`. -.. hint:: - - Examples of computation of GW, regularized G and FGW are available in: - - :any:`auto_examples/gromov/plot_gromov` - - :any:`auto_examples/gromov/plot_fgw` +.. minigallery:: ot.gromov.gromov_barycenters ot.gromov.fgw_barycenters + :add-heading: Examples of GW, regularized G and FGW barycenters + :heading-level: " - Examples of GW, regularized GW and FGW barycenters are available in: - - :any:`auto_examples/gromov/plot_gromov_barycenter` - - :any:`auto_examples/gromov/plot_barycenter_fgw` - -GPU acceleration -^^^^^^^^^^^^^^^^ - -.. warning:: - - The :any:`ot.gpu` has been deprecated since the release 0.8 of POT and - should not be used. The GPU implementation (in Pytorch for instance) can be - used with the novel backends using the compatible functions from POT. - - -We provide several implementation of our OT solvers in :any:`ot.gpu`. Those -implementations use the :code:`cupy` toolbox that obviously need to be installed. - - -.. note:: - - Several implementations of POT functions (mainly those relying on linear - algebra) have been implemented in :any:`ot.gpu`. Here is a short list on the - main entries: - - - :meth:`ot.gpu.dist`: computation of distance matrix - - :meth:`ot.gpu.sinkhorn`: computation of sinkhorn - - :meth:`ot.gpu.sinkhorn_lpl1_mm`: computation of sinkhorn + group lasso - -Note that while the :any:`ot.gpu` module has been designed to be compatible with -POT, calling its function with :any:`numpy` arrays will incur a large overhead due to -the memory copy of the array on GPU prior to computation and conversion of the -array after computation. To avoid this overhead, we provide functions -:meth:`ot.gpu.to_gpu` and :meth:`ot.gpu.to_np` that perform the conversion -explicitly. - -.. warning:: - - Note that due to the hard dependency on :code:`cupy`, :any:`ot.gpu` is not - imported by default. If you want to - use it you have to specifically import it with :code:`import ot.gpu` . - - -Solving OT with Multiple backends ---------------------------------- +Solving OT with Multiple backends on CPU/GPU +-------------------------------------------- .. _backends_section: @@ -1002,7 +938,21 @@ the function will be the same type as the inputs and on the same device. When possible all computations are done on the same device and also when possible the output will be differentiable with respect to the input of the function. +GPU acceleration +^^^^^^^^^^^^^^^^ + +The backends provide automatic computations/compatibility on GPU for most of the +POT functions. +Note that all solvers relying on the exact OT solver en C++ will need to solve the +problem on CPU which can incur some memory copy overhead and be far from optimal +when all other computations are done on GPU. They will still work on array on +GPU since the copy is done automatically. +Some of the functions that rely on the exact C++ solver are: + +- :any:`ot.emd`, :any:`ot.emd2` +- :any:`ot.gromov_wasserstein`, :any:`ot.gromov_wasserstein2` +- :any:`ot.optim.cg` List of compatible Backends ^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -1010,18 +960,21 @@ List of compatible Backends - `Numpy `_ (all functions and solvers) - `Pytorch `_ (all outputs differentiable w.r.t. inputs) - `Jax `_ (Some functions are differentiable some require a wrapper) +- `Tensorflow `_ (all outputs differentiable w.r.t. inputs) +- `Cupy `_ (no differentiation, GPU only) + -List of compatible functions +List of compatible modules ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ This list will get longer for new releases and will hopefully disappear when POT become fully implemented with the backend. -- :any:`ot.emd` -- :any:`ot.emd2` -- :any:`ot.sinkhorn` -- :any:`ot.sinkhorn2` -- :any:`ot.dist` +- :any:`ot.bregman` +- :any:`ot.gromov` (some functions use CPU only solvers with copy overhead) +- :any:`ot.optim` (some functions use CPU only solvers with copy overhead) +- :any:`ot.sliced` +- :any:`ot.utils` (partial) FAQ -- cgit v1.2.3 From 5861209f27fe8e022eca2ed2c8d0bb1da4a1146b Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Mon, 17 Jan 2022 13:45:58 +0100 Subject: [MRG] Default pygment color for doc (#335) * back to default pygment * add images back * move static images and make it work --- docs/source/_static/images/bak.png | Bin 0 -> 304669 bytes docs/source/_static/images/sinkhorn.png | Bin 0 -> 37204 bytes docs/source/conf.py | 2 +- examples/plot_Intro_OT.py | 4 ++-- 4 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 docs/source/_static/images/bak.png create mode 100644 docs/source/_static/images/sinkhorn.png (limited to 'docs/source') diff --git a/docs/source/_static/images/bak.png b/docs/source/_static/images/bak.png new file mode 100644 index 0000000..25e7e8e Binary files /dev/null and b/docs/source/_static/images/bak.png differ diff --git a/docs/source/_static/images/sinkhorn.png b/docs/source/_static/images/sinkhorn.png new file mode 100644 index 0000000..e003e13 Binary files /dev/null and b/docs/source/_static/images/sinkhorn.png differ diff --git a/docs/source/conf.py b/docs/source/conf.py index 163851f..d1b8426 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -141,7 +141,7 @@ exclude_patterns = [] #show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = 'default' # A list of ignored prefixes for module index sorting. #modindex_common_prefix = [] diff --git a/examples/plot_Intro_OT.py b/examples/plot_Intro_OT.py index f282950..219aa51 100644 --- a/examples/plot_Intro_OT.py +++ b/examples/plot_Intro_OT.py @@ -58,7 +58,7 @@ help(ot.dist) # number of Bakeries to Cafés in a City (in this case Manhattan). We did a # quick google map search in Manhattan for bakeries and Cafés: # -# .. image:: images/bak.png +# .. image:: ../_static/images/bak.png # :align: center # :alt: bakery-cafe-manhattan # :width: 600px @@ -233,7 +233,7 @@ print('Wasserstein loss (EMD) = {0:.2f}'.format(W)) # The Sinkhorn algorithm is very simple to code. You can implement it directly # using the following pseudo-code # -# .. image:: images/sinkhorn.png +# .. image:: ../_static/images/sinkhorn.png # :align: center # :alt: Sinkhorn algorithm # :width: 440px -- cgit v1.2.3 From a5e0f0d40d5046a6639924347ef97e2ac80ad0c9 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Wed, 2 Feb 2022 11:53:12 +0100 Subject: [MRG] Add weak OT solver (#341) * add info in release file * update tests * pep8 * add weak OT example * update plot in doc * correction ewample with empirical sinkhorn * better thumbnail * comment from review * update documenation --- README.md | 3 + RELEASES.md | 8 ++- docs/source/all.rst | 1 + examples/others/plot_WeakOT_VS_OT.py | 98 +++++++++++++++++++++++++++ examples/plot_OT_2D_samples.py | 5 +- ot/__init__.py | 5 +- ot/gromov.py | 16 +++++ ot/lp/__init__.py | 9 ++- ot/lp/cvx.py | 1 - ot/utils.py | 12 +++- ot/weak.py | 124 +++++++++++++++++++++++++++++++++++ test/test_bregman.py | 13 ++-- test/test_ot.py | 2 +- test/test_utils.py | 18 ++++- test/test_weak.py | 54 +++++++++++++++ 15 files changed, 343 insertions(+), 26 deletions(-) create mode 100644 examples/others/plot_WeakOT_VS_OT.py create mode 100644 ot/weak.py create mode 100644 test/test_weak.py (limited to 'docs/source') diff --git a/README.md b/README.md index 17fbe81..a7627df 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,7 @@ POT provides the following generic OT solvers (links to examples): * Sinkhorn divergence [23] and entropic regularization OT from empirical data. * Debiased Sinkhorn barycenters [Sinkhorn divergence barycenter](https://pythonot.github.io/auto_examples/barycenters/plot_debiased_barycenter.html) [37] * [Smooth optimal transport solvers](https://pythonot.github.io/auto_examples/plot_OT_1D_smooth.html) (dual and semi-dual) for KL and squared L2 regularizations [17]. +* Weak OT solver between empirical distributions [39] * Non regularized [Wasserstein barycenters [16] ](https://pythonot.github.io/auto_examples/barycenters/plot_barycenter_lp_vs_entropic.html)) with LP solver (only small scale). * [Gromov-Wasserstein distances](https://pythonot.github.io/auto_examples/gromov/plot_gromov.html) and [GW barycenters](https://pythonot.github.io/auto_examples/gromov/plot_gromov_barycenter.html) (exact [13] and regularized [12]), differentiable using gradients from * [Fused-Gromov-Wasserstein distances solver](https://pythonot.github.io/auto_examples/gromov/plot_fgw.html#sphx-glr-auto-examples-plot-fgw-py) and [FGW barycenters](https://pythonot.github.io/auto_examples/gromov/plot_barycenter_fgw.html) [24] @@ -301,3 +302,5 @@ Conference on Machine Learning, PMLR 119:4692-4701, 2020 [38] C. Vincent-Cuaz, T. Vayer, R. Flamary, M. Corneli, N. Courty, [Online Graph Dictionary Learning](https://arxiv.org/pdf/2102.06555.pdf), International Conference on Machine Learning (ICML), 2021. + +[39] Gozlan, N., Roberto, C., Samson, P. M., & Tetali, P. (2017). [Kantorovich duality for general transport costs and applications](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.712.1825&rep=rep1&type=pdf). Journal of Functional Analysis, 273(11), 3327-3405. \ No newline at end of file diff --git a/RELEASES.md b/RELEASES.md index 94c853b..4d05582 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -5,10 +5,12 @@ #### New features -- Better list of related examples in quick start guide with `minigallery` (PR #334) +- Better list of related examples in quick start guide with `minigallery` (PR #334). - Add optional log-domain Sinkhorn implementation in WDA to support smaller values - of the regularization parameter (PR #336) -- Backend implementation for `ot.lp.free_support_barycenter` (PR #340) + of the regularization parameter (PR #336). +- Backend implementation for `ot.lp.free_support_barycenter` (PR #340). +- Add weak OT solver + example (PR #341). + #### Closed issues diff --git a/docs/source/all.rst b/docs/source/all.rst index 7f85a91..76d2ff5 100644 --- a/docs/source/all.rst +++ b/docs/source/all.rst @@ -28,6 +28,7 @@ API and modules unbalanced partial sliced + weak .. autosummary:: :toctree: ../modules/generated/ diff --git a/examples/others/plot_WeakOT_VS_OT.py b/examples/others/plot_WeakOT_VS_OT.py new file mode 100644 index 0000000..a29c875 --- /dev/null +++ b/examples/others/plot_WeakOT_VS_OT.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- +""" +==================================================== +Weak Optimal Transport VS exact Optimal Transport +==================================================== + +Illustration of 2D optimal transport between distributions that are weighted +sum of diracs. The OT matrix is plotted with the samples. + +""" + +# Author: Remi Flamary +# +# License: MIT License + +# sphinx_gallery_thumbnail_number = 4 + +import numpy as np +import matplotlib.pylab as pl +import ot +import ot.plot + +############################################################################## +# Generate data an plot it +# ------------------------ + +#%% parameters and data generation + +n = 50 # nb samples + +mu_s = np.array([0, 0]) +cov_s = np.array([[1, 0], [0, 1]]) + +mu_t = np.array([4, 4]) +cov_t = np.array([[1, -.8], [-.8, 1]]) + +xs = ot.datasets.make_2D_samples_gauss(n, mu_s, cov_s) +xt = ot.datasets.make_2D_samples_gauss(n, mu_t, cov_t) + +a, b = ot.unif(n), ot.unif(n) # uniform distribution on samples + +# loss matrix +M = ot.dist(xs, xt) +M /= M.max() + +#%% plot samples + +pl.figure(1) +pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples') +pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples') +pl.legend(loc=0) +pl.title('Source and target distributions') + +pl.figure(2) +pl.imshow(M, interpolation='nearest') +pl.title('Cost matrix M') + + +############################################################################## +# Compute Weak OT and exact OT solutions +# -------------------------------------- + +#%% EMD + +G0 = ot.emd(a, b, M) + +#%% Weak OT + +Gweak = ot.weak_optimal_transport(xs, xt, a, b) + + +############################################################################## +# Plot weak OT and exact OT solutions +# -------------------------------------- + +pl.figure(3, (8, 5)) + +pl.subplot(1, 2, 1) +pl.imshow(G0, interpolation='nearest') +pl.title('OT matrix') + +pl.subplot(1, 2, 2) +pl.imshow(Gweak, interpolation='nearest') +pl.title('Weak OT matrix') + +pl.figure(4, (8, 5)) + +pl.subplot(1, 2, 1) +ot.plot.plot2D_samples_mat(xs, xt, G0, c=[.5, .5, 1]) +pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples') +pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples') +pl.title('OT matrix with samples') + +pl.subplot(1, 2, 2) +ot.plot.plot2D_samples_mat(xs, xt, Gweak, c=[.5, .5, 1]) +pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples') +pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples') +pl.title('Weak OT matrix with samples') diff --git a/examples/plot_OT_2D_samples.py b/examples/plot_OT_2D_samples.py index af1bc12..c3a7cd8 100644 --- a/examples/plot_OT_2D_samples.py +++ b/examples/plot_OT_2D_samples.py @@ -42,7 +42,6 @@ a, b = np.ones((n,)) / n, np.ones((n,)) / n # uniform distribution on samples # loss matrix M = ot.dist(xs, xt) -M /= M.max() ############################################################################## # Plot data @@ -87,7 +86,7 @@ pl.title('OT matrix with samples') #%% sinkhorn # reg term -lambd = 1e-3 +lambd = 1e-1 Gs = ot.sinkhorn(a, b, M, lambd) @@ -112,7 +111,7 @@ pl.show() #%% sinkhorn # reg term -lambd = 1e-3 +lambd = 1e-1 Ges = ot.bregman.empirical_sinkhorn(xs, xt, lambd) diff --git a/ot/__init__.py b/ot/__init__.py index 1ea7403..7253318 100644 --- a/ot/__init__.py +++ b/ot/__init__.py @@ -36,6 +36,7 @@ from . import unbalanced from . import partial from . import backend from . import regpath +from . import weak # OT functions from .lp import emd, emd2, emd_1d, emd2_1d, wasserstein_1d @@ -46,7 +47,7 @@ from .da import sinkhorn_lpl1_mm from .sliced import sliced_wasserstein_distance, max_sliced_wasserstein_distance from .gromov import (gromov_wasserstein, gromov_wasserstein2, gromov_barycenters, fused_gromov_wasserstein, fused_gromov_wasserstein2) - +from .weak import weak_optimal_transport # utils functions from .utils import dist, unif, tic, toc, toq @@ -59,5 +60,5 @@ __all__ = ['emd', 'emd2', 'emd_1d', 'sinkhorn', 'sinkhorn2', 'utils', 'sinkhorn_unbalanced', 'barycenter_unbalanced', 'sinkhorn_unbalanced2', 'sliced_wasserstein_distance', 'gromov_wasserstein', 'gromov_wasserstein2', 'gromov_barycenters', 'fused_gromov_wasserstein', 'fused_gromov_wasserstein2', - 'max_sliced_wasserstein_distance', + 'max_sliced_wasserstein_distance', 'weak_optimal_transport', 'smooth', 'stochastic', 'unbalanced', 'partial', 'regpath'] diff --git a/ot/gromov.py b/ot/gromov.py index 6544260..b7e7949 100644 --- a/ot/gromov.py +++ b/ot/gromov.py @@ -338,6 +338,10 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun='square_loss', log=False, armijo=F - :math:`\mathbf{q}`: distribution in the target space - `L`: loss function to account for the misfit between the similarity matrices + .. note:: This function is backend-compatible and will work on arrays + from all compatible backends. But the algorithm uses the C++ CPU backend + which can lead to copy overhead on GPU arrays. + Parameters ---------- C1 : array-like, shape (ns, ns) @@ -436,6 +440,10 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun='square_loss', log=False, armijo= Note that when using backends, this loss function is differentiable wrt the marices and weights for quadratic loss using the gradients from [38]_. + .. note:: This function is backend-compatible and will work on arrays + from all compatible backends. But the algorithm uses the C++ CPU backend + which can lead to copy overhead on GPU arrays. + Parameters ---------- C1 : array-like, shape (ns, ns) @@ -545,6 +553,10 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, - :math:`\mathbf{p}` and :math:`\mathbf{q}` are source and target weights (sum to 1) - `L` is a loss function to account for the misfit between the similarity matrices + .. note:: This function is backend-compatible and will work on arrays + from all compatible backends. But the algorithm uses the C++ CPU backend + which can lead to copy overhead on GPU arrays. + The algorithm used for solving the problem is conditional gradient as discussed in :ref:`[24] ` Parameters @@ -645,6 +657,10 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5 The algorithm used for solving the problem is conditional gradient as discussed in :ref:`[24] ` + .. note:: This function is backend-compatible and will work on arrays + from all compatible backends. But the algorithm uses the C++ CPU backend + which can lead to copy overhead on GPU arrays. + Note that when using backends, this loss function is differentiable wrt the marices and weights for quadratic loss using the gradients from [38]_. diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 2ff7c1f..d9b6fa9 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -26,6 +26,8 @@ from ..utils import dist, list_to_array from ..utils import parmap from ..backend import get_backend + + __all__ = ['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx', ' emd_1d_sorted', 'emd_1d', 'emd2_1d', 'wasserstein_1d'] @@ -220,7 +222,8 @@ def emd(a, b, M, numItermax=100000, log=False, center_dual=True, numThreads=1): format .. note:: This function is backend-compatible and will work on arrays - from all compatible backends. + from all compatible backends. But the algorithm uses the C++ CPU backend + which can lead to copy overhead on GPU arrays. Uses the algorithm proposed in :ref:`[1] `. @@ -358,7 +361,8 @@ def emd2(a, b, M, processes=1, - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights .. note:: This function is backend-compatible and will work on arrays - from all compatible backends. + from all compatible backends. But the algorithm uses the C++ CPU backend + which can lead to copy overhead on GPU arrays. Uses the algorithm proposed in :ref:`[1] `. @@ -622,3 +626,4 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None return X, log_dict else: return X + diff --git a/ot/lp/cvx.py b/ot/lp/cvx.py index 869d450..fbf3c0e 100644 --- a/ot/lp/cvx.py +++ b/ot/lp/cvx.py @@ -11,7 +11,6 @@ import numpy as np import scipy as sp import scipy.sparse as sps - try: import cvxopt from cvxopt import solvers, matrix, spmatrix diff --git a/ot/utils.py b/ot/utils.py index e6c93c8..725ca00 100644 --- a/ot/utils.py +++ b/ot/utils.py @@ -116,7 +116,7 @@ def proj_simplex(v, z=1): return w -def unif(n): +def unif(n, type_as=None): r""" Return a uniform histogram of length `n` (simplex). @@ -124,13 +124,19 @@ def unif(n): ---------- n : int number of bins in the histogram + type_as : array_like + array of the same type of the expected output (numpy/pytorch/jax) Returns ------- - h : np.array (`n`,) + h : array_like (`n`,) histogram of length `n` such that :math:`\forall i, \mathbf{h}_i = \frac{1}{n}` """ - return np.ones((n,)) / n + if type_as is None: + return np.ones((n,)) / n + else: + nx = get_backend(type_as) + return nx.ones((n,)) / n def clean_zeros(a, b, M): diff --git a/ot/weak.py b/ot/weak.py new file mode 100644 index 0000000..f7d5b23 --- /dev/null +++ b/ot/weak.py @@ -0,0 +1,124 @@ +""" +Weak optimal ransport solvers +""" + +# Author: Remi Flamary +# +# License: MIT License + +from .backend import get_backend +from .optim import cg +import numpy as np + +__all__ = ['weak_optimal_transport'] + + +def weak_optimal_transport(Xa, Xb, a=None, b=None, verbose=False, log=False, G0=None, **kwargs): + r"""Solves the weak optimal transport problem between two empirical distributions + + + .. math:: + \gamma = \mathop{\arg \min}_\gamma \quad \|X_a-diag(1/a)\gammaX_b\|_F^2 + + s.t. \ \gamma \mathbf{1} = \mathbf{a} + + \gamma^T \mathbf{1} = \mathbf{b} + + \gamma \geq 0 + + where : + + - :math:`X_a` :math:`X_b` are the sample matrices. + - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights + + + .. note:: This function is backend-compatible and will work on arrays + from all compatible backends. But the algorithm uses the C++ CPU backend + which can lead to copy overhead on GPU arrays. + + Uses the conditional gradient algorithm to solve the problem proposed + in :ref:`[39] `. + + Parameters + ---------- + Xa : (ns,d) array-like, float + Source samples + Xb : (nt,d) array-like, float + Target samples + a : (ns,) array-like, float + Source histogram (uniform weight if empty list) + b : (nt,) array-like, float + Target histogram (uniform weight if empty list)) + numItermax : int, optional + Max number of iterations + numItermaxEmd : int, optional + Max number of iterations for emd + stopThr : float, optional + Stop threshold on the relative variation (>0) + stopThr2 : float, optional + Stop threshold on the absolute variation (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + + + Returns + ------- + gamma: array-like, shape (ns, nt) + Optimal transportation matrix for the given + parameters + log: dict, optional + If input log is true, a dictionary containing the + cost and dual variables and exit status + + + .. _references-weak: + References + ---------- + .. [39] Gozlan, N., Roberto, C., Samson, P. M., & Tetali, P. (2017). + Kantorovich duality for general transport costs and applications. + Journal of Functional Analysis, 273(11), 3327-3405. + + See Also + -------- + ot.bregman.sinkhorn : Entropic regularized OT + ot.optim.cg : General regularized OT + """ + + nx = get_backend(Xa, Xb) + + Xa2 = nx.to_numpy(Xa) + Xb2 = nx.to_numpy(Xb) + + if a is None: + a2 = np.ones((Xa.shape[0])) / Xa.shape[0] + else: + a2 = nx.to_numpy(a) + if b is None: + b2 = np.ones((Xb.shape[0])) / Xb.shape[0] + else: + b2 = nx.to_numpy(b) + + # init uniform + if G0 is None: + T0 = a2[:, None] * b2[None, :] + else: + T0 = nx.to_numpy(G0) + + # weak OT loss + def f(T): + return np.dot(a2, np.sum((Xa2 - np.dot(T, Xb2) / a2[:, None])**2, 1)) + + # weak OT gradient + def df(T): + return -2 * np.dot(Xa2 - np.dot(T, Xb2) / a2[:, None], Xb2.T) + + # solve with conditional gradient and return solution + if log: + res, log = cg(a2, b2, 0, 1, f, df, T0, log=log, verbose=verbose, **kwargs) + log['u'] = nx.from_numpy(log['u'], type_as=Xa) + log['v'] = nx.from_numpy(log['v'], type_as=Xb) + return nx.from_numpy(res, type_as=Xa), log + else: + return nx.from_numpy(cg(a2, b2, 0, 1, f, df, T0, log=log, verbose=verbose, **kwargs), type_as=Xa) diff --git a/test/test_bregman.py b/test/test_bregman.py index 6e90aa4..1419f9b 100644 --- a/test/test_bregman.py +++ b/test/test_bregman.py @@ -60,7 +60,7 @@ def test_convergence_warning(method): ot.sinkhorn2(a1, a2, M, 1, method=method, stopThr=0, numItermax=1) -def test_not_impemented_method(): +def test_not_implemented_method(): # test sinkhorn w = 10 n = w ** 2 @@ -635,7 +635,7 @@ def test_wasserstein_bary_2d(nx, method): with pytest.raises(NotImplementedError): ot.bregman.convolutional_barycenter2d(A_nx, reg, method=method) else: - bary_wass_np = ot.bregman.convolutional_barycenter2d(A, reg, method=method) + bary_wass_np, log_np = ot.bregman.convolutional_barycenter2d(A, reg, method=method, verbose=True, log=True) bary_wass = nx.to_numpy(ot.bregman.convolutional_barycenter2d(A_nx, reg, method=method)) np.testing.assert_allclose(1, np.sum(bary_wass), rtol=1e-3) @@ -667,7 +667,7 @@ def test_wasserstein_bary_2d_debiased(nx, method): with pytest.raises(NotImplementedError): ot.bregman.convolutional_barycenter2d_debiased(A_nx, reg, method=method) else: - bary_wass_np = ot.bregman.convolutional_barycenter2d_debiased(A, reg, method=method) + bary_wass_np, log_np = ot.bregman.convolutional_barycenter2d_debiased(A, reg, method=method, verbose=True, log=True) bary_wass = nx.to_numpy(ot.bregman.convolutional_barycenter2d_debiased(A_nx, reg, method=method)) np.testing.assert_allclose(1, np.sum(bary_wass), rtol=1e-3) @@ -940,14 +940,11 @@ def test_screenkhorn(nx): bb = nx.from_numpy(b) M_nx = nx.from_numpy(M, type_as=ab) - # np sinkhorn - G_sink_np = ot.sinkhorn(a, b, M, 1e-03) # sinkhorn - G_sink = nx.to_numpy(ot.sinkhorn(ab, bb, M_nx, 1e-03)) + G_sink = nx.to_numpy(ot.sinkhorn(ab, bb, M_nx, 1e-1)) # screenkhorn - G_screen = nx.to_numpy(ot.bregman.screenkhorn(ab, bb, M_nx, 1e-03, uniform=True, verbose=True)) + G_screen = nx.to_numpy(ot.bregman.screenkhorn(ab, bb, M_nx, 1e-1, uniform=True, verbose=True)) # check marginals - np.testing.assert_allclose(G_sink_np, G_sink) np.testing.assert_allclose(G_sink.sum(0), G_screen.sum(0), atol=1e-02) np.testing.assert_allclose(G_sink.sum(1), G_screen.sum(1), atol=1e-02) diff --git a/test/test_ot.py b/test/test_ot.py index e8e2d97..3e2d845 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -232,7 +232,7 @@ def test_emd2_multi(): # Gaussian distributions a = gauss(n, m=20, s=5) # m= mean, s= std - ls = np.arange(20, 500, 20) + ls = np.arange(20, 500, 100) nb = len(ls) b = np.zeros((n, nb)) for i in range(nb): diff --git a/test/test_utils.py b/test/test_utils.py index 8b23c22..5ad167b 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -62,12 +62,12 @@ def test_tic_toc(): import time ot.tic() - time.sleep(0.5) + time.sleep(0.1) t = ot.toc() t2 = ot.toq() # test timing - np.testing.assert_allclose(0.5, t, rtol=1e-1, atol=1e-1) + np.testing.assert_allclose(0.1, t, rtol=1e-1, atol=1e-1) # test toc vs toq np.testing.assert_allclose(t, t2, rtol=1e-1, atol=1e-1) @@ -94,10 +94,22 @@ def test_unif(): np.testing.assert_allclose(1, np.sum(u)) -def test_dist(): +def test_unif_backend(nx): n = 100 + for tp in nx.__type_list__: + print(nx.dtype_device(tp)) + + u = ot.unif(n, type_as=tp) + + np.testing.assert_allclose(1, np.sum(nx.to_numpy(u)), atol=1e-6) + + +def test_dist(): + + n = 10 + rng = np.random.RandomState(0) x = rng.randn(n, 2) diff --git a/test/test_weak.py b/test/test_weak.py new file mode 100644 index 0000000..c4c3278 --- /dev/null +++ b/test/test_weak.py @@ -0,0 +1,54 @@ +"""Tests for main module ot.weak """ + +# Author: Remi Flamary +# +# License: MIT License + +import ot +import numpy as np + + +def test_weak_ot(): + # test weak ot solver and identity stationary point + n = 50 + rng = np.random.RandomState(0) + + xs = rng.randn(n, 2) + xt = rng.randn(n, 2) + u = ot.utils.unif(n) + + G, log = ot.weak_optimal_transport(xs, xt, u, u, log=True) + + # check constraints + np.testing.assert_allclose(u, G.sum(1)) + np.testing.assert_allclose(u, G.sum(0)) + + # chaeck that identity is recovered + G = ot.weak_optimal_transport(xs, xs, G0=np.eye(n) / n) + + # check G is identity + np.testing.assert_allclose(G, np.eye(n) / n) + + # check constraints + np.testing.assert_allclose(u, G.sum(1)) + np.testing.assert_allclose(u, G.sum(0)) + + +def test_weak_ot_bakends(nx): + # test weak ot solver for different backends + n = 50 + rng = np.random.RandomState(0) + + xs = rng.randn(n, 2) + xt = rng.randn(n, 2) + u = ot.utils.unif(n) + + G = ot.weak_optimal_transport(xs, xt, u, u) + + xs2 = nx.from_numpy(xs) + xt2 = nx.from_numpy(xt) + u2 = nx.from_numpy(u) + + G2 = ot.weak_optimal_transport(xs2, xt2, u2, u2) + + np.testing.assert_allclose(nx.to_numpy(G2), G) -- cgit v1.2.3 From 3302cd48cdcc5d4832997bae921952cc3917fb59 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Wed, 23 Feb 2022 09:53:13 +0100 Subject: [MRG] Build POT against oldest-supported-numpy (local PR) (#349) * Configure setup to compile against oldest supported numpy version using the meta-package: https://pypi.org/project/oldest-supported-numpy/ - * Set minimum Python requirement to `>=3.7` in setup.py since !328 removed Python 3.6 support * Fix typo in pyproject.toml - * Update setup.py * Update setup.py and * build wheels * remove install dependencies for wheels building and build wheels * Apply suggestions from code review Co-authored-by: David M. Ghiurco <9147386+davidghiurco@users.noreply.github.com> * correct timing test add info in release file and build wheels * pep8 and Co-authored-by: David Ghiurco <9147386+davidghiurco@users.noreply.github.com> --- .github/workflows/build_wheels.yml | 8 +------- .github/workflows/build_wheels_weekly.yml | 2 -- RELEASES.md | 1 + docs/source/quickstart.rst | 2 +- pyproject.toml | 2 +- requirements.txt | 1 - setup.py | 5 +++-- test/test_utils.py | 4 +++- 8 files changed, 10 insertions(+), 15 deletions(-) (limited to 'docs/source') diff --git a/.github/workflows/build_wheels.yml b/.github/workflows/build_wheels.yml index c746eb8..475058c 100644 --- a/.github/workflows/build_wheels.yml +++ b/.github/workflows/build_wheels.yml @@ -27,8 +27,6 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt - pip install -U "cython" - name: Install cibuildwheel run: | @@ -37,7 +35,6 @@ jobs: - name: Build wheels env: CIBW_SKIP: "pp*-win* pp*-macosx* cp2* pp* cp36*" # remove pypy on mac and win (wrong version) - CIBW_BEFORE_BUILD: "pip install numpy cython" run: | python -m cibuildwheel --output-dir wheelhouse @@ -65,8 +62,6 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt - pip install -U "cython" - name: Install cibuildwheel run: | @@ -80,8 +75,7 @@ jobs: - name: Build wheels env: - CIBW_SKIP: "pp*-win* pp*-macosx* cp2* pp* cp*musl* cp36*" # remove pypy on mac and win (wrong version) - CIBW_BEFORE_BUILD: "pip install numpy cython" + CIBW_SKIP: "pp*-win* pp*-macosx* cp2* pp* cp*musl*" # remove pypy on mac and win (wrong version) CIBW_ARCHS_LINUX: auto aarch64 # force aarch64 with QEMU CIBW_ARCHS_MACOS: x86_64 universal2 arm64 run: | diff --git a/.github/workflows/build_wheels_weekly.yml b/.github/workflows/build_wheels_weekly.yml index dbf342f..b9154c5 100644 --- a/.github/workflows/build_wheels_weekly.yml +++ b/.github/workflows/build_wheels_weekly.yml @@ -26,8 +26,6 @@ jobs: - name: Install dependencies run: | python -m pip install --upgrade pip - pip install -r requirements.txt - pip install -U "cython" - name: Install cibuildwheel run: | diff --git a/RELEASES.md b/RELEASES.md index 925920a..92b7ba5 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -16,6 +16,7 @@ - Bug in instantiating an `autograd` function (`ValFunction`, Issue #337, PR #338) +- Make POT ABI compatible with old and new numpy (Issue #346, PR #349) ## 0.8.1.0 *December 2021* diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst index e74b019..09a362b 100644 --- a/docs/source/quickstart.rst +++ b/docs/source/quickstart.rst @@ -1002,7 +1002,7 @@ FAQ 2. **pip install POT fails with error : ImportError: No module named Cython.Build** - As discussed shortly in the README file. POT requires to have :code:`numpy` + As discussed shortly in the README file. POT<0.8 requires to have :code:`numpy` and :code:`cython` installed to build. This corner case is not yet handled by :code:`pip` and for now you need to install both library prior to installing POT. diff --git a/pyproject.toml b/pyproject.toml index 93ebab3..3789206 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,3 @@ [build-system] -requires = ["setuptools", "wheel", "numpy>=1.20", "cython>=0.23"] +requires = ["setuptools", "wheel", "oldest-supported-numpy", "cython>=0.23"] build-backend = "setuptools.build_meta" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index f9934ce..7cbb29a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,5 @@ numpy>=1.20 scipy>=1.3 -cython matplotlib autograd pymanopt==0.2.4; python_version <'3' diff --git a/setup.py b/setup.py index d46ae1c..c03191a 100644 --- a/setup.py +++ b/setup.py @@ -68,8 +68,9 @@ setup( license='MIT', scripts=[], data_files=[], - setup_requires=["numpy>=1.20", "cython>=0.23"], - install_requires=["numpy>=1.20", "scipy>=1.0"], + setup_requires=["oldest-supported-numpy", "cython>=0.23"], + install_requires=["numpy>=1.16", "scipy>=1.0"], + python_requires=">=3.6", classifiers=[ 'Development Status :: 5 - Production/Stable', 'Intended Audience :: Developers', diff --git a/test/test_utils.py b/test/test_utils.py index 5ad167b..3cfd295 100644 --- a/test/test_utils.py +++ b/test/test_utils.py @@ -67,7 +67,9 @@ def test_tic_toc(): t2 = ot.toq() # test timing - np.testing.assert_allclose(0.1, t, rtol=1e-1, atol=1e-1) + # np.testing.assert_allclose(0.1, t, rtol=1e-1, atol=1e-1) + # very slow macos github action equality not possible + assert t > 0.09 # test toc vs toq np.testing.assert_allclose(t, t2, rtol=1e-1, atol=1e-1) -- cgit v1.2.3 From 9b9d2221d257f40ea3eb58b279b30d69162d62bb Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Fri, 18 Mar 2022 08:00:19 +0100 Subject: [MRG] Add logo to POT (#357) * add logo code and logo to doc * update release file --- RELEASES.md | 1 + docs/source/_static/images/logo.png | Bin 0 -> 5038 bytes docs/source/_static/images/logo.svg | 200 +++++++++++++++++++++++++++++++ docs/source/_static/images/logo_dark.png | Bin 0 -> 3437 bytes docs/source/_static/images/logo_dark.svg | 187 +++++++++++++++++++++++++++++ docs/source/conf.py | 4 +- docs/source/index.rst | 5 + examples/others/plot_logo.py | 112 +++++++++++++++++ 8 files changed, 508 insertions(+), 1 deletion(-) create mode 100644 docs/source/_static/images/logo.png create mode 100644 docs/source/_static/images/logo.svg create mode 100644 docs/source/_static/images/logo_dark.png create mode 100644 docs/source/_static/images/logo_dark.svg create mode 100644 examples/others/plot_logo.py (limited to 'docs/source') diff --git a/RELEASES.md b/RELEASES.md index 18562e7..0f1f231 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -5,6 +5,7 @@ #### New features +- A brand new logo for POT (PR #357) - Better list of related examples in quick start guide with `minigallery` (PR #334). - Add optional log-domain Sinkhorn implementation in WDA to support smaller values of the regularization parameter (PR #336). diff --git a/docs/source/_static/images/logo.png b/docs/source/_static/images/logo.png new file mode 100644 index 0000000..7be5df7 Binary files /dev/null and b/docs/source/_static/images/logo.png differ diff --git a/docs/source/_static/images/logo.svg b/docs/source/_static/images/logo.svg new file mode 100644 index 0000000..0bf2cb7 --- /dev/null +++ b/docs/source/_static/images/logo.svg @@ -0,0 +1,200 @@ + + + + + + + + + 2022-03-17T17:25:30.736761 + image/svg+xml + + + Matplotlib v3.3.3, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/source/_static/images/logo_dark.png b/docs/source/_static/images/logo_dark.png new file mode 100644 index 0000000..f484188 Binary files /dev/null and b/docs/source/_static/images/logo_dark.png differ diff --git a/docs/source/_static/images/logo_dark.svg b/docs/source/_static/images/logo_dark.svg new file mode 100644 index 0000000..56ce2d9 --- /dev/null +++ b/docs/source/_static/images/logo_dark.svg @@ -0,0 +1,187 @@ + + + + + + + + + 2022-03-17T17:25:30.847142 + image/svg+xml + + + Matplotlib v3.3.3, https://matplotlib.org/ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/docs/source/conf.py b/docs/source/conf.py index d1b8426..60d0bb7 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -162,6 +162,7 @@ html_theme = 'sphinx_rtd_theme' # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. + html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. @@ -176,7 +177,7 @@ html_theme_options = {} # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +html_logo = '_static/images/logo_dark.svg' # The name of an image file (relative to this directory) to use as a favicon of # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 @@ -188,6 +189,7 @@ html_theme_options = {} # so a file named "default.css" will overwrite the builtin "default.css". html_static_path = ['_static'] + # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. diff --git a/docs/source/index.rst b/docs/source/index.rst index 8de31ae..7ff7d22 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -6,6 +6,10 @@ POT: Python Optimal Transport ============================= +.. image:: _static/images/logo.svg + :width: 400 + :alt: POT Logo + Contents -------- @@ -20,6 +24,7 @@ Contents .github/CONTRIBUTING .github/CODE_OF_CONDUCT + .. include:: ../../README.md :parser: myst_parser.sphinx_ diff --git a/examples/others/plot_logo.py b/examples/others/plot_logo.py new file mode 100644 index 0000000..afddcad --- /dev/null +++ b/examples/others/plot_logo.py @@ -0,0 +1,112 @@ + +# -*- coding: utf-8 -*- +r""" +======================= +Logo of the POT toolbox +======================= + +In this example we plot the logo of the POT toolbox. + +A specificity of this logo is that it is done 100% in Python and generated using +matplotlib using the EMD solver from POT. + +""" + +# Author: Remi Flamary +# +# License: MIT License + +# sphinx_gallery_thumbnail_number = 1 + +# %% +import numpy as np +import matplotlib.pyplot as pl +import ot + +# %% +# Data for logo +# ------------- + + +# Letter P +p1 = np.array([[0, 6.], [0, 5], [0, 4], [0, 3], [0, 2], [0, 1], ]) +p2 = np.array([[1.5, 6], [2, 4], [2, 5], [1.5, 3], [0.5, 2], [.5, 1], ]) + +# Letter O +o1 = np.array([[0, 6.], [-1, 5], [-1.5, 4], [-1.5, 3], [-1, 2], [0, 1], ]) +o2 = np.array([[1, 6.], [2, 5], [2.5, 4], [2.5, 3], [2, 2], [1, 1], ]) + +# scaling and translation for letter O +o1[:, 0] += 6.4 +o2[:, 0] += 6.4 +o1[:, 0] *= 0.6 +o2[:, 0] *= 0.6 + +# letter T +t1 = np.array([[-1, 6.], [-1, 5], [0, 4], [0, 3], [0, 2], [0, 1], ]) +t2 = np.array([[1.5, 6.], [1.5, 5], [0.5, 4], [0.5, 3], [0.5, 2], [0.5, 1], ]) + +# translatin the T +t1[:, 0] += 7.1 +t2[:, 0] += 7.1 + +# Cocatenate all letters +x1 = np.concatenate((p1, o1, t1), axis=0) +x2 = np.concatenate((p2, o2, t2), axis=0) + +# Horizontal and vertical scaling +sx = 1.0 +sy = .5 +x1[:, 0] *= sx +x1[:, 1] *= sy +x2[:, 0] *= sx +x2[:, 1] *= sy + +# %% +# Plot the logo (clear background) +# -------------------------------- + +# Solve OT problem between the points +M = ot.dist(x1, x2, metric='euclidean') +T = ot.emd([], [], M) + +pl.figure(1, (3.5, 1.1)) +pl.clf() +# plot the OT plan +for i in range(M.shape[0]): + for j in range(M.shape[1]): + if T[i, j] > 1e-8: + pl.plot([x1[i, 0], x2[j, 0]], [x1[i, 1], x2[j, 1]], color='k', alpha=0.6, linewidth=3, zorder=1) +# plot the samples +pl.plot(x1[:, 0], x1[:, 1], 'o', markerfacecolor='C3', markeredgecolor='k') +pl.plot(x2[:, 0], x2[:, 1], 'o', markerfacecolor='b', markeredgecolor='k') + + +pl.axis('equal') +pl.axis('off') + +# Save logo file +# pl.savefig('logo.svg', dpi=150, bbox_inches='tight') +# pl.savefig('logo.png', dpi=150, bbox_inches='tight') + +# %% +# Plot the logo (dark background) +# -------------------------------- + +pl.figure(2, (3.5, 1.1), facecolor='darkgray') +pl.clf() +# plot the OT plan +for i in range(M.shape[0]): + for j in range(M.shape[1]): + if T[i, j] > 1e-8: + pl.plot([x1[i, 0], x2[j, 0]], [x1[i, 1], x2[j, 1]], color='w', alpha=0.8, linewidth=3, zorder=1) +# plot the samples +pl.plot(x1[:, 0], x1[:, 1], 'o', markerfacecolor='w', markeredgecolor='w') +pl.plot(x2[:, 0], x2[:, 1], 'o', markerfacecolor='w', markeredgecolor='w') + +pl.axis('equal') +pl.axis('off') + +# Save logo file +# pl.savefig('logo_dark.svg', dpi=150, transparent=True, bbox_inches='tight') +# pl.savefig('logo_dark.png', dpi=150, transparent=True, bbox_inches='tight') -- cgit v1.2.3 From 82452e0f5f6dae05c7a1cc384e7a1fb62ae7e0d5 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Thu, 24 Mar 2022 14:13:25 +0100 Subject: [MRG] Add factored coupling (#358) * add gfactored ot * pep8 and add doc * add exmaple for factotred OT * final number of PR * correct test on backends * remove useless loss * better tests --- README.md | 4 +- RELEASES.md | 1 + docs/source/all.rst | 1 + examples/others/plot_factored_coupling.py | 86 ++++++++++++++++++ ot/__init__.py | 5 ++ ot/factored.py | 145 ++++++++++++++++++++++++++++++ ot/plot.py | 7 +- test/test_factored.py | 56 ++++++++++++ 8 files changed, 303 insertions(+), 2 deletions(-) create mode 100644 examples/others/plot_factored_coupling.py create mode 100644 ot/factored.py create mode 100644 test/test_factored.py (limited to 'docs/source') diff --git a/README.md b/README.md index c6bfd9c..ec5d221 100644 --- a/README.md +++ b/README.md @@ -305,4 +305,6 @@ Conference on Machine Learning, PMLR 119:4692-4701, 2020 [38] C. Vincent-Cuaz, T. Vayer, R. Flamary, M. Corneli, N. Courty, [Online Graph Dictionary Learning](https://arxiv.org/pdf/2102.06555.pdf), International Conference on Machine Learning (ICML), 2021. -[39] Gozlan, N., Roberto, C., Samson, P. M., & Tetali, P. (2017). [Kantorovich duality for general transport costs and applications](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.712.1825&rep=rep1&type=pdf). Journal of Functional Analysis, 273(11), 3327-3405. \ No newline at end of file +[39] Gozlan, N., Roberto, C., Samson, P. M., & Tetali, P. (2017). [Kantorovich duality for general transport costs and applications](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.712.1825&rep=rep1&type=pdf). Journal of Functional Analysis, 273(11), 3327-3405. + +[40] Forrow, A., Hütter, J. C., Nitzan, M., Rigollet, P., Schiebinger, G., & Weed, J. (2019, April). [Statistical optimal transport via factored couplings](http://proceedings.mlr.press/v89/forrow19a/forrow19a.pdf). In The 22nd International Conference on Artificial Intelligence and Statistics (pp. 2454-2465). PMLR. \ No newline at end of file diff --git a/RELEASES.md b/RELEASES.md index 86b401a..c2bd0d1 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -5,6 +5,7 @@ #### New features +- Implementation of factored OT with emd and sinkhorn (PR #358). - A brand new logo for POT (PR #357) - Better list of related examples in quick start guide with `minigallery` (PR #334). - Add optional log-domain Sinkhorn implementation in WDA to support smaller values diff --git a/docs/source/all.rst b/docs/source/all.rst index 76d2ff5..3f7d029 100644 --- a/docs/source/all.rst +++ b/docs/source/all.rst @@ -29,6 +29,7 @@ API and modules partial sliced weak + factored .. autosummary:: :toctree: ../modules/generated/ diff --git a/examples/others/plot_factored_coupling.py b/examples/others/plot_factored_coupling.py new file mode 100644 index 0000000..b5b1c9f --- /dev/null +++ b/examples/others/plot_factored_coupling.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- +""" +========================================== +Optimal transport with factored couplings +========================================== + +Illustration of the factored coupling OT between 2D empirical distributions + +""" + +# Author: Remi Flamary +# +# License: MIT License + +# sphinx_gallery_thumbnail_number = 2 + +import numpy as np +import matplotlib.pylab as pl +import ot +import ot.plot + +# %% +# Generate data an plot it +# ------------------------ + +# parameters and data generation + +np.random.seed(42) + +n = 100 # nb samples + +xs = np.random.rand(n, 2) - .5 + +xs = xs + np.sign(xs) + +xt = np.random.rand(n, 2) - .5 + +a, b = ot.unif(n), ot.unif(n) # uniform distribution on samples + +#%% plot samples + +pl.figure(1) +pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples') +pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples') +pl.legend(loc=0) +pl.title('Source and target distributions') + + +# %% +# Compute Factore OT and exact OT solutions +# -------------------------------------- + +#%% EMD +M = ot.dist(xs, xt) +G0 = ot.emd(a, b, M) + +#%% factored OT OT + +Ga, Gb, xb = ot.factored_optimal_transport(xs, xt, a, b, r=4) + + +# %% +# Plot factored OT and exact OT solutions +# -------------------------------------- + +pl.figure(2, (14, 4)) + +pl.subplot(1, 3, 1) +ot.plot.plot2D_samples_mat(xs, xt, G0, c=[.2, .2, .2], alpha=0.1) +pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples') +pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples') +pl.title('Exact OT with samples') + +pl.subplot(1, 3, 2) +ot.plot.plot2D_samples_mat(xs, xb, Ga, c=[.6, .6, .9], alpha=0.5) +ot.plot.plot2D_samples_mat(xb, xt, Gb, c=[.9, .6, .6], alpha=0.5) +pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples') +pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples') +pl.plot(xb[:, 0], xb[:, 1], 'og', label='Template samples') +pl.title('Factored OT with template samples') + +pl.subplot(1, 3, 3) +ot.plot.plot2D_samples_mat(xs, xt, Ga.dot(Gb), c=[.2, .2, .2], alpha=0.1) +pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples') +pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples') +pl.title('Factored OT low rank OT plan') diff --git a/ot/__init__.py b/ot/__init__.py index bda7a35..c5e1967 100644 --- a/ot/__init__.py +++ b/ot/__init__.py @@ -33,6 +33,7 @@ from . import partial from . import backend from . import regpath from . import weak +from . import factored # OT functions from .lp import emd, emd2, emd_1d, emd2_1d, wasserstein_1d @@ -44,6 +45,9 @@ from .sliced import sliced_wasserstein_distance, max_sliced_wasserstein_distance from .gromov import (gromov_wasserstein, gromov_wasserstein2, gromov_barycenters, fused_gromov_wasserstein, fused_gromov_wasserstein2) from .weak import weak_optimal_transport +from .factored import factored_optimal_transport + + # utils functions from .utils import dist, unif, tic, toc, toq @@ -57,4 +61,5 @@ __all__ = ['emd', 'emd2', 'emd_1d', 'sinkhorn', 'sinkhorn2', 'utils', 'sinkhorn_unbalanced2', 'sliced_wasserstein_distance', 'gromov_wasserstein', 'gromov_wasserstein2', 'gromov_barycenters', 'fused_gromov_wasserstein', 'fused_gromov_wasserstein2', 'max_sliced_wasserstein_distance', 'weak_optimal_transport', + 'factored_optimal_transport', 'smooth', 'stochastic', 'unbalanced', 'partial', 'regpath'] diff --git a/ot/factored.py b/ot/factored.py new file mode 100644 index 0000000..abc2445 --- /dev/null +++ b/ot/factored.py @@ -0,0 +1,145 @@ +""" +Factored OT solvers (low rank, cost or OT plan) +""" + +# Author: Remi Flamary +# +# License: MIT License + +from .backend import get_backend +from .utils import dist +from .lp import emd +from .bregman import sinkhorn + +__all__ = ['factored_optimal_transport'] + + +def factored_optimal_transport(Xa, Xb, a=None, b=None, reg=0.0, r=100, X0=None, stopThr=1e-7, numItermax=100, verbose=False, log=False, **kwargs): + r"""Solves factored OT problem and return OT plans and intermediate distribution + + This function solve the following OT problem [40]_ + + .. math:: + \mathop{\arg \min}_\mu \quad W_2^2(\mu_a,\mu)+ W_2^2(\mu,\mu_b) + + where : + + - :math:`\mu_a` and :math:`\mu_b` are empirical distributions. + - :math:`\mu` is an empirical distribution with r samples + + And returns the two OT plans between + + .. note:: This function is backend-compatible and will work on arrays + from all compatible backends. But the algorithm uses the C++ CPU backend + which can lead to copy overhead on GPU arrays. + + Uses the conditional gradient algorithm to solve the problem proposed in + :ref:`[39] `. + + Parameters + ---------- + Xa : (ns,d) array-like, float + Source samples + Xb : (nt,d) array-like, float + Target samples + a : (ns,) array-like, float + Source histogram (uniform weight if empty list) + b : (nt,) array-like, float + Target histogram (uniform weight if empty list)) + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshold on the relative variation (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + + + Returns + ------- + Ga: array-like, shape (ns, r) + Optimal transportation matrix between source and the intermediate + distribution + Gb: array-like, shape (r, nt) + Optimal transportation matrix between the intermediate and target + distribution + X: array-like, shape (r, d) + Support of the intermediate distribution + log: dict, optional + If input log is true, a dictionary containing the cost and dual + variables and exit status + + + .. _references-factored: + References + ---------- + .. [40] Forrow, A., Hütter, J. C., Nitzan, M., Rigollet, P., Schiebinger, + G., & Weed, J. (2019, April). Statistical optimal transport via factored + couplings. In The 22nd International Conference on Artificial + Intelligence and Statistics (pp. 2454-2465). PMLR. + + See Also + -------- + ot.bregman.sinkhorn : Entropic regularized OT ot.optim.cg : General + regularized OT + """ + + nx = get_backend(Xa, Xb) + + n_a = Xa.shape[0] + n_b = Xb.shape[0] + d = Xa.shape[1] + + if a is None: + a = nx.ones((n_a), type_as=Xa) / n_a + if b is None: + b = nx.ones((n_b), type_as=Xb) / n_b + + if X0 is None: + X = nx.randn(r, d, type_as=Xa) + else: + X = X0 + + w = nx.ones(r, type_as=Xa) / r + + def solve_ot(X1, X2, w1, w2): + M = dist(X1, X2) + if reg > 0: + G, log = sinkhorn(w1, w2, M, reg, log=True, **kwargs) + log['cost'] = nx.sum(G * M) + return G, log + else: + return emd(w1, w2, M, log=True, **kwargs) + + norm_delta = [] + + # solve the barycenter + for i in range(numItermax): + + old_X = X + + # solve OT with template + Ga, loga = solve_ot(Xa, X, a, w) + Gb, logb = solve_ot(X, Xb, w, b) + + X = 0.5 * (nx.dot(Ga.T, Xa) + nx.dot(Gb, Xb)) * r + + delta = nx.norm(X - old_X) + if delta < stopThr: + break + if log: + norm_delta.append(delta) + + if log: + log_dic = {'delta_iter': norm_delta, + 'ua': loga['u'], + 'va': loga['v'], + 'ub': logb['u'], + 'vb': logb['v'], + 'costa': loga['cost'], + 'costb': logb['cost'], + } + return Ga, Gb, X, log_dic + + return Ga, Gb, X diff --git a/ot/plot.py b/ot/plot.py index 2208c90..8ade2eb 100644 --- a/ot/plot.py +++ b/ot/plot.py @@ -85,8 +85,13 @@ def plot2D_samples_mat(xs, xt, G, thr=1e-8, **kwargs): if ('color' not in kwargs) and ('c' not in kwargs): kwargs['color'] = 'k' mx = G.max() + if 'alpha' in kwargs: + scale = kwargs['alpha'] + del kwargs['alpha'] + else: + scale = 1 for i in range(xs.shape[0]): for j in range(xt.shape[0]): if G[i, j] / mx > thr: pl.plot([xs[i, 0], xt[j, 0]], [xs[i, 1], xt[j, 1]], - alpha=G[i, j] / mx, **kwargs) + alpha=G[i, j] / mx * scale, **kwargs) diff --git a/test/test_factored.py b/test/test_factored.py new file mode 100644 index 0000000..fd2fd01 --- /dev/null +++ b/test/test_factored.py @@ -0,0 +1,56 @@ +"""Tests for main module ot.weak """ + +# Author: Remi Flamary +# +# License: MIT License + +import ot +import numpy as np + + +def test_factored_ot(): + # test weak ot solver and identity stationary point + n = 50 + rng = np.random.RandomState(0) + + xs = rng.randn(n, 2) + xt = rng.randn(n, 2) + u = ot.utils.unif(n) + + Ga, Gb, X, log = ot.factored_optimal_transport(xs, xt, u, u, r=10, log=True) + + # check constraints + np.testing.assert_allclose(u, Ga.sum(1)) + np.testing.assert_allclose(u, Gb.sum(0)) + + Ga, Gb, X, log = ot.factored_optimal_transport(xs, xt, u, u, reg=1, r=10, log=True) + + # check constraints + np.testing.assert_allclose(u, Ga.sum(1)) + np.testing.assert_allclose(u, Gb.sum(0)) + + +def test_factored_ot_backends(nx): + # test weak ot solver for different backends + n = 50 + rng = np.random.RandomState(0) + + xs = rng.randn(n, 2) + xt = rng.randn(n, 2) + u = ot.utils.unif(n) + + xs2 = nx.from_numpy(xs) + xt2 = nx.from_numpy(xt) + u2 = nx.from_numpy(u) + + Ga2, Gb2, X2 = ot.factored_optimal_transport(xs2, xt2, u2, u2, r=10) + + # check constraints + np.testing.assert_allclose(u, nx.to_numpy(Ga2).sum(1)) + np.testing.assert_allclose(u, nx.to_numpy(Gb2).sum(0)) + + Ga2, Gb2, X2 = ot.factored_optimal_transport(xs2, xt2, reg=1, r=10, X0=X2) + + # check constraints + np.testing.assert_allclose(u, nx.to_numpy(Ga2).sum(1)) + np.testing.assert_allclose(u, nx.to_numpy(Gb2).sum(0)) -- cgit v1.2.3 From ad02112d4288f3efdd5bc6fc6e45444313bba871 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Tue, 5 Apr 2022 11:57:10 +0200 Subject: [MRG] Update examples in the doc (#359) * add transparent color logo * add transparent color logo * move screenkhorn * move stochastic and install ffmpeg on circleci * try something * add sudo * install ffmpeg before python * cleanup examples * test svg scrapper * add animation for reg path * better example OT sivergence * update ttles and add plots * update free support * proper figure indexes * have less frame sin animation * update readme and release file * add tests for python 3.10 --- .circleci/config.yml | 7 + .github/workflows/build_tests.yml | 6 +- README.md | 10 +- RELEASES.md | 3 +- docs/source/_static/images/logo.png | Bin 5038 -> 4325 bytes docs/source/_static/images/logo.svg | 174 +++++++++---------- docs/source/conf.py | 6 + .../backends/plot_sliced_wass_grad_flow_pytorch.py | 2 + examples/backends/plot_wass1d_torch.py | 8 +- .../barycenters/plot_free_support_barycenter.py | 55 +++--- examples/others/plot_logo.py | 8 +- examples/others/plot_screenkhorn_1D.py | 71 ++++++++ examples/others/plot_stochastic.py | 189 +++++++++++++++++++++ examples/plot_OT_1D.py | 12 +- examples/plot_OT_1D_smooth.py | 6 +- examples/plot_OT_2D_samples.py | 2 +- examples/plot_OT_L1_vs_L2.py | 32 ++-- examples/plot_compute_emd.py | 72 +++++--- examples/plot_optim_OTreg.py | 38 ++++- examples/plot_screenkhorn_1D.py | 71 -------- examples/plot_stochastic.py | 189 --------------------- examples/sliced-wasserstein/README.txt | 2 +- examples/sliced-wasserstein/plot_variance.py | 8 +- examples/unbalanced-partial/plot_UOT_1D.py | 17 +- examples/unbalanced-partial/plot_regpath.py | 88 +++++++++- 25 files changed, 628 insertions(+), 448 deletions(-) create mode 100644 examples/others/plot_screenkhorn_1D.py create mode 100644 examples/others/plot_stochastic.py delete mode 100644 examples/plot_screenkhorn_1D.py delete mode 100644 examples/plot_stochastic.py (limited to 'docs/source') diff --git a/.circleci/config.yml b/.circleci/config.yml index 39c19fb..77ab45c 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -35,6 +35,12 @@ jobs: - data-cache-0 - pip-cache + - run: + name: Install ffmpeg + command: | + sudo apt update + sudo apt install ffmpeg + - run: name: Get Python running command: | @@ -50,6 +56,7 @@ jobs: paths: - ~/.cache/pip + # Look at what we have and fail early if there is some library conflict - run: name: Check installation diff --git a/.github/workflows/build_tests.yml b/.github/workflows/build_tests.yml index 3c99da8..ce725c6 100644 --- a/.github/workflows/build_tests.yml +++ b/.github/workflows/build_tests.yml @@ -22,7 +22,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: ["3.7", "3.8", "3.9"] + python-version: ["3.7", "3.8", "3.9", "3.10"] steps: - uses: actions/checkout@v1 @@ -93,7 +93,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: ["3.7", "3.8", "3.9"] + python-version: ["3.7", "3.8", "3.9", "3.10"] steps: - uses: actions/checkout@v1 @@ -120,7 +120,7 @@ jobs: strategy: max-parallel: 4 matrix: - python-version: ["3.7", "3.8", "3.9"] + python-version: ["3.7", "3.8", "3.9", "3.10"] steps: - uses: actions/checkout@v1 diff --git a/README.md b/README.md index ec5d221..0c3bd19 100644 --- a/README.md +++ b/README.md @@ -29,8 +29,11 @@ POT provides the following generic OT solvers (links to examples): * Non regularized [Wasserstein barycenters [16] ](https://pythonot.github.io/auto_examples/barycenters/plot_barycenter_lp_vs_entropic.html)) with LP solver (only small scale). * [Gromov-Wasserstein distances](https://pythonot.github.io/auto_examples/gromov/plot_gromov.html) and [GW barycenters](https://pythonot.github.io/auto_examples/gromov/plot_gromov_barycenter.html) (exact [13] and regularized [12]), differentiable using gradients from * [Fused-Gromov-Wasserstein distances solver](https://pythonot.github.io/auto_examples/gromov/plot_fgw.html#sphx-glr-auto-examples-plot-fgw-py) and [FGW barycenters](https://pythonot.github.io/auto_examples/gromov/plot_barycenter_fgw.html) [24] -* [Stochastic solver](https://pythonot.github.io/auto_examples/plot_stochastic.html) for Large-scale Optimal Transport (semi-dual problem [18] and dual problem [19]) -* [Stochastic solver of Gromov Wasserstein](https://pythonot.github.io/auto_examples/gromov/plot_gromov.html) for large-scale problem with any loss functions [33] +* [Stochastic + solver](https://pythonot.github.io/auto_examples/others/plot_stochastic.html) and + [differentiable losses](https://pythonot.github.io/auto_examples/backends/plot_stoch_continuous_ot_pytorch.html) for + Large-scale Optimal Transport (semi-dual problem [18] and dual problem [19]) +* [Sampled solver of Gromov Wasserstein](https://pythonot.github.io/auto_examples/gromov/plot_gromov.html) for large-scale problem with any loss functions [33] * Non regularized [free support Wasserstein barycenters](https://pythonot.github.io/auto_examples/barycenters/plot_free_support_barycenter.html) [20]. * [Unbalanced OT](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_UOT_1D.html) with KL relaxation and [barycenter](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_UOT_barycenter_1D.html) [10, 25]. * [Partial Wasserstein and Gromov-Wasserstein](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_partial_wass_and_gromov.html) (exact [29] and entropic [3] @@ -119,7 +122,7 @@ Note that for easier access the module is named `ot` instead of `pot`. ### Dependencies -Some sub-modules require additional dependences which are discussed below +Some sub-modules require additional dependencies which are discussed below * **ot.dr** (Wasserstein dimensionality reduction) depends on autograd and pymanopt that can be installed with: @@ -127,7 +130,6 @@ Some sub-modules require additional dependences which are discussed below pip install pymanopt autograd ``` -* **ot.gpu** (GPU accelerated OT) depends on cupy that have to be installed following instructions on [this page](https://docs-cupy.chainer.org/en/stable/install.html). Obviously you will need CUDA installed and a compatible GPU. Note that this module is deprecated since version 0.8 and will be deleted in the future. GPU is now handled automatically through the backends and several solver already can run on GPU using the Pytorch backend. ## Examples diff --git a/RELEASES.md b/RELEASES.md index 45336f7..7d458f3 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -5,6 +5,7 @@ #### New features +- Update examples in the gallery (PR #359). - Add stochastic loss and OT plan computation for regularized OT and backend examples(PR #360). - Implementation of factored OT with emd and sinkhorn (PR #358). @@ -254,7 +255,7 @@ are coming for the next versions. #### Closed issues -- Add JMLR paper to teh readme ad Mathieu Blondel to the Acknoledgments (PR +- Add JMLR paper to the readme and Mathieu Blondel to the Acknoledgments (PR #231, #232) - Bug in Unbalanced OT example (Issue #127) - Clean Cython output when calling setup.py clean (Issue #122) diff --git a/docs/source/_static/images/logo.png b/docs/source/_static/images/logo.png index 7be5df7..2dd6f65 100644 Binary files a/docs/source/_static/images/logo.png and b/docs/source/_static/images/logo.png differ diff --git a/docs/source/_static/images/logo.svg b/docs/source/_static/images/logo.svg index 0bf2cb7..39fe900 100644 --- a/docs/source/_static/images/logo.svg +++ b/docs/source/_static/images/logo.svg @@ -1,24 +1,23 @@ - - + - + - 2022-03-17T17:25:30.736761 + 2022-03-30T17:25:32.476826 image/svg+xml - Matplotlib v3.3.3, https://matplotlib.org/ + Matplotlib v3.5.1, https://matplotlib.org/ - + @@ -26,103 +25,104 @@ L 209.7 75.384 L 209.7 0 L 0 0 +L 0 75.384 z -" style="fill:#ffffff;"/> +" style="fill: none"/> - +" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/> - +" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/> - +" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/> - +" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/> - +" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/> - +" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/> - +" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/> - +" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/> - +" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/> - +" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/> - +" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/> - +" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/> - +" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/> - +" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/> - +" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/> - +" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/> - +" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/> - +" clip-path="url(#p367fff45ba)" style="fill: none; stroke: #000000; stroke-opacity: 0.6; stroke-width: 3; stroke-linecap: square"/> - +" style="stroke: #000000"/> - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + - +" style="stroke: #000000"/> - - - - - - - - - - - - - - - - - - - + + + + + + + + + + + + + + + + + + + - - + + diff --git a/docs/source/conf.py b/docs/source/conf.py index 60d0bb7..9526518 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -17,9 +17,15 @@ import os import re try: import sphinx_gallery + except ImportError: print("warning sphinx-gallery not installed") + + + + + # !!!! allow readthedoc compilation try: from unittest.mock import MagicMock diff --git a/examples/backends/plot_sliced_wass_grad_flow_pytorch.py b/examples/backends/plot_sliced_wass_grad_flow_pytorch.py index 05b9952..cf5d64d 100644 --- a/examples/backends/plot_sliced_wass_grad_flow_pytorch.py +++ b/examples/backends/plot_sliced_wass_grad_flow_pytorch.py @@ -27,6 +27,8 @@ Machine Learning (pp. 4104-4113). PMLR. # # License: MIT License +# sphinx_gallery_thumbnail_number = 4 + # %% # Loading the data diff --git a/examples/backends/plot_wass1d_torch.py b/examples/backends/plot_wass1d_torch.py index 0abdd6d..cd8e2fd 100644 --- a/examples/backends/plot_wass1d_torch.py +++ b/examples/backends/plot_wass1d_torch.py @@ -1,9 +1,9 @@ r""" -================================= -Wasserstein 1D with PyTorch -================================= +================================================= +Wasserstein 1D (flow and barycenter) with PyTorch +================================================= -In this small example, we consider the following minization problem: +In this small example, we consider the following minimization problem: .. math:: \mu^* = \min_\mu W(\mu,\nu) diff --git a/examples/barycenters/plot_free_support_barycenter.py b/examples/barycenters/plot_free_support_barycenter.py index 2d68a39..226dfeb 100644 --- a/examples/barycenters/plot_free_support_barycenter.py +++ b/examples/barycenters/plot_free_support_barycenter.py @@ -9,61 +9,62 @@ sum of diracs. """ -# Author: Vivien Seguy +# Authors: Vivien Seguy +# Rémi Flamary # # License: MIT License +# sphinx_gallery_thumbnail_number = 2 + import numpy as np import matplotlib.pylab as pl import ot -############################################################################## +# %% # Generate data # ------------- -N = 3 +N = 2 d = 2 -measures_locations = [] -measures_weights = [] - -for i in range(N): - n_i = np.random.randint(low=1, high=20) # nb samples +I1 = pl.imread('../../data/redcross.png').astype(np.float64)[::4, ::4, 2] +I2 = pl.imread('../../data/duck.png').astype(np.float64)[::4, ::4, 2] - mu_i = np.random.normal(0., 4., (d,)) # Gaussian mean +sz = I2.shape[0] +XX, YY = np.meshgrid(np.arange(sz), np.arange(sz)) - A_i = np.random.rand(d, d) - cov_i = np.dot(A_i, A_i.transpose()) # Gaussian covariance matrix +x1 = np.stack((XX[I1 == 0], YY[I1 == 0]), 1) * 1.0 +x2 = np.stack((XX[I2 == 0] + 80, -YY[I2 == 0] + 32), 1) * 1.0 +x3 = np.stack((XX[I2 == 0], -YY[I2 == 0] + 32), 1) * 1.0 - x_i = ot.datasets.make_2D_samples_gauss(n_i, mu_i, cov_i) # Dirac locations - b_i = np.random.uniform(0., 1., (n_i,)) - b_i = b_i / np.sum(b_i) # Dirac weights +measures_locations = [x1, x2] +measures_weights = [ot.unif(x1.shape[0]), ot.unif(x2.shape[0])] - measures_locations.append(x_i) - measures_weights.append(b_i) +pl.figure(1, (12, 4)) +pl.scatter(x1[:, 0], x1[:, 1], alpha=0.5) +pl.scatter(x2[:, 0], x2[:, 1], alpha=0.5) +pl.title('Distributions') -############################################################################## +# %% # Compute free support barycenter # ------------------------------- -k = 10 # number of Diracs of the barycenter +k = 200 # number of Diracs of the barycenter X_init = np.random.normal(0., 1., (k, d)) # initial Dirac locations b = np.ones((k,)) / k # weights of the barycenter (it will not be optimized, only the locations are optimized) X = ot.lp.free_support_barycenter(measures_locations, measures_weights, X_init, b) - -############################################################################## -# Plot data +# %% +# Plot the barycenter # --------- -pl.figure(1) -for (x_i, b_i) in zip(measures_locations, measures_weights): - color = np.random.randint(low=1, high=10 * N) - pl.scatter(x_i[:, 0], x_i[:, 1], s=b_i * 1000, label='input measure') -pl.scatter(X[:, 0], X[:, 1], s=b * 1000, c='black', marker='^', label='2-Wasserstein barycenter') +pl.figure(2, (8, 3)) +pl.scatter(x1[:, 0], x1[:, 1], alpha=0.5) +pl.scatter(x2[:, 0], x2[:, 1], alpha=0.5) +pl.scatter(X[:, 0], X[:, 1], s=b * 1000, marker='s', label='2-Wasserstein barycenter') pl.title('Data measures and their barycenter') -pl.legend(loc=0) +pl.legend(loc="lower right") pl.show() diff --git a/examples/others/plot_logo.py b/examples/others/plot_logo.py index afddcad..9414371 100644 --- a/examples/others/plot_logo.py +++ b/examples/others/plot_logo.py @@ -7,8 +7,8 @@ Logo of the POT toolbox In this example we plot the logo of the POT toolbox. -A specificity of this logo is that it is done 100% in Python and generated using -matplotlib using the EMD solver from POT. +This logo is that it is done 100% in Python and generated using +matplotlib and ploting teh solution of the EMD solver from POT. """ @@ -86,8 +86,8 @@ pl.axis('equal') pl.axis('off') # Save logo file -# pl.savefig('logo.svg', dpi=150, bbox_inches='tight') -# pl.savefig('logo.png', dpi=150, bbox_inches='tight') +# pl.savefig('logo.svg', dpi=150, transparent=True, bbox_inches='tight') +# pl.savefig('logo.png', dpi=150, transparent=True, bbox_inches='tight') # %% # Plot the logo (dark background) diff --git a/examples/others/plot_screenkhorn_1D.py b/examples/others/plot_screenkhorn_1D.py new file mode 100644 index 0000000..2023649 --- /dev/null +++ b/examples/others/plot_screenkhorn_1D.py @@ -0,0 +1,71 @@ +# -*- coding: utf-8 -*- +""" +======================================== +Screened optimal transport (Screenkhorn) +======================================== + +This example illustrates the computation of Screenkhorn [26]. + +[26] Alaya M. Z., Bérar M., Gasso G., Rakotomamonjy A. (2019). +Screening Sinkhorn Algorithm for Regularized Optimal Transport, +Advances in Neural Information Processing Systems 33 (NeurIPS). +""" + +# Author: Mokhtar Z. Alaya +# +# License: MIT License + +import numpy as np +import matplotlib.pylab as pl +import ot.plot +from ot.datasets import make_1D_gauss as gauss +from ot.bregman import screenkhorn + +############################################################################## +# Generate data +# ------------- + +#%% parameters + +n = 100 # nb bins + +# bin positions +x = np.arange(n, dtype=np.float64) + +# Gaussian distributions +a = gauss(n, m=20, s=5) # m= mean, s= std +b = gauss(n, m=60, s=10) + +# loss matrix +M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1))) +M /= M.max() + +############################################################################## +# Plot distributions and loss matrix +# ---------------------------------- + +#%% plot the distributions + +pl.figure(1, figsize=(6.4, 3)) +pl.plot(x, a, 'b', label='Source distribution') +pl.plot(x, b, 'r', label='Target distribution') +pl.legend() + +# plot distributions and loss matrix + +pl.figure(2, figsize=(5, 5)) +ot.plot.plot1D_mat(a, b, M, 'Cost matrix M') + +############################################################################## +# Solve Screenkhorn +# ----------------------- + +# Screenkhorn +lambd = 2e-03 # entropy parameter +ns_budget = 30 # budget number of points to be keeped in the source distribution +nt_budget = 30 # budget number of points to be keeped in the target distribution + +G_screen = screenkhorn(a, b, M, lambd, ns_budget, nt_budget, uniform=False, restricted=True, verbose=True) +pl.figure(4, figsize=(5, 5)) +ot.plot.plot1D_mat(a, b, G_screen, 'OT matrix Screenkhorn') +pl.show() diff --git a/examples/others/plot_stochastic.py b/examples/others/plot_stochastic.py new file mode 100644 index 0000000..3a1ef31 --- /dev/null +++ b/examples/others/plot_stochastic.py @@ -0,0 +1,189 @@ +""" +=================== +Stochastic examples +=================== + +This example is designed to show how to use the stochatic optimization +algorithms for discrete and semi-continuous measures from the POT library. + +[18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. +Stochastic Optimization for Large-scale Optimal Transport. +Advances in Neural Information Processing Systems (2016). + +[19] Seguy, V., Bhushan Damodaran, B., Flamary, R., Courty, N., Rolet, A. & +Blondel, M. Large-scale Optimal Transport and Mapping Estimation. +International Conference on Learning Representation (2018) + +""" + +# Author: Kilian Fatras +# +# License: MIT License + +import matplotlib.pylab as pl +import numpy as np +import ot +import ot.plot + + +############################################################################# +# Compute the Transportation Matrix for the Semi-Dual Problem +# ----------------------------------------------------------- +# +# Discrete case +# ````````````` +# +# Sample two discrete measures for the discrete case and compute their cost +# matrix c. + +n_source = 7 +n_target = 4 +reg = 1 +numItermax = 1000 + +a = ot.utils.unif(n_source) +b = ot.utils.unif(n_target) + +rng = np.random.RandomState(0) +X_source = rng.randn(n_source, 2) +Y_target = rng.randn(n_target, 2) +M = ot.dist(X_source, Y_target) + +############################################################################# +# Call the "SAG" method to find the transportation matrix in the discrete case + +method = "SAG" +sag_pi = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method, + numItermax) +print(sag_pi) + +############################################################################# +# Semi-Continuous Case +# ```````````````````` +# +# Sample one general measure a, one discrete measures b for the semicontinous +# case, the points where source and target measures are defined and compute the +# cost matrix. + +n_source = 7 +n_target = 4 +reg = 1 +numItermax = 1000 +log = True + +a = ot.utils.unif(n_source) +b = ot.utils.unif(n_target) + +rng = np.random.RandomState(0) +X_source = rng.randn(n_source, 2) +Y_target = rng.randn(n_target, 2) +M = ot.dist(X_source, Y_target) + +############################################################################# +# Call the "ASGD" method to find the transportation matrix in the semicontinous +# case. + +method = "ASGD" +asgd_pi, log_asgd = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method, + numItermax, log=log) +print(log_asgd['alpha'], log_asgd['beta']) +print(asgd_pi) + +############################################################################# +# Compare the results with the Sinkhorn algorithm + +sinkhorn_pi = ot.sinkhorn(a, b, M, reg) +print(sinkhorn_pi) + + +############################################################################## +# Plot Transportation Matrices +# ```````````````````````````` +# +# For SAG + +pl.figure(4, figsize=(5, 5)) +ot.plot.plot1D_mat(a, b, sag_pi, 'semi-dual : OT matrix SAG') +pl.show() + + +############################################################################## +# For ASGD + +pl.figure(4, figsize=(5, 5)) +ot.plot.plot1D_mat(a, b, asgd_pi, 'semi-dual : OT matrix ASGD') +pl.show() + + +############################################################################## +# For Sinkhorn + +pl.figure(4, figsize=(5, 5)) +ot.plot.plot1D_mat(a, b, sinkhorn_pi, 'OT matrix Sinkhorn') +pl.show() + + +############################################################################# +# Compute the Transportation Matrix for the Dual Problem +# ------------------------------------------------------ +# +# Semi-continuous case +# ```````````````````` +# +# Sample one general measure a, one discrete measures b for the semi-continuous +# case and compute the cost matrix c. + +n_source = 7 +n_target = 4 +reg = 1 +numItermax = 100000 +lr = 0.1 +batch_size = 3 +log = True + +a = ot.utils.unif(n_source) +b = ot.utils.unif(n_target) + +rng = np.random.RandomState(0) +X_source = rng.randn(n_source, 2) +Y_target = rng.randn(n_target, 2) +M = ot.dist(X_source, Y_target) + +############################################################################# +# +# Call the "SGD" dual method to find the transportation matrix in the +# semi-continuous case + +sgd_dual_pi, log_sgd = ot.stochastic.solve_dual_entropic(a, b, M, reg, + batch_size, numItermax, + lr, log=log) +print(log_sgd['alpha'], log_sgd['beta']) +print(sgd_dual_pi) + +############################################################################# +# +# Compare the results with the Sinkhorn algorithm +# ``````````````````````````````````````````````` +# +# Call the Sinkhorn algorithm from POT + +sinkhorn_pi = ot.sinkhorn(a, b, M, reg) +print(sinkhorn_pi) + +############################################################################## +# Plot Transportation Matrices +# ```````````````````````````` +# +# For SGD + +pl.figure(4, figsize=(5, 5)) +ot.plot.plot1D_mat(a, b, sgd_dual_pi, 'dual : OT matrix SGD') +pl.show() + + +############################################################################## +# For Sinkhorn + +pl.figure(4, figsize=(5, 5)) +ot.plot.plot1D_mat(a, b, sinkhorn_pi, 'OT matrix Sinkhorn') +pl.show() diff --git a/examples/plot_OT_1D.py b/examples/plot_OT_1D.py index 15ead96..62f0b7d 100644 --- a/examples/plot_OT_1D.py +++ b/examples/plot_OT_1D.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- """ -==================== -1D optimal transport -==================== +====================================== +Optimal Transport for 1D distributions +====================================== This example illustrates the computation of EMD and Sinkhorn transport plans and their visualization. @@ -64,7 +64,11 @@ ot.plot.plot1D_mat(a, b, M, 'Cost matrix M') #%% EMD -G0 = ot.emd(a, b, M) +# use fast 1D solver +G0 = ot.emd_1d(x, x, a, b) + +# Equivalent to +# G0 = ot.emd(a, b, M) pl.figure(3, figsize=(5, 5)) ot.plot.plot1D_mat(a, b, G0, 'OT matrix G0') diff --git a/examples/plot_OT_1D_smooth.py b/examples/plot_OT_1D_smooth.py index b07f99f..5415e4f 100644 --- a/examples/plot_OT_1D_smooth.py +++ b/examples/plot_OT_1D_smooth.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- """ -=========================== -1D smooth optimal transport -=========================== +================================ +Smooth optimal transport example +================================ This example illustrates the computation of EMD, Sinkhorn and smooth OT plans and their visualization. diff --git a/examples/plot_OT_2D_samples.py b/examples/plot_OT_2D_samples.py index c3a7cd8..1d82fb8 100644 --- a/examples/plot_OT_2D_samples.py +++ b/examples/plot_OT_2D_samples.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ ==================================================== -2D Optimal transport between empirical distributions +Optimal Transport between 2D empirical distributions ==================================================== Illustration of 2D optimal transport between discributions that are weighted diff --git a/examples/plot_OT_L1_vs_L2.py b/examples/plot_OT_L1_vs_L2.py index cb94574..cce51f8 100644 --- a/examples/plot_OT_L1_vs_L2.py +++ b/examples/plot_OT_L1_vs_L2.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- """ -========================================== -2D Optimal transport for different metrics -========================================== +================================================ +Optimal Transport with different gournd metrics +================================================ -2D OT on empirical distributio with different gound metric. +2D OT on empirical distributio with different ground metric. Stole the figure idea from Fig. 1 and 2 in https://arxiv.org/pdf/1706.07650.pdf @@ -23,7 +23,7 @@ import matplotlib.pylab as pl import ot import ot.plot -############################################################################## +# %% # Dataset 1 : uniform sampling # ---------------------------- @@ -46,7 +46,7 @@ M2 = ot.dist(xs, xt, metric='sqeuclidean') M2 /= M2.max() # loss matrix -Mp = np.sqrt(ot.dist(xs, xt, metric='euclidean')) +Mp = ot.dist(xs, xt, metric='cityblock') Mp /= Mp.max() # Data @@ -71,7 +71,7 @@ pl.title('Squared Euclidean cost') pl.subplot(1, 3, 3) pl.imshow(Mp, interpolation='nearest') -pl.title('Sqrt Euclidean cost') +pl.title('L1 (cityblock cost') pl.tight_layout() ############################################################################## @@ -109,22 +109,22 @@ pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples') pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples') pl.axis('equal') # pl.legend(loc=0) -pl.title('OT sqrt Euclidean') +pl.title('OT L1 (cityblock)') pl.tight_layout() pl.show() -############################################################################## +# %% # Dataset 2 : Partial circle # -------------------------- -n = 50 # nb samples +n = 20 # nb samples xtot = np.zeros((n + 1, 2)) xtot[:, 0] = np.cos( - (np.arange(n + 1) + 1.0) * 0.9 / (n + 2) * 2 * np.pi) + (np.arange(n + 1) + 1.0) * 0.8 / (n + 2) * 2 * np.pi) xtot[:, 1] = np.sin( - (np.arange(n + 1) + 1.0) * 0.9 / (n + 2) * 2 * np.pi) + (np.arange(n + 1) + 1.0) * 0.8 / (n + 2) * 2 * np.pi) xs = xtot[:n, :] xt = xtot[1:, :] @@ -140,7 +140,7 @@ M2 = ot.dist(xs, xt, metric='sqeuclidean') M2 /= M2.max() # loss matrix -Mp = np.sqrt(ot.dist(xs, xt, metric='euclidean')) +Mp = ot.dist(xs, xt, metric='cityblock') Mp /= Mp.max() @@ -166,13 +166,13 @@ pl.title('Squared Euclidean cost') pl.subplot(1, 3, 3) pl.imshow(Mp, interpolation='nearest') -pl.title('Sqrt Euclidean cost') +pl.title('L1 (cityblock) cost') pl.tight_layout() ############################################################################## # Dataset 2 : Plot OT Matrices # ----------------------------- - +# #%% EMD G1 = ot.emd(a, b, M1) @@ -204,7 +204,7 @@ pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples') pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples') pl.axis('equal') # pl.legend(loc=0) -pl.title('OT sqrt Euclidean') +pl.title('OT L1 (cityblock)') pl.tight_layout() pl.show() diff --git a/examples/plot_compute_emd.py b/examples/plot_compute_emd.py index 527a847..36cc7da 100644 --- a/examples/plot_compute_emd.py +++ b/examples/plot_compute_emd.py @@ -1,10 +1,10 @@ # -*- coding: utf-8 -*- """ -================= -Plot multiple EMD -================= +================== +OT distances in 1D +================== -Shows how to compute multiple EMD and Sinkhorn with two different +Shows how to compute multiple Wassersein and Sinkhorn with two different ground metrics and plot their values for different distributions. @@ -14,7 +14,7 @@ ground metrics and plot their values for different distributions. # # License: MIT License -# sphinx_gallery_thumbnail_number = 3 +# sphinx_gallery_thumbnail_number = 2 import numpy as np import matplotlib.pylab as pl @@ -29,7 +29,7 @@ from ot.datasets import make_1D_gauss as gauss #%% parameters n = 100 # nb bins -n_target = 50 # nb target distributions +n_target = 20 # nb target distributions # bin positions @@ -47,9 +47,9 @@ for i, m in enumerate(lst_m): # loss matrix and normalization M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)), 'euclidean') -M /= M.max() +M /= M.max() * 0.1 M2 = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)), 'sqeuclidean') -M2 /= M2.max() +M2 /= M2.max() * 0.1 ############################################################################## # Plot data @@ -59,10 +59,12 @@ M2 /= M2.max() pl.figure(1) pl.subplot(2, 1, 1) -pl.plot(x, a, 'b', label='Source distribution') +pl.plot(x, a, 'r', label='Source distribution') pl.title('Source distribution') pl.subplot(2, 1, 2) -pl.plot(x, B, label='Target distributions') +for i in range(n_target): + pl.plot(x, B[:, i], 'b', alpha=i / n_target) +pl.plot(x, B[:, -1], 'b', label='Target distributions') pl.title('Target distributions') pl.tight_layout() @@ -73,14 +75,27 @@ pl.tight_layout() #%% Compute and plot distributions and loss matrix -d_emd = ot.emd2(a, B, M) # direct computation of EMD -d_emd2 = ot.emd2(a, B, M2) # direct computation of EMD with loss M2 - +d_emd = ot.emd2(a, B, M) # direct computation of OT loss +d_emd2 = ot.emd2(a, B, M2) # direct computation of OT loss with metrixc M2 +d_tv = [np.sum(abs(a - B[:, i])) for i in range(n_target)] pl.figure(2) -pl.plot(d_emd, label='Euclidean EMD') -pl.plot(d_emd2, label='Squared Euclidean EMD') -pl.title('EMD distances') +pl.subplot(2, 1, 1) +pl.plot(x, a, 'r', label='Source distribution') +pl.title('Distributions') +for i in range(n_target): + pl.plot(x, B[:, i], 'b', alpha=i / n_target) +pl.plot(x, B[:, -1], 'b', label='Target distributions') +pl.ylim((-.01, 0.13)) +pl.xticks(()) +pl.legend() +pl.subplot(2, 1, 2) +pl.plot(d_emd, label='Euclidean OT') +pl.plot(d_emd2, label='Squared Euclidean OT') +pl.plot(d_tv, label='Total Variation (TV)') +#pl.xlim((-7,23)) +pl.xlabel('Displacement') +pl.title('Divergences') pl.legend() ############################################################################## @@ -88,17 +103,30 @@ pl.legend() # ----------------------------------------- #%% -reg = 1e-2 +reg = 1e-1 d_sinkhorn = ot.sinkhorn2(a, B, M, reg) d_sinkhorn2 = ot.sinkhorn2(a, B, M2, reg) -pl.figure(2) +pl.figure(3) pl.clf() -pl.plot(d_emd, label='Euclidean EMD') -pl.plot(d_emd2, label='Squared Euclidean EMD') + +pl.subplot(2, 1, 1) +pl.plot(x, a, 'r', label='Source distribution') +pl.title('Distributions') +for i in range(n_target): + pl.plot(x, B[:, i], 'b', alpha=i / n_target) +pl.plot(x, B[:, -1], 'b', label='Target distributions') +pl.ylim((-.01, 0.13)) +pl.xticks(()) +pl.legend() +pl.subplot(2, 1, 2) +pl.plot(d_emd, label='Euclidean OT') +pl.plot(d_emd2, label='Squared Euclidean OT') pl.plot(d_sinkhorn, '+', label='Euclidean Sinkhorn') pl.plot(d_sinkhorn2, '+', label='Squared Euclidean Sinkhorn') -pl.title('EMD distances') +pl.plot(d_tv, label='Total Variation (TV)') +#pl.xlim((-7,23)) +pl.xlabel('Displacement') +pl.title('Divergences') pl.legend() - pl.show() diff --git a/examples/plot_optim_OTreg.py b/examples/plot_optim_OTreg.py index 5eb15bd..7b021d2 100644 --- a/examples/plot_optim_OTreg.py +++ b/examples/plot_optim_OTreg.py @@ -24,7 +24,7 @@ arXiv preprint arXiv:1510.06567. """ -# sphinx_gallery_thumbnail_number = 4 +# sphinx_gallery_thumbnail_number = 5 import numpy as np import matplotlib.pylab as pl @@ -58,7 +58,7 @@ M /= M.max() G0 = ot.emd(a, b, M) -pl.figure(3, figsize=(5, 5)) +pl.figure(1, figsize=(5, 5)) ot.plot.plot1D_mat(a, b, G0, 'OT matrix G0') ############################################################################## @@ -80,7 +80,7 @@ reg = 1e-1 Gl2 = ot.optim.cg(a, b, M, reg, f, df, verbose=True) -pl.figure(3) +pl.figure(2) ot.plot.plot1D_mat(a, b, Gl2, 'OT matrix Frob. reg') ############################################################################## @@ -102,7 +102,7 @@ reg = 1e-3 Ge = ot.optim.cg(a, b, M, reg, f, df, verbose=True) -pl.figure(4, figsize=(5, 5)) +pl.figure(3, figsize=(5, 5)) ot.plot.plot1D_mat(a, b, Ge, 'OT matrix Entrop. reg') ############################################################################## @@ -125,6 +125,34 @@ reg2 = 1e-1 Gel2 = ot.optim.gcg(a, b, M, reg1, reg2, f, df, verbose=True) -pl.figure(5, figsize=(5, 5)) +pl.figure(4, figsize=(5, 5)) ot.plot.plot1D_mat(a, b, Gel2, 'OT entropic + matrix Frob. reg') pl.show() + + +# %% +# Comparison of the OT matrices + +nvisu = 40 + +pl.figure(5, figsize=(10, 4)) + +pl.subplot(2, 2, 1) +pl.imshow(G0[:nvisu, :]) +pl.axis('off') +pl.title('Exact OT') + +pl.subplot(2, 2, 2) +pl.imshow(Gl2[:nvisu, :]) +pl.axis('off') +pl.title('Frobenius reg.') + +pl.subplot(2, 2, 3) +pl.imshow(Ge[:nvisu, :]) +pl.axis('off') +pl.title('Entropic reg.') + +pl.subplot(2, 2, 4) +pl.imshow(Gel2[:nvisu, :]) +pl.axis('off') +pl.title('Entropic + Frobenius reg.') diff --git a/examples/plot_screenkhorn_1D.py b/examples/plot_screenkhorn_1D.py deleted file mode 100644 index 785642a..0000000 --- a/examples/plot_screenkhorn_1D.py +++ /dev/null @@ -1,71 +0,0 @@ -# -*- coding: utf-8 -*- -""" -=============================== -1D Screened optimal transport -=============================== - -This example illustrates the computation of Screenkhorn [26]. - -[26] Alaya M. Z., Bérar M., Gasso G., Rakotomamonjy A. (2019). -Screening Sinkhorn Algorithm for Regularized Optimal Transport, -Advances in Neural Information Processing Systems 33 (NeurIPS). -""" - -# Author: Mokhtar Z. Alaya -# -# License: MIT License - -import numpy as np -import matplotlib.pylab as pl -import ot.plot -from ot.datasets import make_1D_gauss as gauss -from ot.bregman import screenkhorn - -############################################################################## -# Generate data -# ------------- - -#%% parameters - -n = 100 # nb bins - -# bin positions -x = np.arange(n, dtype=np.float64) - -# Gaussian distributions -a = gauss(n, m=20, s=5) # m= mean, s= std -b = gauss(n, m=60, s=10) - -# loss matrix -M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1))) -M /= M.max() - -############################################################################## -# Plot distributions and loss matrix -# ---------------------------------- - -#%% plot the distributions - -pl.figure(1, figsize=(6.4, 3)) -pl.plot(x, a, 'b', label='Source distribution') -pl.plot(x, b, 'r', label='Target distribution') -pl.legend() - -# plot distributions and loss matrix - -pl.figure(2, figsize=(5, 5)) -ot.plot.plot1D_mat(a, b, M, 'Cost matrix M') - -############################################################################## -# Solve Screenkhorn -# ----------------------- - -# Screenkhorn -lambd = 2e-03 # entropy parameter -ns_budget = 30 # budget number of points to be keeped in the source distribution -nt_budget = 30 # budget number of points to be keeped in the target distribution - -G_screen = screenkhorn(a, b, M, lambd, ns_budget, nt_budget, uniform=False, restricted=True, verbose=True) -pl.figure(4, figsize=(5, 5)) -ot.plot.plot1D_mat(a, b, G_screen, 'OT matrix Screenkhorn') -pl.show() diff --git a/examples/plot_stochastic.py b/examples/plot_stochastic.py deleted file mode 100644 index 3a1ef31..0000000 --- a/examples/plot_stochastic.py +++ /dev/null @@ -1,189 +0,0 @@ -""" -=================== -Stochastic examples -=================== - -This example is designed to show how to use the stochatic optimization -algorithms for discrete and semi-continuous measures from the POT library. - -[18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. -Stochastic Optimization for Large-scale Optimal Transport. -Advances in Neural Information Processing Systems (2016). - -[19] Seguy, V., Bhushan Damodaran, B., Flamary, R., Courty, N., Rolet, A. & -Blondel, M. Large-scale Optimal Transport and Mapping Estimation. -International Conference on Learning Representation (2018) - -""" - -# Author: Kilian Fatras -# -# License: MIT License - -import matplotlib.pylab as pl -import numpy as np -import ot -import ot.plot - - -############################################################################# -# Compute the Transportation Matrix for the Semi-Dual Problem -# ----------------------------------------------------------- -# -# Discrete case -# ````````````` -# -# Sample two discrete measures for the discrete case and compute their cost -# matrix c. - -n_source = 7 -n_target = 4 -reg = 1 -numItermax = 1000 - -a = ot.utils.unif(n_source) -b = ot.utils.unif(n_target) - -rng = np.random.RandomState(0) -X_source = rng.randn(n_source, 2) -Y_target = rng.randn(n_target, 2) -M = ot.dist(X_source, Y_target) - -############################################################################# -# Call the "SAG" method to find the transportation matrix in the discrete case - -method = "SAG" -sag_pi = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method, - numItermax) -print(sag_pi) - -############################################################################# -# Semi-Continuous Case -# ```````````````````` -# -# Sample one general measure a, one discrete measures b for the semicontinous -# case, the points where source and target measures are defined and compute the -# cost matrix. - -n_source = 7 -n_target = 4 -reg = 1 -numItermax = 1000 -log = True - -a = ot.utils.unif(n_source) -b = ot.utils.unif(n_target) - -rng = np.random.RandomState(0) -X_source = rng.randn(n_source, 2) -Y_target = rng.randn(n_target, 2) -M = ot.dist(X_source, Y_target) - -############################################################################# -# Call the "ASGD" method to find the transportation matrix in the semicontinous -# case. - -method = "ASGD" -asgd_pi, log_asgd = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method, - numItermax, log=log) -print(log_asgd['alpha'], log_asgd['beta']) -print(asgd_pi) - -############################################################################# -# Compare the results with the Sinkhorn algorithm - -sinkhorn_pi = ot.sinkhorn(a, b, M, reg) -print(sinkhorn_pi) - - -############################################################################## -# Plot Transportation Matrices -# ```````````````````````````` -# -# For SAG - -pl.figure(4, figsize=(5, 5)) -ot.plot.plot1D_mat(a, b, sag_pi, 'semi-dual : OT matrix SAG') -pl.show() - - -############################################################################## -# For ASGD - -pl.figure(4, figsize=(5, 5)) -ot.plot.plot1D_mat(a, b, asgd_pi, 'semi-dual : OT matrix ASGD') -pl.show() - - -############################################################################## -# For Sinkhorn - -pl.figure(4, figsize=(5, 5)) -ot.plot.plot1D_mat(a, b, sinkhorn_pi, 'OT matrix Sinkhorn') -pl.show() - - -############################################################################# -# Compute the Transportation Matrix for the Dual Problem -# ------------------------------------------------------ -# -# Semi-continuous case -# ```````````````````` -# -# Sample one general measure a, one discrete measures b for the semi-continuous -# case and compute the cost matrix c. - -n_source = 7 -n_target = 4 -reg = 1 -numItermax = 100000 -lr = 0.1 -batch_size = 3 -log = True - -a = ot.utils.unif(n_source) -b = ot.utils.unif(n_target) - -rng = np.random.RandomState(0) -X_source = rng.randn(n_source, 2) -Y_target = rng.randn(n_target, 2) -M = ot.dist(X_source, Y_target) - -############################################################################# -# -# Call the "SGD" dual method to find the transportation matrix in the -# semi-continuous case - -sgd_dual_pi, log_sgd = ot.stochastic.solve_dual_entropic(a, b, M, reg, - batch_size, numItermax, - lr, log=log) -print(log_sgd['alpha'], log_sgd['beta']) -print(sgd_dual_pi) - -############################################################################# -# -# Compare the results with the Sinkhorn algorithm -# ``````````````````````````````````````````````` -# -# Call the Sinkhorn algorithm from POT - -sinkhorn_pi = ot.sinkhorn(a, b, M, reg) -print(sinkhorn_pi) - -############################################################################## -# Plot Transportation Matrices -# ```````````````````````````` -# -# For SGD - -pl.figure(4, figsize=(5, 5)) -ot.plot.plot1D_mat(a, b, sgd_dual_pi, 'dual : OT matrix SGD') -pl.show() - - -############################################################################## -# For Sinkhorn - -pl.figure(4, figsize=(5, 5)) -ot.plot.plot1D_mat(a, b, sinkhorn_pi, 'OT matrix Sinkhorn') -pl.show() diff --git a/examples/sliced-wasserstein/README.txt b/examples/sliced-wasserstein/README.txt index a575345..73e6122 100644 --- a/examples/sliced-wasserstein/README.txt +++ b/examples/sliced-wasserstein/README.txt @@ -1,4 +1,4 @@ Sliced Wasserstein Distance ---------------------------- \ No newline at end of file +--------------------------- diff --git a/examples/sliced-wasserstein/plot_variance.py b/examples/sliced-wasserstein/plot_variance.py index 7d73907..f12b522 100644 --- a/examples/sliced-wasserstein/plot_variance.py +++ b/examples/sliced-wasserstein/plot_variance.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- """ -============================== -2D Sliced Wasserstein Distance -============================== +=============================================== +Sliced Wasserstein Distance on 2D distributions +=============================================== This example illustrates the computation of the sliced Wasserstein Distance as proposed in [31]. @@ -16,6 +16,8 @@ measures." Journal of Mathematical Imaging and Vision 51.1 (2015): 22-45 # # License: MIT License +# sphinx_gallery_thumbnail_number = 2 + import matplotlib.pylab as pl import numpy as np diff --git a/examples/unbalanced-partial/plot_UOT_1D.py b/examples/unbalanced-partial/plot_UOT_1D.py index 183849c..06dd02d 100644 --- a/examples/unbalanced-partial/plot_UOT_1D.py +++ b/examples/unbalanced-partial/plot_UOT_1D.py @@ -12,6 +12,8 @@ using a Kullback-Leibler relaxation. # # License: MIT License +# sphinx_gallery_thumbnail_number = 4 + import numpy as np import matplotlib.pylab as pl import ot @@ -69,7 +71,20 @@ epsilon = 0.1 # entropy parameter alpha = 1. # Unbalanced KL relaxation parameter Gs = ot.unbalanced.sinkhorn_unbalanced(a, b, M, epsilon, alpha, verbose=True) -pl.figure(4, figsize=(5, 5)) +pl.figure(3, figsize=(5, 5)) ot.plot.plot1D_mat(a, b, Gs, 'UOT matrix Sinkhorn') pl.show() + + +# %% +# plot the transported mass +# ------------------------- + +pl.figure(4, figsize=(6.4, 3)) +pl.plot(x, a, 'b', label='Source distribution') +pl.plot(x, b, 'r', label='Target distribution') +pl.fill(x, Gs.sum(1), 'b', alpha=0.5, label='Transported source') +pl.fill(x, Gs.sum(0), 'r', alpha=0.5, label='Transported target') +pl.legend(loc='upper right') +pl.title('Distributions and transported mass for UOT') diff --git a/examples/unbalanced-partial/plot_regpath.py b/examples/unbalanced-partial/plot_regpath.py index 4a51c2d..782e8c2 100644 --- a/examples/unbalanced-partial/plot_regpath.py +++ b/examples/unbalanced-partial/plot_regpath.py @@ -15,11 +15,12 @@ penalized linear regression. # Author: Haoran Wu # License: MIT License +# sphinx_gallery_thumbnail_number = 2 import numpy as np import matplotlib.pylab as pl import ot - +import matplotlib.animation as animation ############################################################################## # Generate data # ------------- @@ -72,6 +73,9 @@ t2, t_list2, g_list2 = ot.regpath.regularization_path(a, b, M, reg=final_gamma, ############################################################################## # Plot the regularization path # ---------------- +# +# The OT plan is ploted as a function of $\gamma$ that is the inverse of the +# weight on the marginal relaxations. #%% fully relaxed l2-penalized UOT @@ -103,13 +107,53 @@ for p in range(4): pl.show() +# %% +# Animation of the regpath for UOT l2 +# ------------------------ + +nv = 100 +g_list_v = np.logspace(-.5, -2.5, nv) + +pl.figure(3) + + +def _update_plot(iv): + pl.clf() + tp = ot.regpath.compute_transport_plan(g_list_v[iv], g_list, + t_list) + P = tp.reshape((n, n)) + if P.sum() > 0: + P = P / P.max() + for i in range(n): + for j in range(n): + if P[i, j] > 0: + pl.plot([xs[i, 0], xt[j, 0]], [xs[i, 1], xt[j, 1]], color='C2', + alpha=P[i, j] * 0.5) + pl.scatter(xs[:, 0], xs[:, 1], c='C0', alpha=0.2) + pl.scatter(xt[:, 0], xt[:, 1], c='C1', alpha=0.2) + pl.scatter(xs[:, 0], xs[:, 1], c='C0', s=P.sum(1).ravel() * (1 + p) * 4, + label='Re-weighted source', alpha=1) + pl.scatter(xt[:, 0], xt[:, 1], c='C1', s=P.sum(0).ravel() * (1 + p) * 4, + label='Re-weighted target', alpha=1) + pl.plot([], [], color='C2', alpha=0.8, label='OT plan') + pl.title(r'$\ell_2$ UOT $\gamma$={:1.3f}'.format(g_list_v[iv]), + fontsize=11) + return 1 + + +i = 0 +_update_plot(i) + +ani = animation.FuncAnimation(pl.gcf(), _update_plot, nv, interval=50, repeat_delay=2000) + + ############################################################################## # Plot the semi-relaxed regularization path # ------------------- #%% semi-relaxed l2-penalized UOT -pl.figure(3) +pl.figure(4) selected_gamma = [10, 1, 1e-1, 1e-2] for p in range(4): tp = ot.regpath.compute_transport_plan(selected_gamma[p], g_list2, @@ -133,3 +177,43 @@ for p in range(4): if p < 2: pl.xticks(()) pl.show() + + +# %% +# Animation of the regpath for semi-relaxed UOT l2 +# ------------------------ + +nv = 100 +g_list_v = np.logspace(2.5, -2, nv) + +pl.figure(5) + + +def _update_plot(iv): + pl.clf() + tp = ot.regpath.compute_transport_plan(g_list_v[iv], g_list2, + t_list2) + P = tp.reshape((n, n)) + if P.sum() > 0: + P = P / P.max() + for i in range(n): + for j in range(n): + if P[i, j] > 0: + pl.plot([xs[i, 0], xt[j, 0]], [xs[i, 1], xt[j, 1]], color='C2', + alpha=P[i, j] * 0.5) + pl.scatter(xs[:, 0], xs[:, 1], c='C0', alpha=0.2) + pl.scatter(xt[:, 0], xt[:, 1], c='C1', alpha=0.2) + pl.scatter(xs[:, 0], xs[:, 1], c='C0', s=P.sum(1).ravel() * (1 + p) * 4, + label='Re-weighted source', alpha=1) + pl.scatter(xt[:, 0], xt[:, 1], c='C1', s=P.sum(0).ravel() * (1 + p) * 4, + label='Re-weighted target', alpha=1) + pl.plot([], [], color='C2', alpha=0.8, label='OT plan') + pl.title(r'Semi-relaxed $\ell_2$ UOT $\gamma$={:1.3f}'.format(g_list_v[iv]), + fontsize=11) + return 1 + + +i = 0 +_update_plot(i) + +ani = animation.FuncAnimation(pl.gcf(), _update_plot, nv, interval=50, repeat_delay=2000) -- cgit v1.2.3 From 0b223ff883fd73601984a92c31cb70d4aded16e8 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Thu, 7 Apr 2022 14:18:54 +0200 Subject: [MRG] Remove deprecated ot.gpu submodule (#361) * remove all cpu submodule and tests * speedup tests gromov --- README.md | 2 +- RELEASES.md | 1 + docs/source/quickstart.rst | 58 +++++++++++--- ot/gpu/__init__.py | 50 ------------ ot/gpu/bregman.py | 196 --------------------------------------------- ot/gpu/da.py | 144 --------------------------------- ot/gpu/utils.py | 101 ----------------------- test/test_gpu.py | 106 ------------------------ test/test_gromov.py | 129 ++++++++++++++--------------- 9 files changed, 113 insertions(+), 674 deletions(-) delete mode 100644 ot/gpu/__init__.py delete mode 100644 ot/gpu/bregman.py delete mode 100644 ot/gpu/da.py delete mode 100644 ot/gpu/utils.py delete mode 100644 test/test_gpu.py (limited to 'docs/source') diff --git a/README.md b/README.md index 0c3bd19..2ace69c 100644 --- a/README.md +++ b/README.md @@ -185,7 +185,7 @@ The contributors to this library are * [Alexandre Gramfort](http://alexandre.gramfort.net/) (CI, documentation) * [Laetitia Chapel](http://people.irisa.fr/Laetitia.Chapel/) (Partial OT) * [Michael Perrot](http://perso.univ-st-etienne.fr/pem82055/) (Mapping estimation) -* [Léo Gautheron](https://github.com/aje) (GPU implementation) +* [Léo Gautheron](https://github.com/aje) (Initial GPU implementation) * [Nathalie Gayraud](https://www.linkedin.com/in/nathalie-t-h-gayraud/?ppe=1) (DA classes) * [Stanislas Chambon](https://slasnista.github.io/) (DA classes) * [Antoine Rolet](https://arolet.github.io/) (EMD solver debug) diff --git a/RELEASES.md b/RELEASES.md index 7d458f3..b54a84a 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -5,6 +5,7 @@ #### New features +- remode deprecated `ot.gpu` submodule (PR #361) - Update examples in the gallery (PR #359). - Add stochastic loss and OT plan computation for regularized OT and backend examples(PR #360). diff --git a/docs/source/quickstart.rst b/docs/source/quickstart.rst index 09a362b..b4cc8ab 100644 --- a/docs/source/quickstart.rst +++ b/docs/source/quickstart.rst @@ -1028,15 +1028,6 @@ FAQ speedup can be obtained by using a GPU implementation since all operations are matrix/vector products. -4. **Using GPU fails with error: module 'ot' has no attribute 'gpu'** - - In order to limit import time and hard dependencies in POT. we do not import - some sub-modules automatically with :code:`import ot`. In order to use the - acceleration in :any:`ot.gpu` you need first to import is with - :code:`import ot.gpu`. - - See `Issue #85 `__ and :any:`ot.gpu` - for more details. References @@ -1172,3 +1163,52 @@ References .. [30] Flamary, Rémi, et al. "Optimal transport with Laplacian regularization: Applications to domain adaptation and shape matching." NIPS Workshop on Optimal Transport and Machine Learning OTML. 2014. + +.. [31] Bonneel, Nicolas, et al. `Sliced and radon wasserstein barycenters of + measures + `_\ + , Journal of Mathematical Imaging and Vision 51.1 (2015): 22-45 + +.. [32] Huang, M., Ma S., Lai, L. (2021). `A Riemannian Block Coordinate Descent Method for Computing the Projection Robust Wasserstein Distance `_\ , Proceedings of the 38th International Conference on Machine Learning (ICML). + +.. [33] Kerdoncuff T., Emonet R., Marc S. `Sampled Gromov Wasserstein + `_\ , Machine + Learning Journal (MJL), 2021 + +.. [34] Feydy, J., Séjourné, T., Vialard, F. X., Amari, S. I., Trouvé, A., & + Peyré, G. (2019, April). `Interpolating between optimal transport and MMD + using Sinkhorn divergences + `_. In The 22nd + International Conference on Artificial Intelligence and Statistics (pp. + 2681-2690). PMLR. + +.. [35] Deshpande, I., Hu, Y. T., Sun, R., Pyrros, A., Siddiqui, N., Koyejo, S., + & Schwing, A. G. (2019). `Max-sliced wasserstein distance and its use + for gans + `_. + In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition (pp. 10648-10656). + +.. [36] Liutkus, A., Simsekli, U., Majewski, S., Durmus, A., & Stöter, F. R. + (2019, May). `Sliced-Wasserstein flows: Nonparametric generative modeling via + optimal transport and diffusions + `_. In International + Conference on Machine Learning (pp. 4104-4113). PMLR. + +.. [37] Janati, H., Cuturi, M., Gramfort, A. `Debiased sinkhorn barycenters + `_ Proceedings of + the 37th International Conference on Machine Learning, PMLR 119:4692-4701, 2020 + +.. [38] C. Vincent-Cuaz, T. Vayer, R. Flamary, M. Corneli, N. Courty, `Online + Graph Dictionary Learning `_\ , + International Conference on Machine Learning (ICML), 2021. + +.. [39] Gozlan, N., Roberto, C., Samson, P. M., & Tetali, P. (2017). + `Kantorovich duality for general transport costs and applications + `_. + Journal of Functional Analysis, 273(11), 3327-3405. + +.. [40] Forrow, A., Hütter, J. C., Nitzan, M., Rigollet, P., Schiebinger, G., & + Weed, J. (2019, April). `Statistical optimal transport via factored + couplings `_. In + The 22nd International Conference on Artificial Intelligence and Statistics + (pp. 2454-2465). PMLR. diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py deleted file mode 100644 index 12db605..0000000 --- a/ot/gpu/__init__.py +++ /dev/null @@ -1,50 +0,0 @@ -# -*- coding: utf-8 -*- -""" -GPU implementation for several OT solvers and utility -functions. - -The GPU backend in handled by `cupy -`_. - -.. warning:: - This module is now deprecated and will be removed in future releases. POT - now privides a backend mechanism that allows for solving prolem on GPU wth - the pytorch backend. - - -.. warning:: - Note that by default the module is not imported in :mod:`ot`. In order to - use it you need to explicitely import :mod:`ot.gpu` . - -By default, the functions in this module accept and return numpy arrays -in order to proide drop-in replacement for the other POT function but -the transfer between CPU en GPU comes with a significant overhead. - -In order to get the best performances, we recommend to give only cupy -arrays to the functions and desactivate the conversion to numpy of the -result of the function with parameter ``to_numpy=False``. - -""" - -# Author: Remi Flamary -# Leo Gautheron -# -# License: MIT License - -import warnings - -from . import bregman -from . import da -from .bregman import sinkhorn -from .da import sinkhorn_lpl1_mm - -from . import utils -from .utils import dist, to_gpu, to_np - - -warnings.warn('This module is deprecated and will be removed in the next minor release of POT', category=DeprecationWarning) - - -__all__ = ["utils", "dist", "sinkhorn", - "sinkhorn_lpl1_mm", 'bregman', 'da', 'to_gpu', 'to_np'] - diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py deleted file mode 100644 index 76af00e..0000000 --- a/ot/gpu/bregman.py +++ /dev/null @@ -1,196 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Bregman projections for regularized OT with GPU -""" - -# Author: Remi Flamary -# Leo Gautheron -# -# License: MIT License - -import cupy as np # np used for matrix computation -import cupy as cp # cp used for cupy specific operations -from . import utils - - -def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, - verbose=False, log=False, to_numpy=True, **kwargs): - r""" - Solve the entropic regularization optimal transport on GPU - - If the input matrix are in numpy format, they will be uploaded to the - GPU first which can incur significant time overhead. - - The function solves the following optimization problem: - - .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) - - s.t. \gamma 1 = a - - \gamma^T 1= b - - \gamma\geq 0 - where : - - - M is the (ns,nt) metric cost matrix - - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - a and b are source and target weights (sum to 1) - - The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [2]_ - - - Parameters - ---------- - a : np.ndarray (ns,) - samples weights in the source domain - b : np.ndarray (nt,) or np.ndarray (nt,nbb) - samples in the target domain, compute sinkhorn with multiple targets - and fixed M if b is a matrix (return OT loss + dual variables in log) - M : np.ndarray (ns,nt) - loss matrix - reg : float - Regularization term >0 - numItermax : int, optional - Max number of iterations - stopThr : float, optional - Stop threshold on error (>0) - verbose : bool, optional - Print information along iterations - log : bool, optional - record log if True - to_numpy : boolean, optional (default True) - If true convert back the GPU array result to numpy format. - - - Returns - ------- - gamma : (ns x nt) ndarray - Optimal transportation matrix for the given parameters - log : dict - log dictionary return only if log==True in parameters - - - References - ---------- - - .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013 - - - See Also - -------- - ot.lp.emd : Unregularized OT - ot.optim.cg : General regularized OT - - """ - - a = cp.asarray(a) - b = cp.asarray(b) - M = cp.asarray(M) - - if len(a) == 0: - a = np.ones((M.shape[0],)) / M.shape[0] - if len(b) == 0: - b = np.ones((M.shape[1],)) / M.shape[1] - - # init data - Nini = len(a) - Nfin = len(b) - - if len(b.shape) > 1: - nbb = b.shape[1] - else: - nbb = 0 - - if log: - log = {'err': []} - - # we assume that no distances are null except those of the diagonal of - # distances - if nbb: - u = np.ones((Nini, nbb)) / Nini - v = np.ones((Nfin, nbb)) / Nfin - else: - u = np.ones(Nini) / Nini - v = np.ones(Nfin) / Nfin - - # print(reg) - - # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute - K = np.empty(M.shape, dtype=M.dtype) - np.divide(M, -reg, out=K) - np.exp(K, out=K) - - # print(np.min(K)) - tmp2 = np.empty(b.shape, dtype=M.dtype) - - Kp = (1 / a).reshape(-1, 1) * K - cpt = 0 - err = 1 - while (err > stopThr and cpt < numItermax): - uprev = u - vprev = v - - KtransposeU = np.dot(K.T, u) - v = np.divide(b, KtransposeU) - u = 1. / np.dot(Kp, v) - - if (np.any(KtransposeU == 0) or - np.any(np.isnan(u)) or np.any(np.isnan(v)) or - np.any(np.isinf(u)) or np.any(np.isinf(v))): - # we have reached the machine precision - # come back to previous solution and quit loop - print('Warning: numerical errors at iteration', cpt) - u = uprev - v = vprev - break - if cpt % 10 == 0: - # we can speed up the process by checking for the error only all - # the 10th iterations - if nbb: - err = np.sqrt( - np.sum((u - uprev)**2) / np.sum((u)**2) - + np.sum((v - vprev)**2) / np.sum((v)**2) - ) - else: - # compute right marginal tmp2= (diag(u)Kdiag(v))^T1 - tmp2 = np.sum(u[:, None] * K * v[None, :], 0) - #tmp2=np.einsum('i,ij,j->j', u, K, v) - err = np.linalg.norm(tmp2 - b) # violation of marginal - if log: - log['err'].append(err) - - if verbose: - if cpt % 200 == 0: - print( - '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19) - print('{:5d}|{:8e}|'.format(cpt, err)) - cpt = cpt + 1 - if log: - log['u'] = u - log['v'] = v - - if nbb: # return only loss - #res = np.einsum('ik,ij,jk,ij->k', u, K, v, M) (explodes cupy memory) - res = np.empty(nbb) - for i in range(nbb): - res[i] = np.sum(u[:, None, i] * (K * M) * v[None, :, i]) - if to_numpy: - res = utils.to_np(res) - if log: - return res, log - else: - return res - - else: # return OT matrix - res = u.reshape((-1, 1)) * K * v.reshape((1, -1)) - if to_numpy: - res = utils.to_np(res) - if log: - return res, log - else: - return res - - -# define sinkhorn as sinkhorn_knopp -sinkhorn = sinkhorn_knopp diff --git a/ot/gpu/da.py b/ot/gpu/da.py deleted file mode 100644 index 7adb830..0000000 --- a/ot/gpu/da.py +++ /dev/null @@ -1,144 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Domain adaptation with optimal transport with GPU implementation -""" - -# Author: Remi Flamary -# Nicolas Courty -# Michael Perrot -# Leo Gautheron -# -# License: MIT License - - -import cupy as np # np used for matrix computation -import cupy as cp # cp used for cupy specific operations -import numpy as npp -from . import utils - -from .bregman import sinkhorn - - -def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, - numInnerItermax=200, stopInnerThr=1e-9, verbose=False, - log=False, to_numpy=True): - """ - Solve the entropic regularization optimal transport problem with nonconvex - group lasso regularization on GPU - - If the input matrix are in numpy format, they will be uploaded to the - GPU first which can incur significant time overhead. - - - The function solves the following optimization problem: - - .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma) - + \eta \Omega_g(\gamma) - - s.t. \gamma 1 = a - - \gamma^T 1= b - - \gamma\geq 0 - where : - - - M is the (ns,nt) metric cost matrix - - :math:`\Omega_e` is the entropic regularization term - :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - :math:`\Omega_g` is the group lasso regulaization term - :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` - where :math:`\mathcal{I}_c` are the index of samples from class c - in the source domain. - - a and b are source and target weights (sum to 1) - - The algorithm used for solving the problem is the generalised conditional - gradient as proposed in [5]_ [7]_ - - - Parameters - ---------- - a : np.ndarray (ns,) - samples weights in the source domain - labels_a : np.ndarray (ns,) - labels of samples in the source domain - b : np.ndarray (nt,) - samples weights in the target domain - M : np.ndarray (ns,nt) - loss matrix - reg : float - Regularization term for entropic regularization >0 - eta : float, optional - Regularization term for group lasso regularization >0 - numItermax : int, optional - Max number of iterations - numInnerItermax : int, optional - Max number of iterations (inner sinkhorn solver) - stopInnerThr : float, optional - Stop threshold on error (inner sinkhorn solver) (>0) - verbose : bool, optional - Print information along iterations - log : bool, optional - record log if True - to_numpy : boolean, optional (default True) - If true convert back the GPU array result to numpy format. - - - Returns - ------- - gamma : (ns x nt) ndarray - Optimal transportation matrix for the given parameters - log : dict - log dictionary return only if log==True in parameters - - - References - ---------- - - .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, - "Optimal Transport for Domain Adaptation," in IEEE - Transactions on Pattern Analysis and Machine Intelligence , - vol.PP, no.99, pp.1-1 - .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). - Generalized conditional gradient: analysis of convergence - and applications. arXiv preprint arXiv:1510.06567. - - See Also - -------- - ot.lp.emd : Unregularized OT - ot.bregman.sinkhorn : Entropic regularized OT - ot.optim.cg : General regularized OT - - """ - - a, labels_a, b, M = utils.to_gpu(a, labels_a, b, M) - - p = 0.5 - epsilon = 1e-3 - - indices_labels = [] - labels_a2 = cp.asnumpy(labels_a) - classes = npp.unique(labels_a2) - for c in classes: - idxc = utils.to_gpu(*npp.where(labels_a2 == c)) - indices_labels.append(idxc) - - W = np.zeros(M.shape) - - for cpt in range(numItermax): - Mreg = M + eta * W - transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax, - stopThr=stopInnerThr, to_numpy=False) - # the transport has been computed. Check if classes are really - # separated - W = np.ones(M.shape) - for (i, c) in enumerate(classes): - - majs = np.sum(transp[indices_labels[i]], axis=0) - majs = p * ((majs + epsilon)**(p - 1)) - W[indices_labels[i]] = majs - - if to_numpy: - return utils.to_np(transp) - else: - return transp diff --git a/ot/gpu/utils.py b/ot/gpu/utils.py deleted file mode 100644 index 41e168a..0000000 --- a/ot/gpu/utils.py +++ /dev/null @@ -1,101 +0,0 @@ -# -*- coding: utf-8 -*- -""" -Utility functions for GPU -""" - -# Author: Remi Flamary -# Nicolas Courty -# Leo Gautheron -# -# License: MIT License - -import cupy as np # np used for matrix computation -import cupy as cp # cp used for cupy specific operations - - -def euclidean_distances(a, b, squared=False, to_numpy=True): - """ - Compute the pairwise euclidean distance between matrices a and b. - - If the input matrix are in numpy format, they will be uploaded to the - GPU first which can incur significant time overhead. - - Parameters - ---------- - a : np.ndarray (n, f) - first matrix - b : np.ndarray (m, f) - second matrix - to_numpy : boolean, optional (default True) - If true convert back the GPU array result to numpy format. - squared : boolean, optional (default False) - if True, return squared euclidean distance matrix - - Returns - ------- - c : (n x m) np.ndarray or cupy.ndarray - pairwise euclidean distance distance matrix - """ - - a, b = to_gpu(a, b) - - a2 = np.sum(np.square(a), 1) - b2 = np.sum(np.square(b), 1) - - c = -2 * np.dot(a, b.T) - c += a2[:, None] - c += b2[None, :] - - if not squared: - np.sqrt(c, out=c) - if to_numpy: - return to_np(c) - else: - return c - - -def dist(x1, x2=None, metric='sqeuclidean', to_numpy=True): - """Compute distance between samples in x1 and x2 on gpu - - Parameters - ---------- - - x1 : np.array (n1,d) - matrix with n1 samples of size d - x2 : np.array (n2,d), optional - matrix with n2 samples of size d (if None then x2=x1) - metric : str - Metric from 'sqeuclidean', 'euclidean', - - - Returns - ------- - - M : np.array (n1,n2) - distance matrix computed with given metric - - """ - if x2 is None: - x2 = x1 - if metric == "sqeuclidean": - return euclidean_distances(x1, x2, squared=True, to_numpy=to_numpy) - elif metric == "euclidean": - return euclidean_distances(x1, x2, squared=False, to_numpy=to_numpy) - else: - raise NotImplementedError - - -def to_gpu(*args): - """ Upload numpy arrays to GPU and return them""" - if len(args) > 1: - return (cp.asarray(x) for x in args) - else: - return cp.asarray(args[0]) - - -def to_np(*args): - """ convert GPU arras to numpy and return them""" - if len(args) > 1: - return (cp.asnumpy(x) for x in args) - else: - return cp.asnumpy(args[0]) diff --git a/test/test_gpu.py b/test/test_gpu.py deleted file mode 100644 index 8e62a74..0000000 --- a/test/test_gpu.py +++ /dev/null @@ -1,106 +0,0 @@ -"""Tests for module gpu for gpu acceleration """ - -# Author: Remi Flamary -# -# License: MIT License - -import numpy as np -import ot -import pytest - -try: # test if cudamat installed - import ot.gpu - nogpu = False -except ImportError: - nogpu = True - - -@pytest.mark.skipif(nogpu, reason="No GPU available") -def test_gpu_old_doctests(): - a = [.5, .5] - b = [.5, .5] - M = [[0., 1.], [1., 0.]] - G = ot.sinkhorn(a, b, M, 1) - np.testing.assert_allclose(G, np.array([[0.36552929, 0.13447071], - [0.13447071, 0.36552929]])) - - -@pytest.mark.skipif(nogpu, reason="No GPU available") -def test_gpu_dist(): - - rng = np.random.RandomState(0) - - for n_samples in [50, 100, 500, 1000]: - print(n_samples) - a = rng.rand(n_samples // 4, 100) - b = rng.rand(n_samples, 100) - - M = ot.dist(a.copy(), b.copy()) - M2 = ot.gpu.dist(a.copy(), b.copy()) - - np.testing.assert_allclose(M, M2, rtol=1e-10) - - M2 = ot.gpu.dist(a.copy(), b.copy(), metric='euclidean', to_numpy=False) - - # check raise not implemented wrong metric - with pytest.raises(NotImplementedError): - M2 = ot.gpu.dist(a.copy(), b.copy(), metric='cityblock', to_numpy=False) - - -@pytest.mark.skipif(nogpu, reason="No GPU available") -def test_gpu_sinkhorn(): - - rng = np.random.RandomState(0) - - for n_samples in [50, 100, 500, 1000]: - a = rng.rand(n_samples // 4, 100) - b = rng.rand(n_samples, 100) - - wa = ot.unif(n_samples // 4) - wb = ot.unif(n_samples) - - wb2 = np.random.rand(n_samples, 20) - wb2 /= wb2.sum(0, keepdims=True) - - M = ot.dist(a.copy(), b.copy()) - M2 = ot.gpu.dist(a.copy(), b.copy(), to_numpy=False) - - reg = 1 - - G = ot.sinkhorn(wa, wb, M, reg) - G1 = ot.gpu.sinkhorn(wa, wb, M, reg) - - np.testing.assert_allclose(G1, G, rtol=1e-10) - - # run all on gpu - ot.gpu.sinkhorn(wa, wb, M2, reg, to_numpy=False, log=True) - - # run sinkhorn for multiple targets - ot.gpu.sinkhorn(wa, wb2, M2, reg, to_numpy=False, log=True) - - -@pytest.mark.skipif(nogpu, reason="No GPU available") -def test_gpu_sinkhorn_lpl1(): - - rng = np.random.RandomState(0) - - for n_samples in [50, 100, 500]: - print(n_samples) - a = rng.rand(n_samples // 4, 100) - labels_a = np.random.randint(10, size=(n_samples // 4)) - b = rng.rand(n_samples, 100) - - wa = ot.unif(n_samples // 4) - wb = ot.unif(n_samples) - - M = ot.dist(a.copy(), b.copy()) - M2 = ot.gpu.dist(a.copy(), b.copy(), to_numpy=False) - - reg = 1 - - G = ot.da.sinkhorn_lpl1_mm(wa, labels_a, wb, M, reg) - G1 = ot.gpu.da.sinkhorn_lpl1_mm(wa, labels_a, wb, M, reg) - - np.testing.assert_allclose(G1, G, rtol=1e-10) - - ot.gpu.da.sinkhorn_lpl1_mm(wa, labels_a, wb, M2, reg, to_numpy=False, log=True) diff --git a/test/test_gromov.py b/test/test_gromov.py index 12fd2b9..9c85b92 100644 --- a/test/test_gromov.py +++ b/test/test_gromov.py @@ -188,7 +188,7 @@ def test_gromov2_gradients(): @pytest.skip_backend("jax", reason="test very slow with jax backend") @pytest.skip_backend("tf", reason="test very slow with tf backend") def test_entropic_gromov(nx): - n_samples = 50 # nb samples + n_samples = 10 # nb samples mu_s = np.array([0, 0]) cov_s = np.array([[1, 0], [0, 1]]) @@ -222,9 +222,9 @@ def test_entropic_gromov(nx): q, Gb.sum(0), atol=1e-04) # cf convergence gromov gw, log = ot.gromov.entropic_gromov_wasserstein2( - C1, C2, p, q, 'kl_loss', epsilon=1e-2, log=True) + C1, C2, p, q, 'kl_loss', max_iter=10, epsilon=1e-2, log=True) gwb, logb = ot.gromov.entropic_gromov_wasserstein2( - C1b, C2b, pb, qb, 'kl_loss', epsilon=1e-2, log=True) + C1b, C2b, pb, qb, 'kl_loss', max_iter=10, epsilon=1e-2, log=True) gwb = nx.to_numpy(gwb) G = log['T'] @@ -245,7 +245,7 @@ def test_entropic_gromov(nx): @pytest.skip_backend("tf", reason="test very slow with tf backend") def test_entropic_gromov_dtype_device(nx): # setup - n_samples = 50 # nb samples + n_samples = 5 # nb samples mu_s = np.array([0, 0]) cov_s = np.array([[1, 0], [0, 1]]) @@ -280,7 +280,7 @@ def test_entropic_gromov_dtype_device(nx): def test_pointwise_gromov(nx): - n_samples = 50 # nb samples + n_samples = 5 # nb samples mu_s = np.array([0, 0]) cov_s = np.array([[1, 0], [0, 1]]) @@ -331,14 +331,12 @@ def test_pointwise_gromov(nx): Gb = nx.to_numpy(nx.todense(Gb)) np.testing.assert_allclose(G, Gb, atol=1e-06) - np.testing.assert_allclose(float(logb['gw_dist_estimated']), 0.10342276348494964, atol=1e-8) - np.testing.assert_allclose(float(logb['gw_dist_std']), 0.0015952535464736394, atol=1e-8) @pytest.skip_backend("tf", reason="test very slow with tf backend") @pytest.skip_backend("jax", reason="test very slow with jax backend") def test_sampled_gromov(nx): - n_samples = 50 # nb samples + n_samples = 5 # nb samples mu_s = np.array([0, 0], dtype=np.float64) cov_s = np.array([[1, 0], [0, 1]], dtype=np.float64) @@ -365,9 +363,9 @@ def test_sampled_gromov(nx): return nx.abs(x - y) G, log = ot.gromov.sampled_gromov_wasserstein( - C1, C2, p, q, loss, max_iter=100, epsilon=1, log=True, verbose=True, random_state=42) + C1, C2, p, q, loss, max_iter=20, nb_samples_grad=2, epsilon=1, log=True, verbose=True, random_state=42) Gb, logb = ot.gromov.sampled_gromov_wasserstein( - C1b, C2b, pb, qb, lossb, max_iter=100, epsilon=1, log=True, verbose=True, random_state=42) + C1b, C2b, pb, qb, lossb, max_iter=20, nb_samples_grad=2, epsilon=1, log=True, verbose=True, random_state=42) Gb = nx.to_numpy(Gb) # check constraints @@ -377,13 +375,10 @@ def test_sampled_gromov(nx): np.testing.assert_allclose( q, Gb.sum(0), atol=1e-04) # cf convergence gromov - np.testing.assert_allclose(float(logb['gw_dist_estimated']), 0.05679474884977278, atol=1e-08) - np.testing.assert_allclose(float(logb['gw_dist_std']), 0.0005986592106971995, atol=1e-08) - def test_gromov_barycenter(nx): - ns = 10 - nt = 20 + ns = 5 + nt = 8 Xs, ys = ot.datasets.make_data_classif('3gauss', ns, random_state=42) Xt, yt = ot.datasets.make_data_classif('3gauss2', nt, random_state=42) @@ -450,8 +445,8 @@ def test_gromov_barycenter(nx): @pytest.mark.filterwarnings("ignore:divide") def test_gromov_entropic_barycenter(nx): - ns = 10 - nt = 20 + ns = 5 + nt = 10 Xs, ys = ot.datasets.make_data_classif('3gauss', ns, random_state=42) Xt, yt = ot.datasets.make_data_classif('3gauss2', nt, random_state=42) @@ -517,7 +512,7 @@ def test_gromov_entropic_barycenter(nx): def test_fgw(nx): - n_samples = 50 # nb samples + n_samples = 20 # nb samples mu_s = np.array([0, 0]) cov_s = np.array([[1, 0], [0, 1]]) @@ -579,7 +574,7 @@ def test_fgw(nx): def test_fgw2_gradients(): - n_samples = 50 # nb samples + n_samples = 20 # nb samples mu_s = np.array([0, 0]) cov_s = np.array([[1, 0], [0, 1]]) @@ -625,8 +620,8 @@ def test_fgw2_gradients(): def test_fgw_barycenter(nx): np.random.seed(42) - ns = 50 - nt = 60 + ns = 10 + nt = 20 Xs, ys = ot.datasets.make_data_classif('3gauss', ns, random_state=42) Xt, yt = ot.datasets.make_data_classif('3gauss2', nt, random_state=42) @@ -674,7 +669,7 @@ def test_fgw_barycenter(nx): def test_gromov_wasserstein_linear_unmixing(nx): - n = 10 + n = 4 X1, y1 = ot.datasets.make_data_classif('3gauss', n, random_state=42) X2, y2 = ot.datasets.make_data_classif('3gauss2', n, random_state=42) @@ -709,10 +704,10 @@ def test_gromov_wasserstein_linear_unmixing(nx): tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200 ) - np.testing.assert_allclose(unmixing1, nx.to_numpy(unmixing1b), atol=1e-06) - np.testing.assert_allclose(unmixing1, [1., 0.], atol=1e-01) - np.testing.assert_allclose(unmixing2, nx.to_numpy(unmixing2b), atol=1e-06) - np.testing.assert_allclose(unmixing2, [0., 1.], atol=1e-01) + np.testing.assert_allclose(unmixing1, nx.to_numpy(unmixing1b), atol=5e-06) + np.testing.assert_allclose(unmixing1, [1., 0.], atol=5e-01) + np.testing.assert_allclose(unmixing2, nx.to_numpy(unmixing2b), atol=5e-06) + np.testing.assert_allclose(unmixing2, [0., 1.], atol=5e-01) np.testing.assert_allclose(C1_emb, nx.to_numpy(C1b_emb), atol=1e-06) np.testing.assert_allclose(C2_emb, nx.to_numpy(C2b_emb), atol=1e-06) np.testing.assert_allclose(reconstruction1, nx.to_numpy(reconstruction1b), atol=1e-06) @@ -758,7 +753,7 @@ def test_gromov_wasserstein_linear_unmixing(nx): def test_gromov_wasserstein_dictionary_learning(nx): # create dataset composed from 2 structures which are repeated 5 times - shape = 10 + shape = 4 n_samples = 2 n_atoms = 2 projection = 'nonnegative_symmetric' @@ -795,7 +790,7 @@ def test_gromov_wasserstein_dictionary_learning(nx): for i in range(n_samples): _, _, _, reconstruction = ot.gromov.gromov_wasserstein_linear_unmixing( Cs[i], Cdict_init, p=ps[i], q=q, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200 + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50 ) initial_total_reconstruction += reconstruction @@ -803,7 +798,7 @@ def test_gromov_wasserstein_dictionary_learning(nx): Cdict, log = ot.gromov.gromov_wasserstein_dictionary_learning( Cs, D=n_atoms, nt=shape, ps=ps, q=q, Cdict_init=Cdict_init, epochs=epochs, batch_size=2 * n_samples, learning_rate=1., reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200, + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose ) # > Compute reconstruction of samples on learned dictionary without backend @@ -811,7 +806,7 @@ def test_gromov_wasserstein_dictionary_learning(nx): for i in range(n_samples): _, _, _, reconstruction = ot.gromov.gromov_wasserstein_linear_unmixing( Cs[i], Cdict, p=None, q=None, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200 + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50 ) total_reconstruction += reconstruction @@ -822,7 +817,7 @@ def test_gromov_wasserstein_dictionary_learning(nx): Cdictb, log = ot.gromov.gromov_wasserstein_dictionary_learning( Csb, D=n_atoms, nt=shape, ps=None, q=None, Cdict_init=Cdict_initb, epochs=epochs, batch_size=n_samples, learning_rate=1., reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200, + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose ) # Compute reconstruction of samples on learned dictionary @@ -830,7 +825,7 @@ def test_gromov_wasserstein_dictionary_learning(nx): for i in range(n_samples): _, _, _, reconstruction = ot.gromov.gromov_wasserstein_linear_unmixing( Csb[i], Cdictb, p=psb[i], q=qb, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200 + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50 ) total_reconstruction_b += reconstruction @@ -846,7 +841,7 @@ def test_gromov_wasserstein_dictionary_learning(nx): Cdict_bis, log = ot.gromov.gromov_wasserstein_dictionary_learning( Cs, D=n_atoms, nt=shape, ps=None, q=None, Cdict_init=None, epochs=epochs, batch_size=n_samples, learning_rate=1., reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200, + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose ) # > Compute reconstruction of samples on learned dictionary @@ -854,7 +849,7 @@ def test_gromov_wasserstein_dictionary_learning(nx): for i in range(n_samples): _, _, _, reconstruction = ot.gromov.gromov_wasserstein_linear_unmixing( Cs[i], Cdict_bis, p=ps[i], q=q, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200 + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50 ) total_reconstruction_bis += reconstruction @@ -865,7 +860,7 @@ def test_gromov_wasserstein_dictionary_learning(nx): Cdictb_bis, log = ot.gromov.gromov_wasserstein_dictionary_learning( Csb, D=n_atoms, nt=shape, ps=psb, q=qb, Cdict_init=None, epochs=epochs, batch_size=n_samples, learning_rate=1., reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200, + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose ) # > Compute reconstruction of samples on learned dictionary @@ -873,7 +868,7 @@ def test_gromov_wasserstein_dictionary_learning(nx): for i in range(n_samples): _, _, _, reconstruction = ot.gromov.gromov_wasserstein_linear_unmixing( Csb[i], Cdictb_bis, p=None, q=None, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200 + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50 ) total_reconstruction_b_bis += reconstruction @@ -892,7 +887,7 @@ def test_gromov_wasserstein_dictionary_learning(nx): Cdict_bis2, log = ot.gromov.gromov_wasserstein_dictionary_learning( Cs, D=n_atoms, nt=shape, ps=ps, q=q, Cdict_init=Cdict, epochs=epochs, batch_size=n_samples, learning_rate=10., reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200, + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, projection=projection, use_log=use_log, use_adam_optimizer=use_adam_optimizer, verbose=verbose ) # > Compute reconstruction of samples on learned dictionary @@ -900,7 +895,7 @@ def test_gromov_wasserstein_dictionary_learning(nx): for i in range(n_samples): _, _, _, reconstruction = ot.gromov.gromov_wasserstein_linear_unmixing( Cs[i], Cdict_bis2, p=ps[i], q=q, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200 + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50 ) total_reconstruction_bis2 += reconstruction @@ -911,7 +906,7 @@ def test_gromov_wasserstein_dictionary_learning(nx): Cdictb_bis2, log = ot.gromov.gromov_wasserstein_dictionary_learning( Csb, D=n_atoms, nt=shape, ps=psb, q=qb, Cdict_init=Cdictb, epochs=epochs, batch_size=n_samples, learning_rate=10., reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200, + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, projection=projection, use_log=use_log, use_adam_optimizer=use_adam_optimizer, verbose=verbose ) # > Compute reconstruction of samples on learned dictionary @@ -919,7 +914,7 @@ def test_gromov_wasserstein_dictionary_learning(nx): for i in range(n_samples): _, _, _, reconstruction = ot.gromov.gromov_wasserstein_linear_unmixing( Csb[i], Cdictb_bis2, p=psb[i], q=qb, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200 + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50 ) total_reconstruction_b_bis2 += reconstruction @@ -929,7 +924,7 @@ def test_gromov_wasserstein_dictionary_learning(nx): def test_fused_gromov_wasserstein_linear_unmixing(nx): - n = 10 + n = 4 X1, y1 = ot.datasets.make_data_classif('3gauss', n, random_state=42) X2, y2 = ot.datasets.make_data_classif('3gauss2', n, random_state=42) F, y = ot.datasets.make_data_classif('3gauss', n, random_state=42) @@ -947,28 +942,28 @@ def test_fused_gromov_wasserstein_linear_unmixing(nx): unmixing1, C1_emb, Y1_emb, OT, reconstruction1 = ot.gromov.fused_gromov_wasserstein_linear_unmixing( C1, F, Cdict, Ydict, p=p, q=p, alpha=0.5, reg=reg, - tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=20, max_iter_inner=200 + tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=10, max_iter_inner=50 ) unmixing1b, C1b_emb, Y1b_emb, OTb, reconstruction1b = ot.gromov.fused_gromov_wasserstein_linear_unmixing( C1b, Fb, Cdictb, Ydictb, p=None, q=None, alpha=0.5, reg=reg, - tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=20, max_iter_inner=200 + tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=10, max_iter_inner=50 ) unmixing2, C2_emb, Y2_emb, OT, reconstruction2 = ot.gromov.fused_gromov_wasserstein_linear_unmixing( C2, F, Cdict, Ydict, p=None, q=None, alpha=0.5, reg=reg, - tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=20, max_iter_inner=200 + tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=10, max_iter_inner=50 ) unmixing2b, C2b_emb, Y2b_emb, OTb, reconstruction2b = ot.gromov.fused_gromov_wasserstein_linear_unmixing( C2b, Fb, Cdictb, Ydictb, p=pb, q=pb, alpha=0.5, reg=reg, - tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=20, max_iter_inner=200 + tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=10, max_iter_inner=50 ) - np.testing.assert_allclose(unmixing1, nx.to_numpy(unmixing1b), atol=1e-06) - np.testing.assert_allclose(unmixing1, [1., 0.], atol=1e-01) - np.testing.assert_allclose(unmixing2, nx.to_numpy(unmixing2b), atol=1e-06) - np.testing.assert_allclose(unmixing2, [0., 1.], atol=1e-01) + np.testing.assert_allclose(unmixing1, nx.to_numpy(unmixing1b), atol=4e-06) + np.testing.assert_allclose(unmixing1, [1., 0.], atol=4e-01) + np.testing.assert_allclose(unmixing2, nx.to_numpy(unmixing2b), atol=4e-06) + np.testing.assert_allclose(unmixing2, [0., 1.], atol=4e-01) np.testing.assert_allclose(C1_emb, nx.to_numpy(C1b_emb), atol=1e-03) np.testing.assert_allclose(C2_emb, nx.to_numpy(C2b_emb), atol=1e-03) np.testing.assert_allclose(Y1_emb, nx.to_numpy(Y1b_emb), atol=1e-03) @@ -983,22 +978,22 @@ def test_fused_gromov_wasserstein_linear_unmixing(nx): unmixing1, C1_emb, Y1_emb, OT, reconstruction1 = ot.gromov.fused_gromov_wasserstein_linear_unmixing( C1, F, Cdict, Ydict, p=p, q=p, alpha=0.5, reg=reg, - tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=20, max_iter_inner=200 + tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=10, max_iter_inner=50 ) unmixing1b, C1b_emb, Y1b_emb, OTb, reconstruction1b = ot.gromov.fused_gromov_wasserstein_linear_unmixing( C1b, Fb, Cdictb, Ydictb, p=None, q=None, alpha=0.5, reg=reg, - tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=20, max_iter_inner=200 + tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=10, max_iter_inner=50 ) unmixing2, C2_emb, Y2_emb, OT, reconstruction2 = ot.gromov.fused_gromov_wasserstein_linear_unmixing( C2, F, Cdict, Ydict, p=None, q=None, alpha=0.5, reg=reg, - tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=20, max_iter_inner=200 + tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=10, max_iter_inner=50 ) unmixing2b, C2b_emb, Y2b_emb, OTb, reconstruction2b = ot.gromov.fused_gromov_wasserstein_linear_unmixing( C2b, Fb, Cdictb, Ydictb, p=pb, q=pb, alpha=0.5, reg=reg, - tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=20, max_iter_inner=200 + tol_outer=10**(-6), tol_inner=10**(-6), max_iter_outer=10, max_iter_inner=50 ) np.testing.assert_allclose(unmixing1, nx.to_numpy(unmixing1b), atol=1e-06) @@ -1018,7 +1013,7 @@ def test_fused_gromov_wasserstein_linear_unmixing(nx): def test_fused_gromov_wasserstein_dictionary_learning(nx): # create dataset composed from 2 structures which are repeated 5 times - shape = 10 + shape = 4 n_samples = 2 n_atoms = 2 projection = 'nonnegative_symmetric' @@ -1060,7 +1055,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): for i in range(n_samples): _, _, _, _, reconstruction = ot.gromov.fused_gromov_wasserstein_linear_unmixing( Cs[i], Ys[i], Cdict_init, Ydict_init, p=ps[i], q=q, - alpha=alpha, reg=0., tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200 + alpha=alpha, reg=0., tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50 ) initial_total_reconstruction += reconstruction @@ -1069,7 +1064,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): Cdict, Ydict, log = ot.gromov.fused_gromov_wasserstein_dictionary_learning( Cs, Ys, D=n_atoms, nt=shape, ps=ps, q=q, Cdict_init=Cdict_init, Ydict_init=Ydict_init, epochs=epochs, batch_size=n_samples, learning_rate_C=1., learning_rate_Y=1., alpha=alpha, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200, + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose ) # > Compute reconstruction of samples on learned dictionary @@ -1077,7 +1072,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): for i in range(n_samples): _, _, _, _, reconstruction = ot.gromov.fused_gromov_wasserstein_linear_unmixing( Cs[i], Ys[i], Cdict, Ydict, p=None, q=None, alpha=alpha, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200 + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50 ) total_reconstruction += reconstruction # Compare both @@ -1088,7 +1083,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): Cdictb, Ydictb, log = ot.gromov.fused_gromov_wasserstein_dictionary_learning( Csb, Ysb, D=n_atoms, nt=shape, ps=None, q=None, Cdict_init=Cdict_initb, Ydict_init=Ydict_initb, epochs=epochs, batch_size=2 * n_samples, learning_rate_C=1., learning_rate_Y=1., alpha=alpha, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200, + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose ) # > Compute reconstruction of samples on learned dictionary @@ -1096,7 +1091,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): for i in range(n_samples): _, _, _, _, reconstruction = ot.gromov.fused_gromov_wasserstein_linear_unmixing( Csb[i], Ysb[i], Cdictb, Ydictb, p=psb[i], q=qb, alpha=alpha, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200 + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50 ) total_reconstruction_b += reconstruction @@ -1111,7 +1106,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): Cdict_bis, Ydict_bis, log = ot.gromov.fused_gromov_wasserstein_dictionary_learning( Cs, Ys, D=n_atoms, nt=shape, ps=None, q=None, Cdict_init=None, Ydict_init=None, epochs=epochs, batch_size=n_samples, learning_rate_C=1., learning_rate_Y=1., alpha=alpha, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200, + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose ) # > Compute reconstruction of samples on learned dictionary @@ -1119,7 +1114,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): for i in range(n_samples): _, _, _, _, reconstruction = ot.gromov.fused_gromov_wasserstein_linear_unmixing( Cs[i], Ys[i], Cdict_bis, Ydict_bis, p=ps[i], q=q, alpha=alpha, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200 + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50 ) total_reconstruction_bis += reconstruction @@ -1130,7 +1125,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): Cdictb_bis, Ydictb_bis, log = ot.gromov.fused_gromov_wasserstein_dictionary_learning( Csb, Ysb, D=n_atoms, nt=shape, ps=None, q=None, Cdict_init=None, Ydict_init=None, epochs=epochs, batch_size=n_samples, learning_rate_C=1., learning_rate_Y=1., alpha=alpha, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200, + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, projection=projection, use_log=False, use_adam_optimizer=use_adam_optimizer, verbose=verbose ) @@ -1139,7 +1134,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): for i in range(n_samples): _, _, _, _, reconstruction = ot.gromov.fused_gromov_wasserstein_linear_unmixing( Csb[i], Ysb[i], Cdictb_bis, Ydictb_bis, p=psb[i], q=qb, alpha=alpha, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200 + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50 ) total_reconstruction_b_bis += reconstruction @@ -1156,7 +1151,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): Cdict_bis2, Ydict_bis2, log = ot.gromov.fused_gromov_wasserstein_dictionary_learning( Cs, Ys, D=n_atoms, nt=shape, ps=ps, q=q, Cdict_init=Cdict, Ydict_init=Ydict, epochs=epochs, batch_size=n_samples, learning_rate_C=10., learning_rate_Y=10., alpha=alpha, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200, + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, projection=projection, use_log=use_log, use_adam_optimizer=use_adam_optimizer, verbose=verbose ) # > Compute reconstruction of samples on learned dictionary @@ -1164,7 +1159,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): for i in range(n_samples): _, _, _, _, reconstruction = ot.gromov.fused_gromov_wasserstein_linear_unmixing( Cs[i], Ys[i], Cdict_bis2, Ydict_bis2, p=ps[i], q=q, alpha=alpha, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200 + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50 ) total_reconstruction_bis2 += reconstruction @@ -1175,7 +1170,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): Cdictb_bis2, Ydictb_bis2, log = ot.gromov.fused_gromov_wasserstein_dictionary_learning( Csb, Ysb, D=n_atoms, nt=shape, ps=None, q=None, Cdict_init=Cdictb, Ydict_init=Ydictb, epochs=epochs, batch_size=n_samples, learning_rate_C=10., learning_rate_Y=10., alpha=alpha, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200, + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50, projection=projection, use_log=use_log, use_adam_optimizer=use_adam_optimizer, verbose=verbose ) @@ -1184,7 +1179,7 @@ def test_fused_gromov_wasserstein_dictionary_learning(nx): for i in range(n_samples): _, _, _, _, reconstruction = ot.gromov.fused_gromov_wasserstein_linear_unmixing( Csb[i], Ysb[i], Cdictb_bis2, Ydictb_bis2, p=None, q=None, alpha=alpha, reg=0., - tol_outer=tol, tol_inner=tol, max_iter_outer=20, max_iter_inner=200 + tol_outer=tol, tol_inner=tol, max_iter_outer=10, max_iter_inner=50 ) total_reconstruction_b_bis2 += reconstruction -- cgit v1.2.3 From ac4cf442735ed4c0d5405ad861eddaa02afd4edd Mon Sep 17 00:00:00 2001 From: Laetitia Chapel Date: Mon, 11 Apr 2022 15:38:18 +0200 Subject: [MRG] MM algorithms for UOT (#362) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * bugfix * update refs partial OT * fixes small typos in plot_partial_wass_and_gromov * fix small bugs in partial.py * update README * pep8 bugfix * modif doctest * fix bugtests * update on test_partial and test on the numerical precision on ot/partial * resolve merge pb * Delete partial.py * update unbalanced: mm algo+plot * update unbalanced: mm algo+plot * update unbalanced: mm algo+plot * update unbalanced: mm algo+plot * update unbalanced: mm algo+plot * add test mm algo unbalanced OT * add test mm algo unbalanced OT * add test mm algo unbalanced OT * add test mm algo unbalanced OT * add test mm algo unbalanced OT * add test mm algo unbalanced OT * add test mm algo unbalanced OT * add test mm algo unbalanced OT * update unbalanced: mm algo+plot * update unbalanced: mm algo+plot * update releases.md with new MM UOT algorithms Co-authored-by: Rémi Flamary --- README.md | 6 +- RELEASES.md | 1 + docs/source/all.rst | 1 + examples/unbalanced-partial/plot_unbalanced_OT.py | 116 +++++ ot/partial.py | 84 +++- ot/regpath.py | 545 ++++++++++++++-------- ot/unbalanced.py | 223 +++++++++ test/test_unbalanced.py | 50 ++ 8 files changed, 802 insertions(+), 224 deletions(-) create mode 100644 examples/unbalanced-partial/plot_unbalanced_OT.py (limited to 'docs/source') diff --git a/README.md b/README.md index 2ace69c..1b50aeb 100644 --- a/README.md +++ b/README.md @@ -35,7 +35,7 @@ POT provides the following generic OT solvers (links to examples): Large-scale Optimal Transport (semi-dual problem [18] and dual problem [19]) * [Sampled solver of Gromov Wasserstein](https://pythonot.github.io/auto_examples/gromov/plot_gromov.html) for large-scale problem with any loss functions [33] * Non regularized [free support Wasserstein barycenters](https://pythonot.github.io/auto_examples/barycenters/plot_free_support_barycenter.html) [20]. -* [Unbalanced OT](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_UOT_1D.html) with KL relaxation and [barycenter](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_UOT_barycenter_1D.html) [10, 25]. +* [One dimensional Unbalanced OT](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_UOT_1D.html) with KL relaxation and [barycenter](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_UOT_barycenter_1D.html) [10, 25]. Also [exact unbalanced OT](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_unbalanced_ot.html) with KL and quadratic regularization and the [regularization path of UOT](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_regpath.html) [41] * [Partial Wasserstein and Gromov-Wasserstein](https://pythonot.github.io/auto_examples/unbalanced-partial/plot_partial_wass_and_gromov.html) (exact [29] and entropic [3] formulations). * [Sliced Wasserstein](https://pythonot.github.io/auto_examples/sliced-wasserstein/plot_variance.html) [31, 32] and Max-sliced Wasserstein [35] that can be used for gradient flows [36]. @@ -309,4 +309,6 @@ Dictionary Learning](https://arxiv.org/pdf/2102.06555.pdf), International Confer [39] Gozlan, N., Roberto, C., Samson, P. M., & Tetali, P. (2017). [Kantorovich duality for general transport costs and applications](https://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.712.1825&rep=rep1&type=pdf). Journal of Functional Analysis, 273(11), 3327-3405. -[40] Forrow, A., Hütter, J. C., Nitzan, M., Rigollet, P., Schiebinger, G., & Weed, J. (2019, April). [Statistical optimal transport via factored couplings](http://proceedings.mlr.press/v89/forrow19a/forrow19a.pdf). In The 22nd International Conference on Artificial Intelligence and Statistics (pp. 2454-2465). PMLR. \ No newline at end of file +[40] Forrow, A., Hütter, J. C., Nitzan, M., Rigollet, P., Schiebinger, G., & Weed, J. (2019, April). [Statistical optimal transport via factored couplings](http://proceedings.mlr.press/v89/forrow19a/forrow19a.pdf). In The 22nd International Conference on Artificial Intelligence and Statistics (pp. 2454-2465). PMLR. + +[41] Chapel*, L., Flamary*, R., Wu, H., Févotte, C., Gasso, G. (2021). [Unbalanced Optimal Transport through Non-negative Penalized Linear Regression](https://proceedings.neurips.cc/paper/2021/file/c3c617a9b80b3ae1ebd868b0017cc349-Paper.pdf) Advances in Neural Information Processing Systems (NeurIPS), 2020. (Two first co-authors) \ No newline at end of file diff --git a/RELEASES.md b/RELEASES.md index b54a84a..7942a15 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -19,6 +19,7 @@ - Add backend support for Domain Adaptation and Unbalanced solvers (PR #343). - Add (F)GW linear dictionary learning solvers + example (PR #319) - Add links to related PR and Issues in the doc release page (PR #350) +- Add new minimization-maximization algorithms for solving exact Unbalanced OT + example (PR #362) #### Closed issues diff --git a/docs/source/all.rst b/docs/source/all.rst index 3f7d029..1ec6be3 100644 --- a/docs/source/all.rst +++ b/docs/source/all.rst @@ -26,6 +26,7 @@ API and modules plot stochastic unbalanced + regpath partial sliced weak diff --git a/examples/unbalanced-partial/plot_unbalanced_OT.py b/examples/unbalanced-partial/plot_unbalanced_OT.py new file mode 100644 index 0000000..03487e7 --- /dev/null +++ b/examples/unbalanced-partial/plot_unbalanced_OT.py @@ -0,0 +1,116 @@ +# -*- coding: utf-8 -*- +""" +============================================================== +2D examples of exact and entropic unbalanced optimal transport +============================================================== +This example is designed to show how to compute unbalanced and +partial OT in POT. + +UOT aims at solving the following optimization problem: + + .. math:: + W = \min_{\gamma} <\gamma, \mathbf{M}>_F + + \mathrm{reg}\cdot\Omega(\gamma) + + \mathrm{reg_m} \cdot \mathrm{div}(\gamma \mathbf{1}, \mathbf{a}) + + \mathrm{reg_m} \cdot \mathrm{div}(\gamma^T \mathbf{1}, \mathbf{b}) + + s.t. + \gamma \geq 0 + +where :math:`\mathrm{div}` is a divergence. +When using the entropic UOT, :math:`\mathrm{reg}>0` and :math:`\mathrm{div}` +should be the Kullback-Leibler divergence. +When solving exact UOT, :math:`\mathrm{reg}=0` and :math:`\mathrm{div}` +can be either the Kullback-Leibler or the quadratic divergence. +Using :math:`\ell_1` norm gives the so-called partial OT. +""" + +# Author: Laetitia Chapel +# License: MIT License + +import numpy as np +import matplotlib.pylab as pl +import ot + +############################################################################## +# Generate data +# ------------- + +# %% parameters and data generation + +n = 40 # nb samples + +mu_s = np.array([-1, -1]) +cov_s = np.array([[1, 0], [0, 1]]) + +mu_t = np.array([4, 4]) +cov_t = np.array([[1, -.8], [-.8, 1]]) + +np.random.seed(0) +xs = ot.datasets.make_2D_samples_gauss(n, mu_s, cov_s) +xt = ot.datasets.make_2D_samples_gauss(n, mu_t, cov_t) + +n_noise = 10 + +xs = np.concatenate((xs, ((np.random.rand(n_noise, 2) - 4))), axis=0) +xt = np.concatenate((xt, ((np.random.rand(n_noise, 2) + 6))), axis=0) + +n = n + n_noise + +a, b = np.ones((n,)) / n, np.ones((n,)) / n # uniform distribution on samples + +# loss matrix +M = ot.dist(xs, xt) +M /= M.max() + + +############################################################################## +# Compute entropic kl-regularized UOT, kl- and l2-regularized UOT +# ----------- + +reg = 0.005 +reg_m_kl = 0.05 +reg_m_l2 = 5 +mass = 0.7 + +entropic_kl_uot = ot.unbalanced.sinkhorn_unbalanced(a, b, M, reg, reg_m_kl) +kl_uot = ot.unbalanced.mm_unbalanced(a, b, M, reg_m_kl, div='kl') +l2_uot = ot.unbalanced.mm_unbalanced(a, b, M, reg_m_l2, div='l2') +partial_ot = ot.partial.partial_wasserstein(a, b, M, m=mass) + +############################################################################## +# Plot the results +# ---------------- + +pl.figure(2) +transp = [partial_ot, l2_uot, kl_uot, entropic_kl_uot] +title = ["partial OT \n m=" + str(mass), "$\ell_2$-UOT \n $\mathrm{reg_m}$=" + + str(reg_m_l2), "kl-UOT \n $\mathrm{reg_m}$=" + str(reg_m_kl), + "entropic kl-UOT \n $\mathrm{reg_m}$=" + str(reg_m_kl)] + +for p in range(4): + pl.subplot(2, 4, p + 1) + P = transp[p] + if P.sum() > 0: + P = P / P.max() + for i in range(n): + for j in range(n): + if P[i, j] > 0: + pl.plot([xs[i, 0], xt[j, 0]], [xs[i, 1], xt[j, 1]], color='C2', + alpha=P[i, j] * 0.3) + pl.scatter(xs[:, 0], xs[:, 1], c='C0', alpha=0.2) + pl.scatter(xt[:, 0], xt[:, 1], c='C1', alpha=0.2) + pl.scatter(xs[:, 0], xs[:, 1], c='C0', s=P.sum(1).ravel() * (1 + p) * 2) + pl.scatter(xt[:, 0], xt[:, 1], c='C1', s=P.sum(0).ravel() * (1 + p) * 2) + pl.title(title[p]) + pl.yticks(()) + pl.xticks(()) + if p < 1: + pl.ylabel("mappings") + pl.subplot(2, 4, p + 5) + pl.imshow(P, cmap='jet') + pl.yticks(()) + pl.xticks(()) + if p < 1: + pl.ylabel("transport plans") +pl.show() diff --git a/ot/partial.py b/ot/partial.py index b7093e4..0a9e450 100755 --- a/ot/partial.py +++ b/ot/partial.py @@ -7,7 +7,6 @@ Partial OT solvers # License: MIT License import numpy as np - from .lp import emd @@ -29,7 +28,8 @@ def partial_wasserstein_lagrange(a, b, M, reg_m=None, nb_dummies=1, log=False, \gamma &\geq 0 - \mathbf{1}^T \gamma^T \mathbf{1} = m &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\} + \mathbf{1}^T \gamma^T \mathbf{1} = m & + \leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\} or equivalently (see Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. @@ -50,7 +50,8 @@ def partial_wasserstein_lagrange(a, b, M, reg_m=None, nb_dummies=1, log=False, - :math:`\lambda` is the lagrangian cost. Tuning its value allows attaining a given mass to be transported `m` - The formulation of the problem has been proposed in :ref:`[28] ` + The formulation of the problem has been proposed in + :ref:`[28] ` Parameters @@ -261,7 +262,7 @@ def partial_wasserstein(a, b, M, m=None, nb_dummies=1, log=False, **kwargs): b_extended = np.append(b, [(np.sum(a) - m) / nb_dummies] * nb_dummies) a_extended = np.append(a, [(np.sum(b) - m) / nb_dummies] * nb_dummies) M_extended = np.zeros((len(a_extended), len(b_extended))) - M_extended[-nb_dummies:, -nb_dummies:] = np.max(M) * 1e5 + M_extended[-nb_dummies:, -nb_dummies:] = np.max(M) * 2 M_extended[:len(a), :len(b)] = M gamma, log_emd = emd(a_extended, b_extended, M_extended, log=True, @@ -455,7 +456,8 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None, - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights - `m` is the amount of mass to be transported - The formulation of the problem has been proposed in :ref:`[29] ` + The formulation of the problem has been proposed in + :ref:`[29] ` Parameters @@ -469,7 +471,8 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None, q : ndarray, shape (nt,) Distribution in the target space m : float, optional - Amount of mass to be transported (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`) + Amount of mass to be transported + (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`) nb_dummies : int, optional Number of dummy points to add (avoid instabilities in the EMD solver) G0 : ndarray, shape (ns, nt), optional @@ -623,16 +626,19 @@ def partial_gromov_wasserstein2(C1, C2, p, q, m=None, nb_dummies=1, G0=None, \gamma &\geq 0 - \mathbf{1}^T \gamma^T \mathbf{1} = m &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\} + \mathbf{1}^T \gamma^T \mathbf{1} = m + &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\} where : - :math:`\mathbf{M}` is the metric cost matrix - - :math:`\Omega` is the entropic regularization term, :math:`\Omega(\gamma) = \sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`\Omega` is the entropic regularization term, + :math:`\Omega(\gamma) = \sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights - `m` is the amount of mass to be transported - The formulation of the problem has been proposed in :ref:`[29] ` + The formulation of the problem has been proposed in + :ref:`[29] ` Parameters @@ -646,7 +652,8 @@ def partial_gromov_wasserstein2(C1, C2, p, q, m=None, nb_dummies=1, G0=None, q : ndarray, shape (nt,) Distribution in the target space m : float, optional - Amount of mass to be transported (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`) + Amount of mass to be transported + (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`) nb_dummies : int, optional Number of dummy points to add (avoid instabilities in the EMD solver) G0 : ndarray, shape (ns, nt), optional @@ -728,21 +735,25 @@ def entropic_partial_wasserstein(a, b, M, reg, m=None, numItermax=1000, The function considers the following problem: .. math:: - \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F + \mathrm{reg} \cdot\Omega(\gamma) + \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, + \mathbf{M} \rangle_F + \mathrm{reg} \cdot\Omega(\gamma) s.t. \gamma \mathbf{1} &\leq \mathbf{a} \\ \gamma^T \mathbf{1} &\leq \mathbf{b} \\ \gamma &\geq 0 \\ - \mathbf{1}^T \gamma^T \mathbf{1} = m &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\} \\ + \mathbf{1}^T \gamma^T \mathbf{1} = m + &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\} \\ where : - :math:`\mathbf{M}` is the metric cost matrix - - :math:`\Omega` is the entropic regularization term, :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`\Omega` is the entropic regularization term, + :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights - `m` is the amount of mass to be transported - The formulation of the problem has been proposed in :ref:`[3] ` (prop. 5) + The formulation of the problem has been proposed in + :ref:`[3] ` (prop. 5) Parameters @@ -829,12 +840,23 @@ def entropic_partial_wasserstein(a, b, M, reg, m=None, numItermax=1000, np.multiply(K, m / np.sum(K), out=K) err, cpt = 1, 0 + q1 = np.ones(K.shape) + q2 = np.ones(K.shape) + q3 = np.ones(K.shape) while (err > stopThr and cpt < numItermax): Kprev = K + K = K * q1 K1 = np.dot(np.diag(np.minimum(a / np.sum(K, axis=1), dx)), K) + q1 = q1 * Kprev / K1 + K1prev = K1 + K1 = K1 * q2 K2 = np.dot(K1, np.diag(np.minimum(b / np.sum(K1, axis=0), dy))) + q2 = q2 * K1prev / K2 + K2prev = K2 + K2 = K2 * q3 K = K2 * (m / np.sum(K2)) + q3 = q3 * K2prev / K if np.any(np.isnan(K)) or np.any(np.isinf(K)): print('Warning: numerical errors at iteration', cpt) @@ -861,7 +883,8 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None, numItermax=1000, tol=1e-7, log=False, verbose=False): r""" - Returns the partial Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` + Returns the partial Gromov-Wasserstein transport between + :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` The function solves the following optimization problem: @@ -877,7 +900,8 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None, \gamma^T \mathbf{1} &\leq \mathbf{b} - \mathbf{1}^T \gamma^T \mathbf{1} = m &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\} + \mathbf{1}^T \gamma^T \mathbf{1} = m + &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\} where : @@ -885,10 +909,13 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None, - :math:`\mathbf{C_2}` is the metric cost matrix in the target space - :math:`\mathbf{p}` and :math:`\mathbf{q}` are the sample weights - `L`: quadratic loss function - - :math:`\Omega` is the entropic regularization term, :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`\Omega` is the entropic regularization term, + :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - `m` is the amount of mass to be transported - The formulation of the GW problem has been proposed in :ref:`[12] ` and the partial GW in :ref:`[29] ` + The formulation of the GW problem has been proposed in + :ref:`[12] ` and the + partial GW in :ref:`[29] ` Parameters ---------- @@ -903,7 +930,8 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None, reg: float entropic regularization parameter m : float, optional - Amount of mass to be transported (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`) + Amount of mass to be transported (default: + :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`) G0 : ndarray, shape (ns, nt), optional Initialisation of the transportation matrix numItermax : int, optional @@ -1005,13 +1033,15 @@ def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None, numItermax=1000, tol=1e-7, log=False, verbose=False): r""" - Returns the partial Gromov-Wasserstein discrepancy between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` + Returns the partial Gromov-Wasserstein discrepancy between + :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` The function solves the following optimization problem: .. math:: - GW = \min_{\gamma} \quad \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l})\cdot - \gamma_{i,j}\cdot\gamma_{k,l} + \mathrm{reg} \cdot\Omega(\gamma) + GW = \min_{\gamma} \quad \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, + \mathbf{C_2}_{j,l})\cdot + \gamma_{i,j}\cdot\gamma_{k,l} + \mathrm{reg} \cdot\Omega(\gamma) .. math:: s.t. \ \gamma &\geq 0 @@ -1028,10 +1058,13 @@ def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None, - :math:`\mathbf{C_2}` is the metric cost matrix in the target space - :math:`\mathbf{p}` and :math:`\mathbf{q}` are the sample weights - `L` : quadratic loss function - - :math:`\Omega` is the entropic regularization term, :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`\Omega` is the entropic regularization term, + :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - `m` is the amount of mass to be transported - The formulation of the GW problem has been proposed in :ref:`[12] ` and the partial GW in :ref:`[29] ` + The formulation of the GW problem has been proposed in + :ref:`[12] ` and the + partial GW in :ref:`[29] ` Parameters @@ -1047,7 +1080,8 @@ def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None, reg: float entropic regularization parameter m : float, optional - Amount of mass to be transported (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`) + Amount of mass to be transported (default: + :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`) G0 : ndarray, shape (ns, nt), optional Initialisation of the transportation matrix numItermax : int, optional diff --git a/ot/regpath.py b/ot/regpath.py index 269937a..e745288 100644 --- a/ot/regpath.py +++ b/ot/regpath.py @@ -11,34 +11,48 @@ import scipy.sparse as sp def recast_ot_as_lasso(a, b, C): - r"""This function recasts the l2-penalized UOT problem as a Lasso problem + r"""This function recasts the l2-penalized UOT problem as a Lasso problem. + + Recall the l2-penalized UOT problem defined in + :ref:`[41] ` - Recall the l2-penalized UOT problem defined in [Chapel et al., 2021] .. math:: - UOT = \min_T + \lambda \|T 1_m - a\|_2^2 + - \lambda \|T^T 1_n - b\|_2^2 + \text{UOT}_{\lambda} = \min_T + \lambda \|T 1_m - + \mathbf{a}\|_2^2 + + \lambda \|T^T 1_n - \mathbf{b}\|_2^2 + s.t. T \geq 0 + where : - - C is the (dim_a, dim_b) metric cost matrix - - :math:`\lambda` is the l2-regularization coefficient - - a and b are source and target distributions - - T is the transport plan to optimize - The problem above can be reformulated to a non-negative penalized + - :math:`C` is the cost matrix + - :math:`\lambda` is the l2-regularization parameter + - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the source and target \ + distributions + - :math:`T` is the transport plan to optimize + + The problem above can be reformulated as a non-negative penalized linear regression problem, particularly Lasso + .. math:: - UOT2 = \min_t \gamma c^T t + 0.5 * \|H t - y\|_2^2 + \text{UOT2}_{\lambda} = \min_{\mathbf{t}} \gamma \mathbf{c}^T + \mathbf{t} + 0.5 * \|H \mathbf{t} - \mathbf{y}\|_2^2 + s.t. - t \geq 0 + \mathbf{t} \geq 0 + where : - - c is a (dim_a * dim_b, ) metric cost vector (flattened version of C) - - :math:`\gamma = 1/\lambda` is the l2-regularization coefficient - - y is the concatenation of vectors a and b, defined as y^T = [a^T b^T] - - H is a (dim_a + dim_b, dim_a * dim_b) metric matrix, - see [Chapel et al., 2021] for the design of H. The matrix product H t - computes both the source marginal and the target marginal. - - t is a (dim_a * dim_b, ) metric vector (flattened version of T) + + - :math:`\mathbf{c}` is the flattened version of the cost matrix :math:`C` + - :math:`\mathbf{y}` is the concatenation of vectors :math:`\mathbf{a}` \ + and :math:`\mathbf{b}` + - :math:`H` is a metric matrix, see :ref:`[41] ` for \ + the design of :math:`H`. The matrix product :math:`H\mathbf{t}` \ + computes both the source marginal and the target marginals. + - :math:`\mathbf{t}` is the flattened version of the transport plan \ + :math:`T` + Parameters ---------- a : np.ndarray (dim_a,) @@ -47,14 +61,16 @@ def recast_ot_as_lasso(a, b, C): Histogram of dimension dim_b C : np.ndarray, shape (dim_a, dim_b) Cost matrix + Returns ------- H : np.ndarray (dim_a+dim_b, dim_a*dim_b) - Auxiliary matrix constituted by 0 and 1 + Design matrix that contains only 0 and 1 y : np.ndarray (ns + nt, ) - Concatenation of histogram a and histogram b + Concatenation of histograms :math:`\mathbf{a}` and :math:`\mathbf{b}` c : np.ndarray (ns * nt, ) - Flattened array of cost matrix + Flattened array of the cost matrix + Examples -------- >>> import ot @@ -73,12 +89,12 @@ def recast_ot_as_lasso(a, b, C): >>> c array([16., 25., 28., 16., 40., 36.]) + References ---------- - [Chapel et al., 2021]: - Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). + .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). Unbalanced optimal transport through non-negative penalized - linear regression. + linear regression. NeurIPS. """ dim_a = np.shape(a)[0] @@ -97,33 +113,47 @@ def recast_ot_as_lasso(a, b, C): def recast_semi_relaxed_as_lasso(a, b, C): - r"""This function recasts the semi-relaxed l2-UOT problem as Lasso problem + r"""This function recasts the semi-relaxed l2-UOT problem as Lasso problem. .. math:: - semi-relaxed UOT = \min_T + \lambda \|T 1_m - a\|_2^2 + + \text{semi-relaxed UOT} = \min_T + + \lambda \|T 1_m - \mathbf{a}\|_2^2 + s.t. - T^T 1_n = b - t \geq 0 + T^T 1_n = \mathbf{b} + + \mathbf{t} \geq 0 + where : - - C is the (dim_a, dim_b) metric cost matrix - - :math:`\lambda` is the l2-regularization coefficient - - a and b are source and target distributions - - T is the transport plan to optimize + + - :math:`C` is the metric cost matrix + - :math:`\lambda` is the l2-regularization parameter + - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the source and target \ + distributions + - :math:`T` is the transport plan to optimize The problem above can be reformulated as follows + .. math:: - semi-relaxed UOT2 = \min_t \gamma c^T t + 0.5 * \|H_r t - a\|_2^2 + \text{semi-relaxed UOT2} = \min_t \gamma \mathbf{c}^T t + + 0.5 * \|H_r \mathbf{t} - \mathbf{a}\|_2^2 + s.t. - H_c t = b - t \geq 0 + H_c \mathbf{t} = \mathbf{b} + + \mathbf{t} \geq 0 + where : - - c is a (dim_a * dim_b, ) metric cost vector (flattened version of C) - - :math:`\gamma = 1/\lambda` is the l2-regularization coefficient - - H_r is a (dim_a, dim_a * dim_b) metric matrix, - which computes the sum along the rows of transport plan T - - H_c is a (dim_b, dim_a * dim_b) metric matrix, - which computes the sum along the columns of transport plan T - - t is a (dim_a * dim_b, ) metric vector (flattened version of T) + + - :math:`\mathbf{c}` is flattened version of the cost matrix :math:`C` + - :math:`\gamma = 1/\lambda` is the l2-regularization parameter + - :math:`H_r` is a metric matrix which computes the sum along the \ + rows of the transport plan :math:`T` + - :math:`H_c` is a metric matrix which computes the sum along the \ + columns of the transport plan :math:`T` + - :math:`\mathbf{t}` is the flattened version of :math:`T` + Parameters ---------- a : np.ndarray (dim_a,) @@ -132,16 +162,18 @@ def recast_semi_relaxed_as_lasso(a, b, C): Histogram of dimension dim_b C : np.ndarray, shape (dim_a, dim_b) Cost matrix + Returns ------- Hr : np.ndarray (dim_a, dim_a * dim_b) Auxiliary matrix constituted by 0 and 1, which computes - the sum along the rows of transport plan T + the sum along the rows of transport plan :math:`T` Hc : np.ndarray (dim_b, dim_a * dim_b) Auxiliary matrix constituted by 0 and 1, which computes - the sum along the columns of transport plan T + the sum along the columns of transport plan :math:`T` c : np.ndarray (ns * nt, ) - Flattened array of cost matrix + Flattened array of the cost matrix + Examples -------- >>> import ot @@ -179,49 +211,60 @@ def recast_semi_relaxed_as_lasso(a, b, C): def ot_next_gamma(phi, delta, HtH, Hty, c, active_index, current_gamma): r""" This function computes the next value of gamma if a variable - will be added in next iteration of the regularization path + is added in the next iteration of the regularization path. We look for the largest value of gamma such that the gradient of an inactive variable vanishes + .. math:: - \max_{i \in \bar{A}} \frac{h_i^T(H_A \phi - y)}{h_i^T H_A \delta - c_i} + \max_{i \in \bar{A}} \frac{\mathbf{h}_i^T(H_A \phi - \mathbf{y})} + {\mathbf{h}_i^T H_A \delta - \mathbf{c}_i} + where : + - A is the current active set - - h_i is the ith column of auxiliary matrix H - - H_A is the sub-matrix constructed by the columns of H - whose indices belong to the active set A - - c_i is the ith element of cost vector c - - y is the concatenation of source and target distribution - - :math:`\phi` is the intercept of the solutions in current iteration - - :math:`\delta` is the slope of the solutions in current iteration + - :math:`\mathbf{h}_i` is the :math:`i` th column of the design \ + matrix :math:`{H}` + - :math:`{H}_A` is the sub-matrix constructed by the columns of \ + :math:`{H}` whose indices belong to the active set A + - :math:`\mathbf{c}_i` is the :math:`i` th element of the cost vector \ + :math:`\mathbf{c}` + - :math:`\mathbf{y}` is the concatenation of the source and target \ + distributions + - :math:`\phi` is the intercept of the solutions at the current iteration + - :math:`\delta` is the slope of the solutions at the current iteration + Parameters ---------- - phi : np.ndarray (|A|, ) - Intercept of the solutions in current iteration (t is piecewise linear) - delta : np.ndarray (|A|, ) - Slope of the solutions in current iteration (t is piecewise linear) + phi : np.ndarray (size(A), ) + Intercept of the solutions at the current iteration + delta : np.ndarray (size(A), ) + Slope of the solutions at the current iteration HtH : np.ndarray (dim_a * dim_b, dim_a * dim_b) - Matrix product of H^T H + Matrix product of :math:`{H}^T {H}` Hty : np.ndarray (dim_a + dim_b, ) - Matrix product of H^T y + Matrix product of :math:`{H}^T \mathbf{y}` c: np.ndarray (dim_a * dim_b, ) - Flattened array of cost matrix C + Flattened array of the cost matrix :math:`{C}` active_index : list Indices of active variables current_gamma : float - Value of regularization coefficient at the start of current iteration + Value of the regularization parameter at the beginning of the current \ + iteration + Returns ------- next_gamma : float Value of gamma if a variable is added to active set in next iteration next_active_index : int Index of variable to be activated + + References ---------- - [Chapel et al., 2021]: - Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). + .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). Unbalanced optimal transport through non-negative penalized - linear regression. + linear regression. NeurIPS. """ M = (HtH[:, active_index].dot(phi) - Hty) / \ (HtH[:, active_index].dot(delta) - c + 1e-16) @@ -237,56 +280,65 @@ def semi_relaxed_next_gamma(phi, delta, phi_u, delta_u, HrHr, Hc, Hra, By taking the Lagrangian form of the problem, we obtain a similar update as the two-sided relaxed UOT + .. math:: - \max_{i \in \bar{A}} \frac{h_{r i}^T(H_{r A} \phi - a) + h_{c i}^T - \phi_u}{h_{r i}^T H_{r A} \delta + h_{c i} \delta_u - c_i} + + \max_{i \in \bar{A}} \frac{\mathbf{h}_{ri}^T(H_{rA} \phi - \mathbf{a}) + + \mathbf{h}_{c i}^T\phi_u}{\mathbf{h}_{r i}^T H_{r A} \delta + \ + \mathbf{h}_{c i} \delta_u - \mathbf{c}_i} + where : + - A is the current active set - - h_{r i} is the ith column of the matrix H_r - - h_{c i} is the ith column of the matrix H_c - - H_{r A} is the sub-matrix constructed by the columns of H_r - whose indices belong to the active set A - - c_i is the ith element of cost vector c - - y is the concatenation of source and target distribution + - :math:`\mathbf{h}_{r i}` is the ith column of the matrix :math:`H_r` + - :math:`\mathbf{h}_{c i}` is the ith column of the matrix :math:`H_c` + - :math:`H_{r A}` is the sub-matrix constructed by the columns of \ + :math:`H_r` whose indices belong to the active set A + - :math:`\mathbf{c}_i` is the :math:`i` th element of cost vector \ + :math:`\mathbf{c}` - :math:`\phi` is the intercept of the solutions in current iteration - :math:`\delta` is the slope of the solutions in current iteration - - :math:`\phi_u` is the intercept of Lagrange parameter in current - iteration - - :math:`\delta_u` is the slope of Lagrange parameter in current iteration + - :math:`\phi_u` is the intercept of Lagrange parameter at the \ + current iteration + - :math:`\delta_u` is the slope of Lagrange parameter at the \ + current iteration + Parameters ---------- - phi : np.ndarray (|A|, ) - Intercept of the solutions in current iteration (t is piecewise linear) - delta : np.ndarray (|A|, ) - Slope of the solutions in current iteration (t is piecewise linear) + phi : np.ndarray (size(A), ) + Intercept of the solutions at the current iteration + delta : np.ndarray (size(A), ) + Slope of the solutions at the current iteration phi_u : np.ndarray (dim_b, ) - Intercept of the Lagrange parameter in current iteration (also linear) + Intercept of the Lagrange parameter at the current iteration delta_u : np.ndarray (dim_b, ) - Slope of the Lagrange parameter in current iteration (also linear) + Slope of the Lagrange parameter at the current iteration HrHr : np.ndarray (dim_a * dim_b, dim_a * dim_b) - Matrix product of H_r^T H_r + Matrix product of :math:`H_r^T H_r` Hc : np.ndarray (dim_b, dim_a * dim_b) - Matrix that computes the sum along the columns of transport plan T + Matrix that computes the sum along the columns of the transport plan \ + :math:`T` Hra : np.ndarray (dim_a * dim_b, ) - Matrix product of H_r^T a + Matrix product of :math:`H_r^T \mathbf{a}` c: np.ndarray (dim_a * dim_b, ) - Flattened array of cost matrix C + Flattened array of cost matrix :math:`C` active_index : list Indices of active variables current_gamma : float Value of regularization coefficient at the start of current iteration + Returns ------- next_gamma : float Value of gamma if a variable is added to active set in next iteration next_active_index : int Index of variable to be activated + References ---------- - [Chapel et al., 2021]: - Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). + .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). Unbalanced optimal transport through non-negative penalized - linear regression. + linear regression. NeurIPS. """ M = (HrHr[:, active_index].dot(phi) - Hra + Hc.T.dot(phi_u)) / \ @@ -297,37 +349,48 @@ def semi_relaxed_next_gamma(phi, delta, phi_u, delta_u, HrHr, Hc, Hra, def compute_next_removal(phi, delta, current_gamma): - r""" This function computes the next value of gamma if a variable - is removed in next iteration of regularization path + r""" This function computes the next gamma value if a variable + is removed at the next iteration of the regularization path. + + We look for the largest value of the regularization parameter such that + an element of the current solution vanishes - We look for the largest value of gamma such that - an element of current solution vanishes .. math:: \max_{j \in A} \frac{\phi_j}{\delta_j} + where : + - A is the current active set - - phi_j is the jth element of the intercept of current solution - - delta_j is the jth elemnt of the slope of current solution + - :math:`\phi_j` is the :math:`j` th element of the intercept of the \ + current solution + - :math:`\delta_j` is the :math:`j` th element of the slope of the \ + current solution + + Parameters ---------- - phi : np.ndarray (|A|, ) - Intercept of the solutions in current iteration (t is piecewise linear) - delta : np.ndarray (|A|, ) - Slope of the solutions in current iteration (t is piecewise linear) + phi : ndarray, shape (size(A), ) + Intercept of the solution at the current iteration + delta : ndarray, shape (size(A), ) + Slope of the solution at the current iteration current_gamma : float - Value of regularization coefficient at the start of current iteration + Value of the regularization parameter at the beginning of the \ + current iteration + Returns ------- next_removal_gamma : float - Value of gamma if a variable is removed in next iteration + Gamma value if a variable is removed at the next iteration next_removal_index : int - Index of the variable to remove in next iteration + Index of the variable to be removed at the next iteration + + + .. _references-regpath: References ---------- - [Chapel et al., 2021]: - Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). + .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). Unbalanced optimal transport through non-negative penalized - linear regression. + linear regression. NeurIPS. """ r_candidate = phi / (delta - 1e-16) r_candidate[r_candidate >= (1 - 1e-8) * current_gamma] = 0 @@ -335,56 +398,74 @@ def compute_next_removal(phi, delta, current_gamma): def complement_schur(M_current, b, d, id_pop): - r""" This function computes the inverse of matrix in regularization path - using Schur complement + r""" This function computes the inverse of the design matrix in the \ + regularization path using the Schur complement. Two cases may arise: + + Case 1: one variable is added to the active set + - Two cases may arise: Firstly one variable is added to the active set .. math:: M_{k+1}^{-1} = \begin{bmatrix} - M_{k}^{-1} + s^{-1} M_{k}^{-1} b b^T M_{k}^{-1} & -s^{-1} \\ - - s^{-1} b^T M_{k}^{-1} & s^{-1} + M_{k}^{-1} + s^{-1} M_{k}^{-1} \mathbf{b} \mathbf{b}^T M_{k}^{-1} \ + & - M_{k}^{-1} \mathbf{b} s^{-1} \\ + - s^{-1} \mathbf{b}^T M_{k}^{-1} & s^{-1} \end{bmatrix} + + where : - - :math:`M_k^{-1}` is the inverse of matrix in previous iteration and - :math:`M_k` is the upper left block matrix in Schur formulation - - b is the upper right block matrix in Schur formulation. In our case, - b is reduced to a column vector and b^T is the lower left block matrix - - s is the Schur complement, given by - :math:`s = d - b^T M_{k}^{-1} b` in our case - - Secondly, one variable is removed from the active set + + - :math:`M_k^{-1}` is the inverse of the design matrix :math:`H_A^tH_A` \ + of the previous iteration + - :math:`\mathbf{b}` is the last column of :math:`M_{k}` + - :math:`s` is the Schur complement, given by \ + :math:`s = \mathbf{d} - \mathbf{b}^T M_{k}^{-1} \mathbf{b}` + + Case 2: one variable is removed from the active set. + .. math:: - M_{k+1}^{-1} = M^{-1}_{A_k \backslash q} - + M_{k+1}^{-1} = M^{-1}_{k \backslash q} - \frac{r_{-q,q} r^{T}_{-q,q}}{r_{q,q}} + where : - - q is the index of column and row to delete - - :math:`M^{-1}_{A_k \backslash q}` is the previous inverse matrix - without qth column and qth row - - r_{-q,q} is the qth column of :math:`M^{-1}_{k}` without the qth element - - r_{q, q} is the element of qth column and qth row in :math:`M^{-1}_{k}` + + - :math:`q` is the index of column and row to delete + - :math:`M^{-1}_{k \backslash q}` is the previous inverse matrix deprived \ + of the :math:`q` th column and :math:`q` th row + - :math:`r_{-q,q}` is the :math:`q` th column of :math:`M^{-1}_{k}` \ + without the :math:`q` th element + - :math:`r_{q, q}` is the element of :math:`q` th column and :math:`q` th \ + row in :math:`M^{-1}_{k}` + + Parameters ---------- - M_current : np.ndarray (|A|-1, |A|-1) - Inverse matrix in previous iteration - b : np.ndarray (|A|-1, ) - Upper right matrix in Schur complement, a column vector in our case + M_current : ndarray, shape (size(A)-1, size(A)-1) + Inverse matrix of :math:`H_A^tH_A` at the previous iteration, with \ + size(A) the size of the active set + b : ndarray, shape (size(A)-1, ) + None for case 2 (removal), last column of :math:`M_{k}` for case 1 \ + (addition) d : float - Lower right matrix in Schur complement, a scalar in our case - id_pop + should be equal to 2 when UOT and 1 for the semi-relaxed OT + id_pop : int Index of the variable to be removed, equal to -1 - if none of the variables is deleted in current iteration + if no variable is deleted at the current iteration + + Returns ------- - M : np.ndarray (|A|, |A|) - Inverse matrix needed in current iteration + M : ndarray, shape (size(A), size(A)) + Inverse matrix of :math:`H_A^tH_A` of the current iteration + + References ---------- - [Chapel et al., 2021]: - Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). + .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). Unbalanced optimal transport through non-negative penalized - linear regression. + linear regression. NeurIPS. """ + if b is None: b = M_current[id_pop, :] b = np.delete(b, id_pop) @@ -409,33 +490,39 @@ def complement_schur(M_current, b, d, id_pop): def construct_augmented_H(active_index, m, Hc, HrHr): - r""" This function construct an augmented matrix for the first iteration of - semi-relaxed regularization path + r""" This function constructs an augmented matrix for the first iteration + of the semi-relaxed regularization path .. math:: - Augmented_H = + \text{Augmented}_H = \begin{bmatrix} 0 & H_{c A} \\ H_{c A}^T & H_{r A}^T H_{r A} \end{bmatrix} + where : - - H_{r A} is the sub-matrix constructed by the columns of H_r - whose indices belong to the active set A - - H_{c A} is the sub-matrix constructed by the columns of H_c - whose indices belong to the active set A + + - :math:`H_{r A}` is the sub-matrix constructed by the columns of \ + :math:`H_r` whose indices belong to the active set A + - :math:`H_{c A}` is the sub-matrix constructed by the columns of \ + :math:`H_c` whose indices belong to the active set A + + Parameters ---------- active_index : list - Indices of active variables + Indices of the active variables m : int Length of the target distribution Hc : np.ndarray (dim_b, dim_a * dim_b) - Matrix that computes the sum along the columns of transport plan T + Matrix that computes the sum along the columns of the transport plan \ + :math:`T` HrHr : np.ndarray (dim_a * dim_b, dim_a * dim_b) - Matrix product of H_r^T H_r + Matrix product of :math:`H_r^T H_r` + Returns ------- - H_augmented : np.ndarray (dim_b + |A|, dim_b + |A|) + H_augmented : np.ndarray (dim_b + size(A), dim_b + size(A)) Augmented matrix for the first iteration of the semi-relaxed regularization path """ @@ -451,18 +538,27 @@ def fully_relaxed_path(a: np.array, b: np.array, C: np.array, reg=1e-4, r"""This function gives the regularization path of l2-penalized UOT problem The problem to optimize is the Lasso reformulation of the l2-penalized UOT: + .. math:: - \min_t \gamma c^T t + 0.5 * \|H t - y\|_2^2 + \min_t \gamma \mathbf{c}^T \mathbf{t} + + 0.5 * \|{H} \mathbf{t} - \mathbf{y}\|_2^2 + s.t. - t \geq 0 + \mathbf{t} \geq 0 + where : - - c is a (dim_a * dim_b, ) metric cost vector (flattened version of C) + + - :math:`\mathbf{c}` is the flattened version of the cost matrix \ + :math:`{C}` - :math:`\gamma = 1/\lambda` is the l2-regularization coefficient - - y is the concatenation of vectors a and b, defined as y^T = [a^T b^T] - - H is a (dim_a + dim_b, dim_a * dim_b) metric matrix, - see [Chapel et al., 2021] for the design of H. The matrix product Ht - computes both the source marginal and the target marginal. - - t is a (dim_a * dim_b, ) metric vector (flattened version of T) + - :math:`\mathbf{y}` is the concatenation of vectors :math:`\mathbf{a}` \ + and :math:`\mathbf{b}`, defined as \ + :math:`\mathbf{y}^T = [\mathbf{a}^T \mathbf{b}^T]` + - :math:`{H}` is a design matrix, see :ref:`[41] ` \ + for the design of :math:`{H}`. The matrix product :math:`H\mathbf{t}` \ + computes both the source marginal and the target marginals. + - :math:`\mathbf{t}` is the flattened version of the transport matrix + Parameters ---------- a : np.ndarray (dim_a,) @@ -478,11 +574,12 @@ def fully_relaxed_path(a: np.array, b: np.array, C: np.array, reg=1e-4, Returns ------- t : np.ndarray (dim_a*dim_b, ) - Flattened vector of optimal transport matrix + Flattened vector of the optimal transport matrix t_list : list - List of solutions in regularization path + List of solutions in the regularization path gamma_list : list - List of regularization coefficient in regularization path + List of regularization coefficients in the regularization path + Examples -------- >>> import ot @@ -502,10 +599,9 @@ def fully_relaxed_path(a: np.array, b: np.array, C: np.array, reg=1e-4, References ---------- - [Chapel et al., 2021]: - Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). + .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). Unbalanced optimal transport through non-negative penalized - linear regression. + linear regression. NeurIPS. """ n = np.shape(a)[0] @@ -580,22 +676,32 @@ def fully_relaxed_path(a: np.array, b: np.array, C: np.array, reg=1e-4, def semi_relaxed_path(a: np.array, b: np.array, C: np.array, reg=1e-4, itmax=50000): r"""This function gives the regularization path of semi-relaxed - l2-UOT problem + l2-UOT problem. The problem to optimize is the Lasso reformulation of the l2-penalized UOT: + .. math:: - \min_t \gamma c^T t + 0.5 * \|H_r t - a\|_2^2 + + \min_t \gamma \mathbf{c}^T t + + 0.5 * \|H_r \mathbf{t} - \mathbf{a}\|_2^2 + s.t. - H_c t = b - t \geq 0 + H_c \mathbf{t} = \mathbf{b} + + \mathbf{t} \geq 0 + where : - - c is a (dim_a * dim_b, ) metric cost vector (flattened version of C) - - :math:`\gamma = 1/\lambda` is the l2-regularization coefficient - - H_r is a (dim_a, dim_a * dim_b) metric matrix, - which computes the sum along the rows of transport plan T - - H_c is a (dim_b, dim_a * dim_b) metric matrix, - which computes the sum along the columns of transport plan T - - t is a (dim_a * dim_b, ) metric vector (flattened version of T) + + - :math:`\mathbf{c}` is the flattened version of the cost matrix \ + :math:`C` + - :math:`\gamma = 1/\lambda` is the l2-regularization parameter + - :math:`H_r` is a matrix that computes the sum along the rows of \ + the transport plan :math:`T` + - :math:`H_c` is a matrix that computes the sum along the columns of \ + the transport plan :math:`T` + - :math:`\mathbf{t}` is the flattened version of the transport plan \ + :math:`T` + Parameters ---------- a : np.ndarray (dim_a,) @@ -608,14 +714,16 @@ def semi_relaxed_path(a: np.array, b: np.array, C: np.array, reg=1e-4, l2-regularization coefficient itmax: int (optional) Maximum number of iteration + Returns ------- t : np.ndarray (dim_a*dim_b, ) - Flattened vector of optimal transport matrix + Flattened vector of the (unregularized) optimal transport matrix t_list : list - List of solutions in regularization path + List of all the optimal transport vectors of the regularization path gamma_list : list - List of regularization coefficient in regularization path + List of the regularization parameters in the path + Examples -------- >>> import ot @@ -635,10 +743,9 @@ def semi_relaxed_path(a: np.array, b: np.array, C: np.array, reg=1e-4, References ---------- - [Chapel et al., 2021]: - Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). + .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). Unbalanced optimal transport through non-negative penalized - linear regression. + linear regression. NeurIPS. """ n = np.shape(a)[0] @@ -722,8 +829,44 @@ def semi_relaxed_path(a: np.array, b: np.array, C: np.array, reg=1e-4, def regularization_path(a: np.array, b: np.array, C: np.array, reg=1e-4, semi_relaxed=False, itmax=50000): - r"""This function combines both the semi-relaxed and the fully-relaxed - regularization paths of l2-UOT problem + r"""This function provides all the solutions of the regularization path \ + of the l2-UOT problem :ref:`[41] `. + + The problem to optimize is the Lasso reformulation of the l2-penalized UOT: + + .. math:: + \min_t \gamma \mathbf{c}^T \mathbf{t} + + 0.5 * \|{H} \mathbf{t} - \mathbf{y}\|_2^2 + + s.t. + \mathbf{t} \geq 0 + + where : + + - :math:`\mathbf{c}` is the flattened version of the cost matrix \ + :math:`{C}` + - :math:`\gamma = 1/\lambda` is the l2-regularization coefficient + - :math:`\mathbf{y}` is the concatenation of vectors :math:`\mathbf{a}` \ + and :math:`\mathbf{b}`, defined as \ + :math:`\mathbf{y}^T = [\mathbf{a}^T \mathbf{b}^T]` + - :math:`{H}` is a design matrix, see :ref:`[41] ` \ + for the design of :math:`{H}`. The matrix product :math:`H\mathbf{t}` \ + computes both the source marginal and the target marginals. + - :math:`\mathbf{t}` is the flattened version of the transport matrix + + For the semi-relaxed problem, it optimizes the Lasso reformulation of the + l2-penalized UOT: + + .. math:: + + \min_t \gamma \mathbf{c}^T \mathbf{t} + + 0.5 * \|H_r \mathbf{t} - \mathbf{a}\|_2^2 + + s.t. + H_c \mathbf{t} = \mathbf{b} + + \mathbf{t} \geq 0 + Parameters ---------- @@ -736,23 +879,24 @@ def regularization_path(a: np.array, b: np.array, C: np.array, reg=1e-4, reg: float (optional) l2-regularization coefficient semi_relaxed : bool (optional) - Give the semi-relaxed path if true + Give the semi-relaxed path if True itmax: int (optional) Maximum number of iteration + Returns ------- t : np.ndarray (dim_a*dim_b, ) - Flattened vector of optimal transport matrix + Flattened vector of the (unregularized) optimal transport matrix t_list : list - List of solutions in regularization path + List of all the optimal transport vectors of the regularization path gamma_list : list - List of regularization coefficient in regularization path + List of the regularization parameters in the path + References ---------- - [Chapel et al., 2021]: - Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). + .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). Unbalanced optimal transport through non-negative penalized - linear regression. + linear regression. NeurIPS. """ if semi_relaxed: t, t_list, gamma_list = semi_relaxed_path(a, b, C, reg=reg, @@ -765,27 +909,33 @@ def regularization_path(a: np.array, b: np.array, C: np.array, reg=1e-4, def compute_transport_plan(gamma, gamma_list, Pi_list): r""" Given the regularization path, this function computes the transport - plan for any value of gamma by the piecewise linearity of the path + plan for any value of gamma thanks to the piecewise linearity of the path. .. math:: t(\gamma) = \phi(\gamma) - \gamma \delta(\gamma) - where : - - :math:`\gamma` is the regularization coefficient + + where: + + - :math:`\gamma` is the regularization parameter - :math:`\phi(\gamma)` is the corresponding intercept - :math:`\delta(\gamma)` is the corresponding slope - - t is a (dim_a * dim_b, ) vector (flattened version of transport matrix) + - :math:`\mathbf{t}` is the flattened version of the transport matrix + Parameters ---------- gamma : float Regularization coefficient gamma_list : list - List of regularization coefficients in regularization path + List of regularization parameters of the regularization path Pi_list : list - List of solutions in regularization path + List of all the solutions of the regularization path + Returns ------- t : np.ndarray (dim_a*dim_b, ) - Transport vector corresponding to the given value of gamma + Vectorization of the transport plan corresponding to the given value + of gamma + Examples -------- >>> import ot @@ -804,12 +954,13 @@ def compute_transport_plan(gamma, gamma_list, Pi_list): array([0. , 0. , 0. , 0.19722222, 0.05555556, 0. , 0. , 0.24722222, 0. ]) + + .. _references-regpath: References ---------- - [Chapel et al., 2021]: - Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). + .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). Unbalanced optimal transport through non-negative penalized - linear regression. + linear regression. NeurIPS. """ if gamma >= gamma_list[0]: diff --git a/ot/unbalanced.py b/ot/unbalanced.py index 503cc1e..90c920c 100644 --- a/ot/unbalanced.py +++ b/ot/unbalanced.py @@ -4,6 +4,7 @@ Regularized Unbalanced OT solvers """ # Author: Hicham Janati +# Laetitia Chapel # License: MIT License from __future__ import division @@ -1029,3 +1030,225 @@ def barycenter_unbalanced(A, M, reg, reg_m, method="sinkhorn", weights=None, log=log, **kwargs) else: raise ValueError("Unknown method '%s'." % method) + + +def mm_unbalanced(a, b, M, reg_m, div='kl', G0=None, numItermax=1000, + stopThr=1e-15, verbose=False, log=False): + r""" + Solve the unbalanced optimal transport problem and return the OT plan. + The function solves the following optimization problem: + + .. math:: + W = \min_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F + + \mathrm{reg_m} \cdot \mathrm{div}(\gamma \mathbf{1}, \mathbf{a}) + + \mathrm{reg_m} \cdot \mathrm{div}(\gamma^T \mathbf{1}, \mathbf{b}) + s.t. + \gamma \geq 0 + + where: + + - :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix + - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target + unbalanced distributions + - div is a divergence, either Kullback-Leibler or :math:`\ell_2` divergence + + The algorithm used for solving the problem is a maximization- + minimization algorithm as proposed in :ref:`[41] ` + + Parameters + ---------- + a : array-like (dim_a,) + Unnormalized histogram of dimension `dim_a` + b : array-like (dim_b,) + Unnormalized histogram of dimension `dim_b` + M : array-like (dim_a, dim_b) + loss matrix + reg_m: float + Marginal relaxation term > 0 + div: string, optional + Divergence to quantify the difference between the marginals. + Can take two values: 'kl' (Kullback-Leibler) or 'l2' (quadratic) + G0: array-like (dim_a, dim_b) + Initialization of the transport matrix + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshold on error (> 0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + Returns + ------- + gamma : (dim_a, dim_b) array-like + Optimal transportation matrix for the given parameters + log : dict + log dictionary returned only if `log` is `True` + + Examples + -------- + >>> import ot + >>> import numpy as np + >>> a=[.5, .5] + >>> b=[.5, .5] + >>> M=[[1., 36.],[9., 4.]] + >>> np.round(ot.unbalanced.mm_unbalanced(a, b, M, 1, 'kl'), 2) + array([[0.3 , 0. ], + [0. , 0.07]]) + >>> np.round(ot.unbalanced.mm_unbalanced(a, b, M, 1, 'l2'), 2) + array([[0.25, 0. ], + [0. , 0. ]]) + + + .. _references-regpath: + References + ---------- + .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). + Unbalanced optimal transport through non-negative penalized + linear regression. NeurIPS. + See Also + -------- + ot.lp.emd : Unregularized OT + ot.unbalanced.sinkhorn_unbalanced : Entropic regularized OT + """ + M, a, b = list_to_array(M, a, b) + nx = get_backend(M, a, b) + + dim_a, dim_b = M.shape + + if len(a) == 0: + a = nx.ones(dim_a, type_as=M) / dim_a + if len(b) == 0: + b = nx.ones(dim_b, type_as=M) / dim_b + + if G0 is None: + G = a[:, None] * b[None, :] + else: + G = G0 + + if log: + log = {'err': [], 'G': []} + + if div == 'kl': + K = nx.exp(M / - reg_m / 2) + elif div == 'l2': + K = nx.maximum(a[:, None] + b[None, :] - M / reg_m / 2, + nx.zeros((dim_a, dim_b), type_as=M)) + else: + warnings.warn("The div parameter should be either equal to 'kl' or \ + 'l2': it has been set to 'kl'.") + div = 'kl' + K = nx.exp(M / - reg_m / 2) + + for i in range(numItermax): + Gprev = G + + if div == 'kl': + u = nx.sqrt(a / (nx.sum(G, 1) + 1e-16)) + v = nx.sqrt(b / (nx.sum(G, 0) + 1e-16)) + G = G * K * u[:, None] * v[None, :] + elif div == 'l2': + Gd = nx.sum(G, 0, keepdims=True) + nx.sum(G, 1, keepdims=True) + 1e-16 + G = G * K / Gd + + err = nx.sqrt(nx.sum((G - Gprev) ** 2)) + if log: + log['err'].append(err) + log['G'].append(G) + if verbose: + print('{:5d}|{:8e}|'.format(i, err)) + if err < stopThr: + break + + if log: + log['cost'] = nx.sum(G * M) + return G, log + else: + return G + + +def mm_unbalanced2(a, b, M, reg_m, div='kl', G0=None, numItermax=1000, + stopThr=1e-15, verbose=False, log=False): + r""" + Solve the unbalanced optimal transport problem and return the OT plan. + The function solves the following optimization problem: + + .. math:: + W = \min_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F + + \mathrm{reg_m} \cdot \mathrm{div}(\gamma \mathbf{1}, \mathbf{a}) + + \mathrm{reg_m} \cdot \mathrm{div}(\gamma^T \mathbf{1}, \mathbf{b}) + + s.t. + \gamma \geq 0 + + where: + + - :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix + - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target + unbalanced distributions + - :math:`\mathrm{div}` is a divergence, either Kullback-Leibler or :math:`\ell_2` divergence + + The algorithm used for solving the problem is a maximization- + minimization algorithm as proposed in :ref:`[41] ` + + Parameters + ---------- + a : array-like (dim_a,) + Unnormalized histogram of dimension `dim_a` + b : array-like (dim_b,) + Unnormalized histogram of dimension `dim_b` + M : array-like (dim_a, dim_b) + loss matrix + reg_m: float + Marginal relaxation term > 0 + div: string, optional + Divergence to quantify the difference between the marginals. + Can take two values: 'kl' (Kullback-Leibler) or 'l2' (quadratic) + G0: array-like (dim_a, dim_b) + Initialization of the transport matrix + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshold on error (> 0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + + Returns + ------- + ot_distance : array-like + the OT distance between :math:`\mathbf{a}` and :math:`\mathbf{b}` + log : dict + log dictionary returned only if `log` is `True` + + Examples + -------- + >>> import ot + >>> import numpy as np + >>> a=[.5, .5] + >>> b=[.5, .5] + >>> M=[[1., 36.],[9., 4.]] + >>> np.round(ot.unbalanced.mm_unbalanced2(a, b, M, 1, 'l2'),2) + 0.25 + >>> np.round(ot.unbalanced.mm_unbalanced2(a, b, M, 1, 'kl'),2) + 0.57 + + References + ---------- + .. [41] Chapel, L., Flamary, R., Wu, H., Févotte, C., and Gasso, G. (2021). + Unbalanced optimal transport through non-negative penalized + linear regression. NeurIPS. + See Also + -------- + ot.lp.emd2 : Unregularized OT loss + ot.unbalanced.sinkhorn_unbalanced2 : Entropic regularized OT loss + """ + _, log_mm = mm_unbalanced(a, b, M, reg_m, div=div, G0=G0, + numItermax=numItermax, stopThr=stopThr, + verbose=verbose, log=True) + + if log: + return log_mm['cost'], log_mm + else: + return log_mm['cost'] diff --git a/test/test_unbalanced.py b/test/test_unbalanced.py index db59504..02b3fc3 100644 --- a/test/test_unbalanced.py +++ b/test/test_unbalanced.py @@ -1,6 +1,7 @@ """Tests for module Unbalanced OT with entropy regularization""" # Author: Hicham Janati +# Laetitia Chapel # # License: MIT License @@ -286,3 +287,52 @@ def test_implemented_methods(nx): method=method) barycenter_unbalanced(A, M, reg=epsilon, reg_m=reg_m, method=method) + + +def test_mm_convergence(nx): + n = 100 + rng = np.random.RandomState(42) + x = rng.randn(n, 2) + rng = np.random.RandomState(75) + y = rng.randn(n, 2) + a = ot.utils.unif(n) + b = ot.utils.unif(n) + + M = ot.dist(x, y) + M = M / M.max() + reg_m = 100 + a, b, M = nx.from_numpy(a, b, M) + + G_kl, _ = ot.unbalanced.mm_unbalanced(a, b, M, reg_m=reg_m, div='kl', + verbose=True, log=True) + loss_kl = nx.to_numpy(ot.unbalanced.mm_unbalanced2( + a, b, M, reg_m, div='kl', verbose=True)) + G_l2, _ = ot.unbalanced.mm_unbalanced(a, b, M, reg_m=reg_m, div='l2', + verbose=False, log=True) + + # check if the marginals come close to the true ones when large reg + np.testing.assert_allclose(np.sum(nx.to_numpy(G_kl), 1), a, atol=1e-03) + np.testing.assert_allclose(np.sum(nx.to_numpy(G_kl), 0), b, atol=1e-03) + np.testing.assert_allclose(np.sum(nx.to_numpy(G_l2), 1), a, atol=1e-03) + np.testing.assert_allclose(np.sum(nx.to_numpy(G_l2), 0), b, atol=1e-03) + + # check if mm_unbalanced2 returns the correct loss + np.testing.assert_allclose(nx.to_numpy(nx.sum(G_kl * M)), loss_kl, + atol=1e-5) + + # check in case no histogram is provided + a_np, b_np = np.array([]), np.array([]) + a, b = nx.from_numpy(a_np, b_np) + + G_kl_null = ot.unbalanced.mm_unbalanced(a, b, M, reg_m=reg_m, div='kl') + G_l2_null = ot.unbalanced.mm_unbalanced(a, b, M, reg_m=reg_m, div='l2') + np.testing.assert_allclose(G_kl_null, G_kl) + np.testing.assert_allclose(G_l2_null, G_l2) + + # test when G0 is given + G0 = ot.emd(a, b, M) + reg_m = 10000 + G_kl = ot.unbalanced.mm_unbalanced(a, b, M, reg_m=reg_m, div='kl', G0=G0) + G_l2 = ot.unbalanced.mm_unbalanced(a, b, M, reg_m=reg_m, div='l2', G0=G0) + np.testing.assert_allclose(G0, G_kl, atol=1e-05) + np.testing.assert_allclose(G0, G_l2, atol=1e-05) -- cgit v1.2.3 From eccb1386eea52b94b82456d126bd20cbe3198e05 Mon Sep 17 00:00:00 2001 From: Rémi Flamary Date: Thu, 21 Apr 2022 16:34:01 +0200 Subject: [MRG] Release 8.2 (#365) * release text and number * add examples in release fil build wheels * switch gallery to release * add much needed contributors file * debug circleci * une line of logos * working logo * back to stable sphinx galery --- .circleci/config.yml | 5 ++- CONTRIBUTORS.md | 52 ++++++++++++++++++++++++++++ README.md | 35 ++++--------------- RELEASES.md | 57 ++++++++++++++++++++++++++----- docs/source/_static/images/logo_3ia.jpg | Bin 0 -> 25029 bytes docs/source/_static/images/logo_anr.jpg | Bin 0 -> 23493 bytes docs/source/_static/images/logo_cnrs.jpg | Bin 0 -> 6918 bytes docs/source/contributors.rst | 6 ++++ docs/source/index.rst | 1 + docs/source/releases.rst | 2 +- examples/others/plot_logo.py | 10 +++--- ot/__init__.py | 2 +- 12 files changed, 122 insertions(+), 48 deletions(-) create mode 100644 CONTRIBUTORS.md create mode 100644 docs/source/_static/images/logo_3ia.jpg create mode 100644 docs/source/_static/images/logo_anr.jpg create mode 100644 docs/source/_static/images/logo_cnrs.jpg create mode 100644 docs/source/contributors.rst (limited to 'docs/source') diff --git a/.circleci/config.yml b/.circleci/config.yml index 77ab45c..7e15a65 100644 --- a/.circleci/config.yml +++ b/.circleci/config.yml @@ -48,9 +48,8 @@ jobs: python -m pip install --user -e . python -m pip install --user --upgrade --no-cache-dir --progress-bar off -r requirements.txt python -m pip install --user --upgrade --progress-bar off -r docs/requirements.txt - python -m pip install --user --upgrade --progress-bar off ipython "https://api.github.com/repos/sphinx-gallery/sphinx-gallery/zipball/master" memory_profiler - - + python -m pip install --user --upgrade --progress-bar off ipython sphinx-gallery memory_profiler + # python -m pip install --user --upgrade --progress-bar off ipython "https://api.github.com/repos/sphinx-gallery/sphinx-gallery/zipball/master" memory_profiler - save_cache: key: pip-cache paths: diff --git a/CONTRIBUTORS.md b/CONTRIBUTORS.md new file mode 100644 index 0000000..ab64fba --- /dev/null +++ b/CONTRIBUTORS.md @@ -0,0 +1,52 @@ + + +## Creators and Maintainers + +This toolbox has been created and is maintained by: + +* [Rémi Flamary](http://remi.flamary.com/) +* [Nicolas Courty](http://people.irisa.fr/Nicolas.Courty/) + +## Contributors + +The contributors to this library are: + +* [Rémi Flamary](http://remi.flamary.com/) (EMD wrapper, Pytorch backend, DA + classes, conditional gradients, WDA, weak OT, linear OT mapping, documentation) +* [Nicolas Courty](http://people.irisa.fr/Nicolas.Courty/) (Original sinkhorn, + Wasserstein barycenters and convolutional barycenters, 1D wasserstein) +* [Alexandre Gramfort](http://alexandre.gramfort.net/) (CI, documentation) +* [Laetitia Chapel](http://people.irisa.fr/Laetitia.Chapel/) (Partial OT, + Unbalanced OT non-regularized) +* [Michael Perrot](http://perso.univ-st-etienne.fr/pem82055/) (Mapping estimation) +* [Léo Gautheron](https://github.com/aje) (Initial GPU implementation) +* [Nathalie Gayraud](https://www.linkedin.com/in/nathalie-t-h-gayraud/?ppe=1) (DA classes) +* [Stanislas Chambon](https://slasnista.github.io/) (DA classes) +* [Antoine Rolet](https://arolet.github.io/) (EMD solver debug) +* Erwan Vautier (Gromov-Wasserstein) +* [Kilian Fatras](https://kilianfatras.github.io/) (Stochastic solvers, + empirical sinkhorn) +* [Alain Rakotomamonjy](https://sites.google.com/site/alainrakotomamonjy/home) (Greenkhorn) +* [Vayer Titouan](https://tvayer.github.io/) (Gromov-Wasserstein, Fused-Gromov-Wasserstein) +* [Hicham Janati](https://hichamjanati.github.io/) (Unbalanced OT, Debiased barycenters) +* [Romain Tavenard](https://rtavenar.github.io/) (1D Wasserstein) +* [Mokhtar Z. Alaya](http://mzalaya.github.io/) (Screenkhorn) +* [Ievgen Redko](https://ievred.github.io/) (Laplacian DA, JCPOT) +* [Adrien Corenflos](https://adriencorenflos.github.io/) (Sliced Wasserstein Distance) +* [Tanguy Kerdoncuff](https://hv0nnus.github.io/) (Sampled Gromov Wasserstein) +* [Minhui Huang](https://mhhuang95.github.io) (Projection Robust Wasserstein Distance) +* [Nathan Cassereau](https://github.com/ncassereau-idris) (Backends) +* [Cédric Vincent-Cuaz](https://github.com/cedricvincentcuaz) (Graph Dictionary Learning) + +## Acknowledgments + +This toolbox benefit a lot from open source research and we would like to thank the following persons for providing some code (in various languages): + +* [Gabriel Peyré](http://gpeyre.github.io/) (Wasserstein Barycenters in Matlab) +* [Mathieu Blondel](https://mblondel.org/) (original implementation smooth OT) +* [Nicolas Bonneel](http://liris.cnrs.fr/~nbonneel/) (C++ code for EMD) +* [Marco Cuturi](http://marcocuturi.net/) (Sinkhorn Knopp in Matlab/Cuda) + +POT has benefited from the financing or manpower from the following partners: + +ANRCNRS3IA \ No newline at end of file diff --git a/README.md b/README.md index 1b50aeb..e2b33d9 100644 --- a/README.md +++ b/README.md @@ -180,35 +180,12 @@ This toolbox has been created and is maintained by * [Rémi Flamary](http://remi.flamary.com/) * [Nicolas Courty](http://people.irisa.fr/Nicolas.Courty/) -The contributors to this library are - -* [Alexandre Gramfort](http://alexandre.gramfort.net/) (CI, documentation) -* [Laetitia Chapel](http://people.irisa.fr/Laetitia.Chapel/) (Partial OT) -* [Michael Perrot](http://perso.univ-st-etienne.fr/pem82055/) (Mapping estimation) -* [Léo Gautheron](https://github.com/aje) (Initial GPU implementation) -* [Nathalie Gayraud](https://www.linkedin.com/in/nathalie-t-h-gayraud/?ppe=1) (DA classes) -* [Stanislas Chambon](https://slasnista.github.io/) (DA classes) -* [Antoine Rolet](https://arolet.github.io/) (EMD solver debug) -* Erwan Vautier (Gromov-Wasserstein) -* [Kilian Fatras](https://kilianfatras.github.io/) (Stochastic solvers) -* [Alain Rakotomamonjy](https://sites.google.com/site/alainrakotomamonjy/home) -* [Vayer Titouan](https://tvayer.github.io/) (Gromov-Wasserstein -, Fused-Gromov-Wasserstein) -* [Hicham Janati](https://hichamjanati.github.io/) (Unbalanced OT, Debiased barycenters) -* [Romain Tavenard](https://rtavenar.github.io/) (1d Wasserstein) -* [Mokhtar Z. Alaya](http://mzalaya.github.io/) (Screenkhorn) -* [Ievgen Redko](https://ievred.github.io/) (Laplacian DA, JCPOT) -* [Adrien Corenflos](https://adriencorenflos.github.io/) (Sliced Wasserstein Distance) -* [Tanguy Kerdoncuff](https://hv0nnus.github.io/) (Sampled Gromov Wasserstein) -* [Minhui Huang](https://mhhuang95.github.io) (Projection Robust Wasserstein Distance) -* [Nathan Cassereau](https://github.com/ncassereau-idris) (Backends) -* [Cédric Vincent-Cuaz](https://github.com/cedricvincentcuaz) (Graph Dictionary Learning) - -This toolbox benefit a lot from open source research and we would like to thank the following persons for providing some code (in various languages): - -* [Gabriel Peyré](http://gpeyre.github.io/) (Wasserstein Barycenters in Matlab) -* [Mathieu Blondel](https://mblondel.org/) (original implementation smooth OT) -* [Nicolas Bonneel](http://liris.cnrs.fr/~nbonneel/) (C++ code for EMD) -* [Marco Cuturi](http://marcocuturi.net/) (Sinkhorn Knopp in Matlab/Cuda) +The numerous contributors to this library are listed [here](CONTRIBUTORS.md). + +POT has benefited from the financing or manpower from the following partners: + +ANRCNRS3IA + ## Contributions and code of conduct diff --git a/RELEASES.md b/RELEASES.md index 33d1ab6..be2192e 100644 --- a/RELEASES.md +++ b/RELEASES.md @@ -1,22 +1,61 @@ # Releases -## 0.8.2dev Development +## 0.8.2 + +This releases introduces several new notable features. The less important +but most exiting one being that we now have a logo for the toolbox (color +and dark background) : + +![](https://pythonot.github.io/master/_images/logo.svg)![](https://pythonot.github.io/master/_static/logo_dark.svg) + +This logo is generated using with matplotlib and using the solution of an OT +problem provided by POT (with `ot.emd`). Generating the logo can be done with a +simple python script also provided in the [documentation gallery](https://pythonot.github.io/auto_examples/others/plot_logo.html#sphx-glr-auto-examples-others-plot-logo-py). + +New OT solvers include [Weak +OT](https://pythonot.github.io/gen_modules/ot.weak.html#ot.weak.weak_optimal_transport) + and [OT with factored +coupling](https://pythonot.github.io/gen_modules/ot.factored.html#ot.factored.factored_optimal_transport) +that can be used on large datasets. The [Majorization Minimization](https://pythonot.github.io/gen_modules/ot.unbalanced.html?highlight=mm_#ot.unbalanced.mm_unbalanced) solvers for +non-regularized Unbalanced OT are now also available. We also now provide an +implementation of [GW and FGW unmixing](https://pythonot.github.io/gen_modules/ot.gromov.html#ot.gromov.gromov_wasserstein_linear_unmixing) and [dictionary learning](https://pythonot.github.io/gen_modules/ot.gromov.html#ot.gromov.gromov_wasserstein_dictionary_learning). It is now +possible to use autodiff to solve entropic an quadratic regularized OT in the +dual for full or stochastic optimization thanks to the new functions to compute +the dual loss for [entropic](https://pythonot.github.io/gen_modules/ot.stochastic.html#ot.stochastic.loss_dual_entropic) and [quadratic](https://pythonot.github.io/gen_modules/ot.stochastic.html#ot.stochastic.loss_dual_quadratic) regularized OT and reconstruct the [OT +plan](https://pythonot.github.io/gen_modules/ot.stochastic.html#ot.stochastic.plan_dual_entropic) on part or all of the data. They can be used for instance to solve OT +problems with stochastic gradient or for estimating the [dual potentials as +neural networks](https://pythonot.github.io/auto_examples/backends/plot_stoch_continuous_ot_pytorch.html#sphx-glr-auto-examples-backends-plot-stoch-continuous-ot-pytorch-py). + +On the backend front, we now have backend compatible functions and classes in +the domain adaptation [`ot.da`](https://pythonot.github.io/gen_modules/ot.da.html#module-ot.da) and unbalanced OT [`ot.unbalanced`](https://pythonot.github.io/gen_modules/ot.unbalanced.html) modules. This +means that the DA classes can be used on tensors from all compatible backends. +The [free support Wasserstein barycenter](https://pythonot.github.io/gen_modules/ot.lp.html?highlight=free%20support#ot.lp.free_support_barycenter) solver is now also backend compatible. + +Finally we have worked on the documentation to provide an update of existing +examples in the gallery and and several new examples including [GW dictionary +learning](https://pythonot.github.io/auto_examples/gromov/plot_gromov_wasserstein_dictionary_learning.html#sphx-glr-auto-examples-gromov-plot-gromov-wasserstein-dictionary-learning-py) +[weak Optimal +Transport](https://pythonot.github.io/auto_examples/others/plot_WeakOT_VS_OT.html#sphx-glr-auto-examples-others-plot-weakot-vs-ot-py), +[NN based dual potentials +estimation](https://pythonot.github.io/auto_examples/backends/plot_stoch_continuous_ot_pytorch.html#sphx-glr-auto-examples-backends-plot-stoch-continuous-ot-pytorch-py) +and [Factored coupling OT](https://pythonot.github.io/auto_examples/others/plot_factored_coupling.html#sphx-glr-auto-examples-others-plot-factored-coupling-py). +. #### New features - Remove deprecated `ot.gpu` submodule (PR #361) -- Update examples in the gallery (PR #359). +- Update examples in the gallery (PR #359) - Add stochastic loss and OT plan computation for regularized OT and - backend examples(PR #360). -- Implementation of factored OT with emd and sinkhorn (PR #358). + backend examples(PR #360) +- Implementation of factored OT with emd and sinkhorn (PR #358) - A brand new logo for POT (PR #357) -- Better list of related examples in quick start guide with `minigallery` (PR #334). +- Better list of related examples in quick start guide with `minigallery` (PR #334) - Add optional log-domain Sinkhorn implementation in WDA to support smaller values - of the regularization parameter (PR #336). -- Backend implementation for `ot.lp.free_support_barycenter` (PR #340). -- Add weak OT solver + example (PR #341). -- Add backend support for Domain Adaptation and Unbalanced solvers (PR #343). + of the regularization parameter (PR #336) +- Backend implementation for `ot.lp.free_support_barycenter` (PR #340) +- Add weak OT solver + example (PR #341) +- Add backend support for Domain Adaptation and Unbalanced solvers (PR #343) - Add (F)GW linear dictionary learning solvers + example (PR #319) - Add links to related PR and Issues in the doc release page (PR #350) - Add new minimization-maximization algorithms for solving exact Unbalanced OT + example (PR #362) diff --git a/docs/source/_static/images/logo_3ia.jpg b/docs/source/_static/images/logo_3ia.jpg new file mode 100644 index 0000000..ecc56b2 Binary files /dev/null and b/docs/source/_static/images/logo_3ia.jpg differ diff --git a/docs/source/_static/images/logo_anr.jpg b/docs/source/_static/images/logo_anr.jpg new file mode 100644 index 0000000..dcef212 Binary files /dev/null and b/docs/source/_static/images/logo_anr.jpg differ diff --git a/docs/source/_static/images/logo_cnrs.jpg b/docs/source/_static/images/logo_cnrs.jpg new file mode 100644 index 0000000..902cf6f Binary files /dev/null and b/docs/source/_static/images/logo_cnrs.jpg differ diff --git a/docs/source/contributors.rst b/docs/source/contributors.rst new file mode 100644 index 0000000..f0acea6 --- /dev/null +++ b/docs/source/contributors.rst @@ -0,0 +1,6 @@ +Contributors +============ + +.. include:: ../../CONTRIBUTORS.md + :parser: myst_parser.sphinx_ + :start-line: 2 diff --git a/docs/source/index.rst b/docs/source/index.rst index 7ff7d22..3d53ef4 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -22,6 +22,7 @@ Contents auto_examples/index releases .github/CONTRIBUTING + contributors .github/CODE_OF_CONDUCT diff --git a/docs/source/releases.rst b/docs/source/releases.rst index 8250a4d..b2c7a44 100644 --- a/docs/source/releases.rst +++ b/docs/source/releases.rst @@ -3,4 +3,4 @@ Releases .. include:: ../../RELEASES.md :parser: myst_parser.sphinx_ - :start-line: 3 + :start-line: 2 diff --git a/examples/others/plot_logo.py b/examples/others/plot_logo.py index 9414371..bb4f640 100644 --- a/examples/others/plot_logo.py +++ b/examples/others/plot_logo.py @@ -18,7 +18,7 @@ matplotlib and ploting teh solution of the EMD solver from POT. # sphinx_gallery_thumbnail_number = 1 -# %% +# %% Load modules import numpy as np import matplotlib.pyplot as pl import ot @@ -36,21 +36,21 @@ p2 = np.array([[1.5, 6], [2, 4], [2, 5], [1.5, 3], [0.5, 2], [.5, 1], ]) o1 = np.array([[0, 6.], [-1, 5], [-1.5, 4], [-1.5, 3], [-1, 2], [0, 1], ]) o2 = np.array([[1, 6.], [2, 5], [2.5, 4], [2.5, 3], [2, 2], [1, 1], ]) -# scaling and translation for letter O +# Scaling and translation for letter O o1[:, 0] += 6.4 o2[:, 0] += 6.4 o1[:, 0] *= 0.6 o2[:, 0] *= 0.6 -# letter T +# Letter T t1 = np.array([[-1, 6.], [-1, 5], [0, 4], [0, 3], [0, 2], [0, 1], ]) t2 = np.array([[1.5, 6.], [1.5, 5], [0.5, 4], [0.5, 3], [0.5, 2], [0.5, 1], ]) -# translatin the T +# Translating the T t1[:, 0] += 7.1 t2[:, 0] += 7.1 -# Cocatenate all letters +# Concatenate all letters x1 = np.concatenate((p1, o1, t1), axis=0) x2 = np.concatenate((p2, o2, t2), axis=0) diff --git a/ot/__init__.py b/ot/__init__.py index c5e1967..86ed94e 100644 --- a/ot/__init__.py +++ b/ot/__init__.py @@ -51,7 +51,7 @@ from .factored import factored_optimal_transport # utils functions from .utils import dist, unif, tic, toc, toq -__version__ = "0.8.2dev" +__version__ = "0.8.2" __all__ = ['emd', 'emd2', 'emd_1d', 'sinkhorn', 'sinkhorn2', 'utils', 'datasets', 'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov', -- cgit v1.2.3