From e885d78cc9608d791a9d1561d2f4e0b783ba0761 Mon Sep 17 00:00:00 2001
From: Kilian Fatras
Date: Tue, 28 Aug 2018 17:24:07 -0700
Subject: debug sgd dual

---
 test/test_bregman.py | 10 +++++-----
 test/test_da.py      | 52 ++++++++++++++++++++++++++--------------------------
 test/test_dr.py      |  4 ++--
 test/test_gromov.py  | 16 ++++++++--------
 test/test_optim.py   |  8 ++++----
 test/test_ot.py      |  2 +-
 test/test_plot.py    |  8 ++++----
 7 files changed, 50 insertions(+), 50 deletions(-)

diff --git a/test/test_bregman.py b/test/test_bregman.py
index 4a800fd..c8e9179 100644
--- a/test/test_bregman.py
+++ b/test/test_bregman.py
@@ -83,8 +83,8 @@ def test_bary():
     n_bins = 100  # nb bins
 
     # Gaussian distributions
-    a1 = ot.datasets.get_1D_gauss(n_bins, m=30, s=10)  # m= mean, s= std
-    a2 = ot.datasets.get_1D_gauss(n_bins, m=40, s=10)
+    a1 = ot.datasets.make_1D_gauss(n_bins, m=30, s=10)  # m= mean, s= std
+    a2 = ot.datasets.make_1D_gauss(n_bins, m=40, s=10)
 
     # creating matrix A containing all distributions
     A = np.vstack((a1, a2)).T
@@ -110,10 +110,10 @@ def test_unmix():
     n_bins = 50  # nb bins
 
     # Gaussian distributions
-    a1 = ot.datasets.get_1D_gauss(n_bins, m=20, s=10)  # m= mean, s= std
-    a2 = ot.datasets.get_1D_gauss(n_bins, m=40, s=10)
+    a1 = ot.datasets.make_1D_gauss(n_bins, m=20, s=10)  # m= mean, s= std
+    a2 = ot.datasets.make_1D_gauss(n_bins, m=40, s=10)
 
-    a = ot.datasets.get_1D_gauss(n_bins, m=30, s=10)
+    a = ot.datasets.make_1D_gauss(n_bins, m=30, s=10)
 
     # creating matrix A containing all distributions
     D = np.vstack((a1, a2)).T
diff --git a/test/test_da.py b/test/test_da.py
index 3022721..97e23da 100644
--- a/test/test_da.py
+++ b/test/test_da.py
@@ -8,7 +8,7 @@ import numpy as np
 from numpy.testing.utils import assert_allclose, assert_equal
 
 import ot
-from ot.datasets import get_data_classif
+from ot.datasets import make_data_classif
 from ot.utils import unif
 
 
@@ -19,8 +19,8 @@ def test_sinkhorn_lpl1_transport_class():
     ns = 150
     nt = 200
 
-    Xs, ys = get_data_classif('3gauss', ns)
-    Xt, yt = get_data_classif('3gauss2', nt)
+    Xs, ys = make_data_classif('3gauss', ns)
+    Xt, yt = make_data_classif('3gauss2', nt)
 
     otda = ot.da.SinkhornLpl1Transport()
 
@@ -45,7 +45,7 @@ def test_sinkhorn_lpl1_transport_class():
     transp_Xs = otda.transform(Xs=Xs)
     assert_equal(transp_Xs.shape, Xs.shape)
 
-    Xs_new, _ = get_data_classif('3gauss', ns + 1)
+    Xs_new, _ = make_data_classif('3gauss', ns + 1)
     transp_Xs_new = otda.transform(Xs_new)
 
     # check that the oos method is working
@@ -55,7 +55,7 @@ def test_sinkhorn_lpl1_transport_class():
     transp_Xt = otda.inverse_transform(Xt=Xt)
     assert_equal(transp_Xt.shape, Xt.shape)
 
-    Xt_new, _ = get_data_classif('3gauss2', nt + 1)
+    Xt_new, _ = make_data_classif('3gauss2', nt + 1)
     transp_Xt_new = otda.inverse_transform(Xt=Xt_new)
 
     # check that the oos method is working
@@ -92,8 +92,8 @@ def test_sinkhorn_l1l2_transport_class():
     ns = 150
     nt = 200
 
-    Xs, ys = get_data_classif('3gauss', ns)
-    Xt, yt = get_data_classif('3gauss2', nt)
+    Xs, ys = make_data_classif('3gauss', ns)
+    Xt, yt = make_data_classif('3gauss2', nt)
 
     otda = ot.da.SinkhornL1l2Transport()
 
@@ -119,7 +119,7 @@ def test_sinkhorn_l1l2_transport_class():
     transp_Xs = otda.transform(Xs=Xs)
     assert_equal(transp_Xs.shape, Xs.shape)
 
-    Xs_new, _ = get_data_classif('3gauss', ns + 1)
+    Xs_new, _ = make_data_classif('3gauss', ns + 1)
     transp_Xs_new = otda.transform(Xs_new)
 
     # check that the oos method is working
@@ -129,7 +129,7 @@ def test_sinkhorn_l1l2_transport_class():
     transp_Xt = otda.inverse_transform(Xt=Xt)
     assert_equal(transp_Xt.shape, Xt.shape)
 
-    Xt_new, _ = get_data_classif('3gauss2', nt + 1)
+    Xt_new, _ = make_data_classif('3gauss2', nt + 1)
     transp_Xt_new = otda.inverse_transform(Xt=Xt_new)
 
     # check that the oos method is working
@@ -173,8 +173,8 @@ def test_sinkhorn_transport_class():
     ns = 150
     nt = 200
 
-    Xs, ys = get_data_classif('3gauss', ns)
-    Xt, yt = get_data_classif('3gauss2', nt)
+    Xs, ys = make_data_classif('3gauss', ns)
+    Xt, yt = make_data_classif('3gauss2', nt)
 
     otda = ot.da.SinkhornTransport()
 
@@ -200,7 +200,7 @@ def test_sinkhorn_transport_class():
     transp_Xs = otda.transform(Xs=Xs)
     assert_equal(transp_Xs.shape, Xs.shape)
 
-    Xs_new, _ = get_data_classif('3gauss', ns + 1)
+    Xs_new, _ = make_data_classif('3gauss', ns + 1)
     transp_Xs_new = otda.transform(Xs_new)
 
     # check that the oos method is working
@@ -210,7 +210,7 @@ def test_sinkhorn_transport_class():
     transp_Xt = otda.inverse_transform(Xt=Xt)
     assert_equal(transp_Xt.shape, Xt.shape)
 
-    Xt_new, _ = get_data_classif('3gauss2', nt + 1)
+    Xt_new, _ = make_data_classif('3gauss2', nt + 1)
     transp_Xt_new = otda.inverse_transform(Xt=Xt_new)
 
     # check that the oos method is working
@@ -252,8 +252,8 @@ def test_emd_transport_class():
     ns = 150
     nt = 200
 
-    Xs, ys = get_data_classif('3gauss', ns)
-    Xt, yt = get_data_classif('3gauss2', nt)
+    Xs, ys = make_data_classif('3gauss', ns)
+    Xt, yt = make_data_classif('3gauss2', nt)
 
     otda = ot.da.EMDTransport()
 
@@ -278,7 +278,7 @@ def test_emd_transport_class():
     transp_Xs = otda.transform(Xs=Xs)
     assert_equal(transp_Xs.shape, Xs.shape)
 
-    Xs_new, _ = get_data_classif('3gauss', ns + 1)
+    Xs_new, _ = make_data_classif('3gauss', ns + 1)
     transp_Xs_new = otda.transform(Xs_new)
 
     # check that the oos method is working
@@ -288,7 +288,7 @@ def test_emd_transport_class():
     transp_Xt = otda.inverse_transform(Xt=Xt)
     assert_equal(transp_Xt.shape, Xt.shape)
 
-    Xt_new, _ = get_data_classif('3gauss2', nt + 1)
+    Xt_new, _ = make_data_classif('3gauss2', nt + 1)
     transp_Xt_new = otda.inverse_transform(Xt=Xt_new)
 
     # check that the oos method is working
@@ -329,9 +329,9 @@ def test_mapping_transport_class():
     ns = 60
     nt = 120
 
-    Xs, ys = get_data_classif('3gauss', ns)
-    Xt, yt = get_data_classif('3gauss2', nt)
-    Xs_new, _ = get_data_classif('3gauss', ns + 1)
+    Xs, ys = make_data_classif('3gauss', ns)
+    Xt, yt = make_data_classif('3gauss2', nt)
+    Xs_new, _ = make_data_classif('3gauss', ns + 1)
 
     ##########################################################################
     # kernel == linear mapping tests
@@ -449,8 +449,8 @@ def test_linear_mapping():
     ns = 150
     nt = 200
 
-    Xs, ys = get_data_classif('3gauss', ns)
-    Xt, yt = get_data_classif('3gauss2', nt)
+    Xs, ys = make_data_classif('3gauss', ns)
+    Xt, yt = make_data_classif('3gauss2', nt)
 
     A, b = ot.da.OT_mapping_linear(Xs, Xt)
 
@@ -467,8 +467,8 @@ def test_linear_mapping_class():
     ns = 150
     nt = 200
 
-    Xs, ys = get_data_classif('3gauss', ns)
-    Xt, yt = get_data_classif('3gauss2', nt)
+    Xs, ys = make_data_classif('3gauss', ns)
+    Xt, yt = make_data_classif('3gauss2', nt)
 
     otmap = ot.da.LinearTransport()
 
@@ -491,8 +491,8 @@ def test_otda():
     n_samples = 150  # nb samples
     np.random.seed(0)
 
-    xs, ys = ot.datasets.get_data_classif('3gauss', n_samples)
-    xt, yt = ot.datasets.get_data_classif('3gauss2', n_samples)
+    xs, ys = ot.datasets.make_data_classif('3gauss', n_samples)
+    xt, yt = ot.datasets.make_data_classif('3gauss2', n_samples)
 
     a, b = ot.unif(n_samples), ot.unif(n_samples)
 
diff --git a/test/test_dr.py b/test/test_dr.py
index 915012d..c5df287 100644
--- a/test/test_dr.py
+++ b/test/test_dr.py
@@ -22,7 +22,7 @@ def test_fda():
     np.random.seed(0)
 
     # generate gaussian dataset
-    xs, ys = ot.datasets.get_data_classif('gaussrot', n_samples)
+    xs, ys = ot.datasets.make_data_classif('gaussrot', n_samples)
 
     n_features_noise = 8
 
@@ -44,7 +44,7 @@ def test_wda():
     np.random.seed(0)
 
     # generate gaussian dataset
-    xs, ys = ot.datasets.get_data_classif('gaussrot', n_samples)
+    xs, ys = ot.datasets.make_data_classif('gaussrot', n_samples)
 
     n_features_noise = 8
 
diff --git a/test/test_gromov.py b/test/test_gromov.py
index bb23469..fb86274 100644
--- a/test/test_gromov.py
+++ b/test/test_gromov.py
@@ -15,7 +15,7 @@ def test_gromov():
     mu_s = np.array([0, 0])
     cov_s = np.array([[1, 0], [0, 1]])
 
-    xs = ot.datasets.get_2D_samples_gauss(n_samples, mu_s, cov_s)
+    xs = ot.datasets.make_2D_samples_gauss(n_samples, mu_s, cov_s)
 
     xt = xs[::-1].copy()
 
@@ -55,7 +55,7 @@ def test_entropic_gromov():
     mu_s = np.array([0, 0])
     cov_s = np.array([[1, 0], [0, 1]])
 
-    xs = ot.datasets.get_2D_samples_gauss(n_samples, mu_s, cov_s)
+    xs = ot.datasets.make_2D_samples_gauss(n_samples, mu_s, cov_s)
 
     xt = xs[::-1].copy()
 
@@ -96,8 +96,8 @@ def test_gromov_barycenter():
     ns = 50
     nt = 60
 
-    Xs, ys = ot.datasets.get_data_classif('3gauss', ns)
-    Xt, yt = ot.datasets.get_data_classif('3gauss2', nt)
+    Xs, ys = ot.datasets.make_data_classif('3gauss', ns)
+    Xt, yt = ot.datasets.make_data_classif('3gauss2', nt)
 
     C1 = ot.dist(Xs)
     C2 = ot.dist(Xt)
@@ -123,8 +123,8 @@ def test_gromov_entropic_barycenter():
     ns = 50
     nt = 60
 
-    Xs, ys = ot.datasets.get_data_classif('3gauss', ns)
-    Xt, yt = ot.datasets.get_data_classif('3gauss2', nt)
+    Xs, ys = ot.datasets.make_data_classif('3gauss', ns)
+    Xt, yt = ot.datasets.make_data_classif('3gauss2', nt)
 
     C1 = ot.dist(Xs)
     C2 = ot.dist(Xt)
@@ -133,13 +133,13 @@ def test_gromov_entropic_barycenter():
     Cb = ot.gromov.entropic_gromov_barycenters(n_samples, [C1, C2],
                                                [ot.unif(ns), ot.unif(nt)
                                                 ], ot.unif(n_samples), [.5, .5],
-                                               'square_loss', 1e-3,
+                                               'square_loss', 2e-3,
                                                max_iter=100, tol=1e-3)
     np.testing.assert_allclose(Cb.shape, (n_samples, n_samples))
 
     Cb2 = ot.gromov.entropic_gromov_barycenters(n_samples, [C1, C2],
                                                 [ot.unif(ns), ot.unif(nt)
                                                  ], ot.unif(n_samples), [.5, .5],
-                                                'kl_loss', 1e-3,
+                                                'kl_loss', 2e-3,
                                                 max_iter=100, tol=1e-3)
     np.testing.assert_allclose(Cb2.shape, (n_samples, n_samples))
diff --git a/test/test_optim.py b/test/test_optim.py
index 69496a5..dfefe59 100644
--- a/test/test_optim.py
+++ b/test/test_optim.py
@@ -16,8 +16,8 @@ def test_conditional_gradient():
     x = np.arange(n_bins, dtype=np.float64)
 
     # Gaussian distributions
-    a = ot.datasets.get_1D_gauss(n_bins, m=20, s=5)  # m= mean, s= std
-    b = ot.datasets.get_1D_gauss(n_bins, m=60, s=10)
+    a = ot.datasets.make_1D_gauss(n_bins, m=20, s=5)  # m= mean, s= std
+    b = ot.datasets.make_1D_gauss(n_bins, m=60, s=10)
 
     # loss matrix
     M = ot.dist(x.reshape((n_bins, 1)), x.reshape((n_bins, 1)))
@@ -45,8 +45,8 @@ def test_generalized_conditional_gradient():
     x = np.arange(n_bins, dtype=np.float64)
 
     # Gaussian distributions
-    a = ot.datasets.get_1D_gauss(n_bins, m=20, s=5)  # m= mean, s= std
-    b = ot.datasets.get_1D_gauss(n_bins, m=60, s=10)
+    a = ot.datasets.make_1D_gauss(n_bins, m=20, s=5)  # m= mean, s= std
+    b = ot.datasets.make_1D_gauss(n_bins, m=60, s=10)
 
     # loss matrix
     M = ot.dist(x.reshape((n_bins, 1)), x.reshape((n_bins, 1)))
diff --git a/test/test_ot.py b/test/test_ot.py
index cc25bf4..399e549 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -9,7 +9,7 @@ import warnings
 import numpy as np
 
 import ot
-from ot.datasets import get_1D_gauss as gauss
+from ot.datasets import make_1D_gauss as gauss
 import pytest
 
 
diff --git a/test/test_plot.py b/test/test_plot.py
index a50ed14..f77d879 100644
--- a/test/test_plot.py
+++ b/test/test_plot.py
@@ -20,8 +20,8 @@ def test_plot1D_mat():
     x = np.arange(n_bins, dtype=np.float64)
 
     # Gaussian distributions
-    a = ot.datasets.get_1D_gauss(n_bins, m=20, s=5)  # m= mean, s= std
-    b = ot.datasets.get_1D_gauss(n_bins, m=60, s=10)
+    a = ot.datasets.make_1D_gauss(n_bins, m=20, s=5)  # m= mean, s= std
+    b = ot.datasets.make_1D_gauss(n_bins, m=60, s=10)
 
     # loss matrix
     M = ot.dist(x.reshape((n_bins, 1)), x.reshape((n_bins, 1)))
@@ -43,8 +43,8 @@ def test_plot2D_samples_mat():
     mu_t = np.array([4, 4])
     cov_t = np.array([[1, -.8], [-.8, 1]])
 
-    xs = ot.datasets.get_2D_samples_gauss(n_bins, mu_s, cov_s)
-    xt = ot.datasets.get_2D_samples_gauss(n_bins, mu_t, cov_t)
+    xs = ot.datasets.make_2D_samples_gauss(n_bins, mu_s, cov_s)
+    xt = ot.datasets.make_2D_samples_gauss(n_bins, mu_t, cov_t)
 
     G = 1.0 * (np.random.rand(n_bins, n_bins) < 0.01)
--
cgit v1.2.3
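Note on the rename above: this first patch mechanically switches the test suite from the deprecated ot.datasets.get_* helpers to their make_* replacements (make_1D_gauss, make_2D_samples_gauss, make_data_classif). A minimal sketch of the renamed helpers in use, separate from the patch itself (it assumes a POT version where the make_* names exist; the sizes and parameters are illustrative only):

    import numpy as np
    import ot

    n_bins = 100
    # 1D histograms supported on 0..n_bins-1 (m = mean, s = std)
    a = ot.datasets.make_1D_gauss(n_bins, m=30, s=10)
    b = ot.datasets.make_1D_gauss(n_bins, m=60, s=10)
    x = np.arange(n_bins, dtype=np.float64)
    M = ot.dist(x.reshape((n_bins, 1)), x.reshape((n_bins, 1)))  # squared Euclidean cost
    G = ot.emd(a, b, M)  # exact OT plan between the two histograms

    # labelled 2D samples of the kind used by the domain-adaptation tests
    Xs, ys = ot.datasets.make_data_classif('3gauss', 150)
    Xt, yt = ot.datasets.make_data_classif('3gauss2', 200)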
From b13feb07eaff4d971b749663652e5f8811c1992c Mon Sep 17 00:00:00 2001
From: Kilian Fatras
Date: Tue, 28 Aug 2018 18:18:37 -0700
Subject: added gaussian test

---
 README.md               |  7 +++++--
 test/test_stochastic.py | 47 ++++++++++++++++++++++++++++++++++++++---------
 2 files changed, 43 insertions(+), 11 deletions(-)

diff --git a/README.md b/README.md
index 677a23b..1d3b097 100644
--- a/README.md
+++ b/README.md
@@ -24,6 +24,7 @@ It provides the following solvers:
 * Wasserstein Discriminant Analysis [11] (requires autograd + pymanopt).
 * Gromov-Wasserstein distances and barycenters ([13] and regularized [12])
 * Stochastic Optimization for Large-scale Optimal Transport (semi-dual problem [18] and dual problem [19])
+* Non regularized free support Wasserstein barycenters [20].
 
 Some demonstrations (both in Python and Jupyter Notebook format) are available in the examples folder.
 
@@ -163,7 +164,7 @@ The contributors to this library are:
 * [Stanislas Chambon](https://slasnista.github.io/)
 * [Antoine Rolet](https://arolet.github.io/)
 * Erwan Vautier (Gromov-Wasserstein)
-* [Kilian Fatras](https://kilianfatras.github.io/) (Stochastic optimization)
+* [Kilian Fatras](https://kilianfatras.github.io/)
 
 This toolbox benefit a lot from open source research and we would like to thank the following persons for providing some code (in various languages):
 
@@ -222,6 +223,8 @@ You can also post bug reports and feature requests in Github issues. Make sure t
 
 [17] Blondel, M., Seguy, V., & Rolet, A. (2018). [Smooth and Sparse Optimal Transport](https://arxiv.org/abs/1710.06276). Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics (AISTATS).
 
-[18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. (2016) [Stochastic Optimization for Large-scale Optimal Transport](arXiv preprint arxiv:1605.08527). Advances in Neural Information Processing Systems (2016).
+[18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. (2016) [Stochastic Optimization for Large-scale Optimal Transport](https://arxiv.org/abs/1605.08527). Advances in Neural Information Processing Systems (2016).
 
 [19] Seguy, V., Bhushan Damodaran, B., Flamary, R., Courty, N., Rolet, A.& Blondel, M. [Large-scale Optimal Transport and Mapping Estimation](https://arxiv.org/pdf/1711.02283.pdf). International Conference on Learning Representation (2018)
+
+[20] Cuturi, M. and Doucet, A. (2014) [Fast Computation of Wasserstein Barycenters](http://proceedings.mlr.press/v32/cuturi14.html). International Conference in Machine Learning
diff --git a/test/test_stochastic.py b/test/test_stochastic.py
index f315c88..88ad666 100644
--- a/test/test_stochastic.py
+++ b/test/test_stochastic.py
@@ -137,8 +137,8 @@ def test_stochastic_dual_sgd():
     # test sgd
     n = 10
     reg = 1
-    numItermax = 300000
-    batch_size = 8
+    numItermax = 15000
+    batch_size = 10
     rng = np.random.RandomState(0)
 
     x = rng.randn(n, 2)
@@ -151,9 +151,9 @@ def test_stochastic_dual_sgd():
 
     # check constratints
     np.testing.assert_allclose(
-        u, G.sum(1), atol=1e-02)  # cf convergence sgd
+        u, G.sum(1), atol=1e-04)  # cf convergence sgd
     np.testing.assert_allclose(
-        u, G.sum(0), atol=1e-02)  # cf convergence sgd
+        u, G.sum(0), atol=1e-04)  # cf convergence sgd
 
 
 #############################################################################
@@ -168,10 +168,11 @@ def test_dual_sgd_sinkhorn():
     # test all dual algorithms
     n = 10
     reg = 1
-    nb_iter = 300000
-    batch_size = 8
+    nb_iter = 150000
+    batch_size = 10
     rng = np.random.RandomState(0)
 
+# Test uniform
     x = rng.randn(n, 2)
     u = ot.utils.unif(n)
     zero = np.zeros(n)
@@ -184,8 +185,36 @@ def test_dual_sgd_sinkhorn():
 
     # check constratints
     np.testing.assert_allclose(
-        zero, (G_sgd - G_sinkhorn).sum(1), atol=1e-02)  # cf convergence sgd
+        zero, (G_sgd - G_sinkhorn).sum(1), atol=1e-04)  # cf convergence sgd
+    np.testing.assert_allclose(
+        zero, (G_sgd - G_sinkhorn).sum(0), atol=1e-04)  # cf convergence sgd
+    np.testing.assert_allclose(
+        G_sgd, G_sinkhorn, atol=1e-04)  # cf convergence sgd
+
+# Test gaussian
+    n = 30
+    n_source = n
+    n_target = n
+    reg = 1
+    numItermax = 150000
+    batch_size = 30
+
+    a = ot.datasets.get_1D_gauss(n_source, m=15, s=5)  # m= mean, s= std
+    b = ot.datasets.get_1D_gauss(n_target, m=15, s=5)
+    X_source = np.arange(n_source,dtype=np.float64)
+    Y_target = np.arange(n_target,dtype=np.float64)
+    M = ot.dist(X_source.reshape((n_source, 1)), Y_target.reshape((n_target, 1)))
+    M /= M.max()
+
+    G_sgd = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size,
+                                              numItermax=nb_iter)
+
+    G_sinkhorn = ot.sinkhorn(a, b, M, reg)
+
+    # check constratints
+    np.testing.assert_allclose(
+        zero, (G_sgd - G_sinkhorn).sum(1), atol=1e-04)  # cf convergence sgd
     np.testing.assert_allclose(
-        zero, (G_sgd - G_sinkhorn).sum(0), atol=1e-02)  # cf convergence sgd
+        zero, (G_sgd - G_sinkhorn).sum(0), atol=1e-04)  # cf convergence sgd
     np.testing.assert_allclose(
-        G_sgd, G_sinkhorn, atol=1e-02)  # cf convergence sgd
+        G_sgd, G_sinkhorn, atol=1e-04)  # cf convergence sgd
--
cgit v1.2.3
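The patch above adds a Gaussian case to test_dual_sgd_sinkhorn: the SGD solver for the dual entropic problem should produce (almost) the same plan as Sinkhorn on the same inputs. A stand-alone sketch of that check, not the patch itself (sizes are illustrative, the helper name and the looser tolerance follow later patches in this series):

    import numpy as np
    import ot

    n = 30
    reg = 1
    batch_size = 30
    a = ot.datasets.make_1D_gauss(n, m=15, s=5)
    b = ot.datasets.make_1D_gauss(n, m=15, s=5)
    x = np.arange(n, dtype=np.float64)
    M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
    M /= M.max()

    # stochastic dual solver vs. Sinkhorn on the same problem
    G_sgd = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size,
                                              numItermax=150000)
    G_sinkhorn = ot.sinkhorn(a, b, M, reg)
    np.testing.assert_allclose(G_sgd, G_sinkhorn, atol=1e-03)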
From cd193f78d392143ea9421da0f7e55ca8b75b8a0d Mon Sep 17 00:00:00 2001
From: Kilian Fatras
Date: Tue, 28 Aug 2018 18:25:40 -0700
Subject: updated README and fixed pep8

---
 test/test_stochastic.py | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/test/test_stochastic.py b/test/test_stochastic.py
index 88ad666..4bbe230 100644
--- a/test/test_stochastic.py
+++ b/test/test_stochastic.py
@@ -193,17 +193,14 @@ def test_dual_sgd_sinkhorn():
 
 # Test gaussian
     n = 30
-    n_source = n
-    n_target = n
     reg = 1
-    numItermax = 150000
     batch_size = 30
 
-    a = ot.datasets.get_1D_gauss(n_source, m=15, s=5)  # m= mean, s= std
-    b = ot.datasets.get_1D_gauss(n_target, m=15, s=5)
-    X_source = np.arange(n_source,dtype=np.float64)
-    Y_target = np.arange(n_target,dtype=np.float64)
-    M = ot.dist(X_source.reshape((n_source, 1)), Y_target.reshape((n_target, 1)))
+    a = ot.datasets.get_1D_gauss(n, m=15, s=5)  # m= mean, s= std
+    b = ot.datasets.get_1D_gauss(n, m=15, s=5)
+    X_source = np.arange(n, dtype=np.float64)
+    Y_target = np.arange(n, dtype=np.float64)
+    M = ot.dist(X_source.reshape((n, 1)), Y_target.reshape((n, 1)))
     M /= M.max()
 
     G_sgd = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size,
--
cgit v1.2.3
From 37e3b29595223399ebe4710ac2bb061004814118 Mon Sep 17 00:00:00 2001
From: Kilian Fatras
Date: Tue, 28 Aug 2018 18:51:28 -0700
Subject: fixed argument functions

---
 ot/stochastic.py        | 8 ++++----
 test/test_stochastic.py | 4 ++--
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/ot/stochastic.py b/ot/stochastic.py
index 0788f61..e33f6a0 100644
--- a/ot/stochastic.py
+++ b/ot/stochastic.py
@@ -435,7 +435,7 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
 ##############################################################################
 
 
-def batch_grad_dual(M, reg, a, b, alpha, beta, batch_size, batch_alpha,
+def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
                     batch_beta):
     '''
     Computes the partial gradient of F_\W_varepsilon
@@ -528,7 +528,7 @@ def batch_grad_dual(M, reg, a, b, alpha, beta, batch_size, batch_alpha,
     return grad_alpha, grad_beta
 
 
-def sgd_entropic_regularization(M, reg, a, b, batch_size, numItermax, lr):
+def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
     '''
     Compute the sgd algorithm to solve the regularized discrete measures
         optimal transport dual problem
@@ -612,7 +612,7 @@ def sgd_entropic_regularization(M, reg, a, b, batch_size, numItermax, lr):
         k = np.sqrt(cur_iter / 100 + 1)
         batch_alpha = np.random.choice(n_source, batch_size, replace=False)
         batch_beta = np.random.choice(n_target, batch_size, replace=False)
-        update_alpha, update_beta = batch_grad_dual(M, reg, a, b, cur_alpha,
+        update_alpha, update_beta = batch_grad_dual(a, b, M, reg, cur_alpha,
                                                     cur_beta, batch_size,
                                                     batch_alpha, batch_beta)
         cur_alpha += (lr / k) * update_alpha
@@ -698,7 +698,7 @@ def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1,
         arXiv preprint arxiv:1711.02283.
     '''
 
-    opt_alpha, opt_beta = sgd_entropic_regularization(M, reg, a, b, batch_size,
+    opt_alpha, opt_beta = sgd_entropic_regularization(a, b, M, reg, batch_size,
                                                       numItermax, lr)
     pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) *
           a[:, None] * b[None, :])
diff --git a/test/test_stochastic.py b/test/test_stochastic.py
index 4bbe230..f1d4825 100644
--- a/test/test_stochastic.py
+++ b/test/test_stochastic.py
@@ -196,8 +196,8 @@ def test_dual_sgd_sinkhorn():
     reg = 1
     batch_size = 30
 
-    a = ot.datasets.get_1D_gauss(n, m=15, s=5)  # m= mean, s= std
-    b = ot.datasets.get_1D_gauss(n, m=15, s=5)
+    a = ot.datasets.get_1D_gauss(n, 15, 5)  # m= mean, s= std
+    b = ot.datasets.get_1D_gauss(n, 15, 5)
     X_source = np.arange(n, dtype=np.float64)
     Y_target = np.arange(n, dtype=np.float64)
     M = ot.dist(X_source.reshape((n, 1)), Y_target.reshape((n, 1)))
--
cgit v1.2.3
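The patch above only reorders arguments: the low-level helpers batch_grad_dual and sgd_entropic_regularization now take the measures first, (a, b, M, reg, ...), matching the public solve_dual_entropic. A hedged sketch of the reordered call and of how the primal plan is rebuilt from the dual potentials (mirroring the body of solve_dual_entropic shown above; the problem sizes and learning rate are illustrative assumptions, not values from the patch):

    import numpy as np
    import ot
    from ot.stochastic import sgd_entropic_regularization

    n = 20
    reg = 1.0
    batch_size = 10
    a = ot.utils.unif(n)  # uniform source weights
    b = ot.utils.unif(n)  # uniform target weights
    x = np.arange(n, dtype=np.float64)
    M = ot.dist(x.reshape((n, 1)), x.reshape((n, 1)))
    M /= M.max()

    # new argument order: measures first, then cost matrix and regularization
    alpha, beta = sgd_entropic_regularization(a, b, M, reg, batch_size,
                                              numItermax=20000, lr=0.1)
    # primal plan recovered from the dual variables, as in solve_dual_entropic
    pi = (np.exp((alpha[:, None] + beta[None, :] - M) / reg) *
          a[:, None] * b[None, :])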
From e5087799c1cf93a76579b88bda930cfcce36b084 Mon Sep 17 00:00:00 2001
From: Kilian Fatras
Date: Tue, 28 Aug 2018 23:22:05 -0700
Subject: fixed test gauss

---
 test/test_stochastic.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/test_stochastic.py b/test/test_stochastic.py
index f1d4825..10e62c1 100644
--- a/test/test_stochastic.py
+++ b/test/test_stochastic.py
@@ -196,8 +196,8 @@ def test_dual_sgd_sinkhorn():
     reg = 1
     batch_size = 30
 
-    a = ot.datasets.get_1D_gauss(n, 15, 5)  # m= mean, s= std
-    b = ot.datasets.get_1D_gauss(n, 15, 5)
+    a = ot.datasets.make_1D_gauss(n, 15, 5)  # m= mean, s= std
+    b = ot.datasets.make_1D_gauss(n, 15, 5)
     X_source = np.arange(n, dtype=np.float64)
     Y_target = np.arange(n, dtype=np.float64)
     M = ot.dist(X_source.reshape((n, 1)), Y_target.reshape((n, 1)))
--
cgit v1.2.3

From b2b5ffc529a7a3dbc51408cd2df59617a7e49a9a Mon Sep 17 00:00:00 2001
From: Kilian Fatras
Date: Tue, 28 Aug 2018 23:32:55 -0700
Subject: fixed test error

---
 test/test_stochastic.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/test_stochastic.py b/test/test_stochastic.py
index 10e62c1..74228a3 100644
--- a/test/test_stochastic.py
+++ b/test/test_stochastic.py
@@ -195,6 +195,7 @@ def test_dual_sgd_sinkhorn():
     n = 30
     reg = 1
     batch_size = 30
+    zero = np.zeros(n)
 
     a = ot.datasets.make_1D_gauss(n, 15, 5)  # m= mean, s= std
     b = ot.datasets.make_1D_gauss(n, 15, 5)
--
cgit v1.2.3
From fd6371cc557ba73c4f5d1142fa8de8d956a850f0 Mon Sep 17 00:00:00 2001
From: Kilian Fatras
Date: Wed, 29 Aug 2018 14:06:58 -0700
Subject: replaced marginal tests

---
 ot/lp/__init__.py       |  2 --
 ot/stochastic.py        | 36 +++++++++++++++++++-----------------
 test/test_stochastic.py | 27 ++++++++++++---------------
 3 files changed, 31 insertions(+), 34 deletions(-)

diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 4c0d170..5dda82a 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -18,8 +18,6 @@ from .emd_wrap import emd_c, check_result
 from ..utils import parmap
 from .cvx import barycenter
 
-__all__=['emd', 'emd2', 'barycenter', 'cvx']
-
 
 def emd(a, b, M, numItermax=100000, log=False):
     """Solves the Earth Movers distance problem and returns the OT matrix
diff --git a/ot/stochastic.py b/ot/stochastic.py
index e33f6a0..a369ba8 100644
--- a/ot/stochastic.py
+++ b/ot/stochastic.py
@@ -450,24 +450,29 @@ def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
 
         \forall j in batch_alpha,
                  grad_beta_j = beta_j * batch_size/len(alpha) -
-                 sum_{j in batch_alpha} exp((alpha_i + beta_j - M_{i,j})/reg)
+                 sum_{i in batch_alpha} exp((alpha_i + beta_j - M_{i,j})/reg)
                         * a_i * b_j
 
     where :
     - M is the (ns,nt) metric cost matrix
    - alpha, beta are dual variables in R^ixR^J
    - reg is the regularization term
-   - batch_alpha and batch_beta are list of index
+   - batch_alpha and batch_beta are lists of index
+   - a and b are source and target weights (sum to 1)
+
    The algorithm used for solving the dual problem is the SGD algorithm
    as proposed in [19]_ [alg.1]

    Parameters
    ----------
-
-    reg : float number,
-        Regularization term > 0
+    a : np.ndarray(ns,),
+        source measure
+    b : np.ndarray(nt,),
+        target measure
     M : np.ndarray(ns, nt),
         cost matrix
+    reg : float number,
+        Regularization term > 0
     alpha : np.ndarray(ns,)
         dual variable
     beta : np.ndarray(nt,)
@@ -516,8 +521,8 @@ def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
     '''
 
     G = - (np.exp((alpha[batch_alpha, None] + beta[None, batch_beta] -
-                   M[batch_alpha, :][:, batch_beta]) / reg) * a[batch_alpha, None] *
-                   b[None, batch_beta])
+                   M[batch_alpha, :][:, batch_beta]) / reg) *
+           a[batch_alpha, None] * b[None, batch_beta])
     grad_beta = np.zeros(np.shape(M)[1])
     grad_alpha = np.zeros(np.shape(M)[0])
     grad_beta[batch_beta] = (b[batch_beta] * len(batch_alpha) / np.shape(M)[0] +
@@ -548,23 +553,20 @@ def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
 
     Parameters
     ----------
-
+    a : np.ndarray(ns,),
+        source measure
+    b : np.ndarray(nt,),
+        target measure
     M : np.ndarray(ns, nt),
         cost matrix
     reg : float number,
         Regularization term > 0
-    alpha : np.ndarray(ns,)
-        dual variable
-    beta : np.ndarray(nt,)
-        dual variable
     batch_size : int number
         size of the batch
     numItermax : int number
         number of iteration
     lr : float number
         learning rate
-    alternate : bool, optional
-        alternating algorithm
 
     Returns
     -------
@@ -591,8 +593,8 @@ def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
     >>> Y_target = rng.randn(n_target, 2)
     >>> M = ot.dist(X_source, Y_target)
     >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
-                                                          batch_size,
-                                                          numItermax, lr, log)
+                                                           batch_size,
+                                                           numItermax, lr, log)
     >>> print(log['alpha'], log['beta'])
     >>> print(sgd_dual_pi)
@@ -609,7 +611,7 @@ def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
     cur_alpha = np.zeros(n_source)
     cur_beta = np.zeros(n_target)
     for cur_iter in range(numItermax):
-        k = np.sqrt(cur_iter / 100 + 1)
+        k = np.sqrt(cur_iter + 1)
         batch_alpha = np.random.choice(n_source, batch_size, replace=False)
         batch_beta = np.random.choice(n_target, batch_size, replace=False)
         update_alpha, update_beta = batch_grad_dual(a, b, M, reg, cur_alpha,
diff --git a/test/test_stochastic.py b/test/test_stochastic.py
index 74228a3..0128317 100644
--- a/test/test_stochastic.py
+++ b/test/test_stochastic.py
@@ -97,7 +97,6 @@ def test_sag_asgd_sinkhorn():
 
     x = rng.randn(n, 2)
     u = ot.utils.unif(n)
-    zero = np.zeros(n)
     M = ot.dist(x, x)
 
     G_asgd = ot.stochastic.solve_semi_dual_entropic(u, u, M, reg, "asgd",
@@ -108,13 +107,13 @@ def test_sag_asgd_sinkhorn():
 
     # check constratints
     np.testing.assert_allclose(
-        zero, (G_sag - G_sinkhorn).sum(1), atol=1e-03)  # cf convergence sag
+        G_sag.sum(1), G_sinkhorn.sum(1), atol=1e-03)
     np.testing.assert_allclose(
-        zero, (G_sag - G_sinkhorn).sum(0), atol=1e-03)  # cf convergence sag
+        G_sag.sum(0), G_sinkhorn.sum(0), atol=1e-03)
     np.testing.assert_allclose(
-        zero, (G_asgd - G_sinkhorn).sum(1), atol=1e-03)  # cf convergence asgd
+        G_asgd.sum(1), G_sinkhorn.sum(1), atol=1e-03)
     np.testing.assert_allclose(
-        zero, (G_asgd - G_sinkhorn).sum(0), atol=1e-03)  # cf convergence asgd
+        G_asgd.sum(0), G_sinkhorn.sum(0), atol=1e-03)
     np.testing.assert_allclose(
         G_sag, G_sinkhorn, atol=1e-03)  # cf convergence sag
     np.testing.assert_allclose(
@@ -151,9 +150,9 @@ def test_stochastic_dual_sgd():
 
     # check constratints
     np.testing.assert_allclose(
-        u, G.sum(1), atol=1e-04)  # cf convergence sgd
+        u, G.sum(1), atol=1e-03)  # cf convergence sgd
     np.testing.assert_allclose(
-        u, G.sum(0), atol=1e-04)  # cf convergence sgd
+        u, G.sum(0), atol=1e-03)  # cf convergence sgd
 
 
 #############################################################################
@@ -175,7 +174,6 @@ def test_dual_sgd_sinkhorn():
 # Test uniform
     x = rng.randn(n, 2)
     u = ot.utils.unif(n)
-    zero = np.zeros(n)
     M = ot.dist(x, x)
 
     G_sgd = ot.stochastic.solve_dual_entropic(u, u, M, reg, batch_size,
@@ -185,17 +183,16 @@ def test_dual_sgd_sinkhorn():
 
     # check constratints
     np.testing.assert_allclose(
-        zero, (G_sgd - G_sinkhorn).sum(1), atol=1e-04)  # cf convergence sgd
+        G_sgd.sum(1), G_sinkhorn.sum(1), atol=1e-03)
     np.testing.assert_allclose(
-        zero, (G_sgd - G_sinkhorn).sum(0), atol=1e-04)  # cf convergence sgd
+        G_sgd.sum(0), G_sinkhorn.sum(0), atol=1e-03)
     np.testing.assert_allclose(
-        G_sgd, G_sinkhorn, atol=1e-04)  # cf convergence sgd
+        G_sgd, G_sinkhorn, atol=1e-03)  # cf convergence sgd
 
 # Test gaussian
     n = 30
     reg = 1
     batch_size = 30
-    zero = np.zeros(n)
 
     a = ot.datasets.make_1D_gauss(n, 15, 5)  # m= mean, s= std
     b = ot.datasets.make_1D_gauss(n, 15, 5)
@@ -211,8 +208,8 @@ def test_dual_sgd_sinkhorn():
 
     # check constratints
    np.testing.assert_allclose(
-        zero, (G_sgd - G_sinkhorn).sum(1), atol=1e-04)  # cf convergence sgd
+        G_sgd.sum(1), G_sinkhorn.sum(1), atol=1e-03)
     np.testing.assert_allclose(
-        zero, (G_sgd - G_sinkhorn).sum(0), atol=1e-04)  # cf convergence sgd
+        G_sgd.sum(0), G_sinkhorn.sum(0), atol=1e-03)
     np.testing.assert_allclose(
-        G_sgd, G_sinkhorn, atol=1e-04)  # cf convergence sgd
+        G_sgd, G_sinkhorn, atol=1e-03)  # cf convergence sgd
--
cgit v1.2.3
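The patch above also changes what the stochastic tests assert: instead of checking that (G_sgd - G_sinkhorn) sums to zero along rows and columns, the marginals of each plan are compared directly. A small illustrative sketch of that pattern (not taken from the patch; sizes are arbitrary):

    import numpy as np
    import ot

    n = 10
    u = ot.utils.unif(n)
    x = np.random.RandomState(0).randn(n, 2)
    M = ot.dist(x, x)

    G = ot.sinkhorn(u, u, M, reg=1)
    # row/column marginals of the plan should match the input weights
    np.testing.assert_allclose(G.sum(1), u, atol=1e-03)
    np.testing.assert_allclose(G.sum(0), u, atol=1e-03)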
From 63b34bf012076eb89ed112122fdaa65667464ae7 Mon Sep 17 00:00:00 2001
From: Kilian Fatras
Date: Wed, 29 Aug 2018 14:21:33 -0700
Subject: fixed conflicts

---
 test/test_da.py | 63 ---------------------------------------------------------
 1 file changed, 63 deletions(-)

diff --git a/test/test_da.py b/test/test_da.py
index 97e23da..f7f3a9d 100644
--- a/test/test_da.py
+++ b/test/test_da.py
@@ -484,66 +484,3 @@ def test_linear_mapping_class():
     Cst = np.cov(Xst.T)
 
     np.testing.assert_allclose(Ct, Cst, rtol=1e-2, atol=1e-2)
-
-
-def test_otda():
-
-    n_samples = 150  # nb samples
-    np.random.seed(0)
-
-    xs, ys = ot.datasets.make_data_classif('3gauss', n_samples)
-    xt, yt = ot.datasets.make_data_classif('3gauss2', n_samples)
-
-    a, b = ot.unif(n_samples), ot.unif(n_samples)
-
-    # LP problem
-    da_emd = ot.da.OTDA()  # init class
-    da_emd.fit(xs, xt)  # fit distributions
-    da_emd.interp()  # interpolation of source samples
-    da_emd.predict(xs)  # interpolation of source samples
-
-    np.testing.assert_allclose(a, np.sum(da_emd.G, 1))
-    np.testing.assert_allclose(b, np.sum(da_emd.G, 0))
-
-    # sinkhorn regularization
-    lambd = 1e-1
-    da_entrop = ot.da.OTDA_sinkhorn()
-    da_entrop.fit(xs, xt, reg=lambd)
-    da_entrop.interp()
-    da_entrop.predict(xs)
-
-    np.testing.assert_allclose(
-        a, np.sum(da_entrop.G, 1), rtol=1e-3, atol=1e-3)
-    np.testing.assert_allclose(b, np.sum(da_entrop.G, 0), rtol=1e-3, atol=1e-3)
-
-    # non-convex Group lasso regularization
-    reg = 1e-1
-    eta = 1e0
-    da_lpl1 = ot.da.OTDA_lpl1()
-    da_lpl1.fit(xs, ys, xt, reg=reg, eta=eta)
-    da_lpl1.interp()
-    da_lpl1.predict(xs)
-
-    np.testing.assert_allclose(a, np.sum(da_lpl1.G, 1), rtol=1e-3, atol=1e-3)
-    np.testing.assert_allclose(b, np.sum(da_lpl1.G, 0), rtol=1e-3, atol=1e-3)
-
-    # True Group lasso regularization
-    reg = 1e-1
-    eta = 2e0
-    da_l1l2 = ot.da.OTDA_l1l2()
-    da_l1l2.fit(xs, ys, xt, reg=reg, eta=eta, numItermax=20, verbose=True)
-    da_l1l2.interp()
-    da_l1l2.predict(xs)
-
-    np.testing.assert_allclose(a, np.sum(da_l1l2.G, 1), rtol=1e-3, atol=1e-3)
-    np.testing.assert_allclose(b, np.sum(da_l1l2.G, 0), rtol=1e-3, atol=1e-3)
-
-    # linear mapping
-    da_emd = ot.da.OTDA_mapping_linear()  # init class
-    da_emd.fit(xs, xt, numItermax=10)  # fit distributions
-    da_emd.predict(xs)  # interpolation of source samples
-
-    # nonlinear mapping
-    da_emd = ot.da.OTDA_mapping_kernel()  # init class
-    da_emd.fit(xs, xt, numItermax=10)  # fit distributions
-    da_emd.predict(xs)  # interpolation of source samples
--
cgit v1.2.3
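The last patch removes test_otda, which exercised the legacy OTDA classes (ot.da.OTDA, OTDA_sinkhorn, OTDA_lpl1, ...). The class-based transport API still covered by the remaining tests is the fit/transform style shown earlier in this series; a hedged sketch of that usage (default parameters, illustrative sample sizes):

    import ot
    from ot.datasets import make_data_classif

    Xs, ys = make_data_classif('3gauss', 150)
    Xt, yt = make_data_classif('3gauss2', 200)

    otda = ot.da.SinkhornTransport()
    otda.fit(Xs=Xs, Xt=Xt)             # estimate the coupling between the two domains
    transp_Xs = otda.transform(Xs=Xs)  # transport the source samples onto the target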