-rw-r--r--   README.md                |   8
-rw-r--r--   ot/stochastic.py         | 179
-rw-r--r--   test/test_stochastic.py  |  54
3 files changed, 89 insertions, 152 deletions
diff --git a/README.md b/README.md
--- a/README.md
+++ b/README.md
@@ -17,7 +17,6 @@ It provides the following solvers:
 * Entropic regularization OT solver with Sinkhorn Knopp Algorithm [2] and stabilized version [9][10] with optional GPU implementation (requires cudamat).
 * Smooth optimal transport solvers (dual and semi-dual) for KL and squared L2 regularizations [17].
 * Non regularized Wasserstein barycenters [16] with LP solver (only small scale).
-* Non regularized free support Wasserstein barycenters [20].
 * Bregman projections for Wasserstein barycenter [3] and unmixing [4].
 * Optimal transport for domain adaptation with group lasso regularization [5]
 * Conditional gradient [6] and Generalized conditional gradient for regularized OT [7].
@@ -25,6 +24,7 @@ It provides the following solvers:
 * Wasserstein Discriminant Analysis [11] (requires autograd + pymanopt).
 * Gromov-Wasserstein distances and barycenters ([13] and regularized [12])
 * Stochastic Optimization for Large-scale Optimal Transport (semi-dual problem [18] and dual problem [19])
+* Non regularized free support Wasserstein barycenters [20].
 
 Some demonstrations (both in Python and Jupyter Notebook format) are available in the examples folder.
@@ -164,7 +164,7 @@ The contributors to this library are:
 * [Stanislas Chambon](https://slasnista.github.io/)
 * [Antoine Rolet](https://arolet.github.io/)
 * Erwan Vautier (Gromov-Wasserstein)
-* [Kilian Fatras](https://kilianfatras.github.io/) (Stochastic optimization)
+* [Kilian Fatras](https://kilianfatras.github.io/)
 
 This toolbox benefit a lot from open source research and we would like to thank the following persons for providing some code (in various languages):
@@ -223,8 +223,8 @@ You can also post bug reports and feature requests in Github issues. Make sure t
 [17] Blondel, M., Seguy, V., & Rolet, A. (2018). [Smooth and Sparse Optimal Transport](https://arxiv.org/abs/1710.06276). Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics (AISTATS).
 
-[18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. (2016) [Stochastic Optimization for Large-scale Optimal Transport](arXiv preprint arxiv:1605.08527). Advances in Neural Information Processing Systems (2016).
+[18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. (2016) [Stochastic Optimization for Large-scale Optimal Transport](https://arxiv.org/abs/1605.08527). Advances in Neural Information Processing Systems (2016).
 
 [19] Seguy, V., Bhushan Damodaran, B., Flamary, R., Courty, N., Rolet, A. & Blondel, M. [Large-scale Optimal Transport and Mapping Estimation](https://arxiv.org/pdf/1711.02283.pdf). International Conference on Learning Representation (2018)
 
-[20] Cuturi, M. and Doucet, A. (2014) [Fast Computation of Wasserstein Barycenters](http://proceedings.mlr.press/v32/cuturi14.html). International Conference in Machine Learning
\ No newline at end of file
+[20] Cuturi, M. and Doucet, A. (2014) [Fast Computation of Wasserstein Barycenters](http://proceedings.mlr.press/v32/cuturi14.html). International Conference in Machine Learning

diff --git a/ot/stochastic.py b/ot/stochastic.py
index 5e8206e..a369ba8 100644
--- a/ot/stochastic.py
+++ b/ot/stochastic.py
@@ -435,8 +435,8 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
 ##############################################################################
 
 
-def batch_grad_dual_alpha(M, reg, alpha, beta, batch_size, batch_alpha,
-                          batch_beta):
+def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
+                    batch_beta):
     '''
     Computes the partial gradient of F_{W_\varepsilon}
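For orientation before the gradient code: with full batches, the minibatch updates introduced in this commit perform gradient ascent on the discrete entropic dual of [19]. A minimal full-batch sketch; the helper `dual_objective` is illustrative and is not part of `ot.stochastic`:

```python
import numpy as np


def dual_objective(a, b, M, reg, alpha, beta):
    # Illustrative discrete dual of entropy-regularized OT (Seguy et al. [19]):
    #   F(alpha, beta) = <a, alpha> + <b, beta>
    #       - reg * sum_ij a_i * b_j * exp((alpha_i + beta_j - M_ij) / reg)
    # With batch_alpha and batch_beta covering all indices, batch_grad_dual
    # below returns exactly the gradient of this function.
    K = np.exp((alpha[:, None] + beta[None, :] - M) / reg)
    return a.dot(alpha) + b.dot(beta) - reg * np.sum(K * a[:, None] * b[None, :])
```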
@@ -444,104 +444,31 @@ def batch_grad_dual_alpha(M, reg, alpha, beta, batch_size, batch_alpha,
 
     ..math:
         \forall i in batch_alpha,
-        grad_alpha_i = 1 * batch_size -
-        sum_{j in batch_beta} exp((alpha_i + beta_j - M_{i,j})/reg)
+        grad_alpha_i = a_i * batch_size/len(beta) -
+            sum_{j in batch_beta} exp((alpha_i + beta_j - M_{i,j})/reg)
+                * a_i * b_j
-
-    where :
-    - M is the (ns,nt) metric cost matrix
-    - alpha, beta are dual variables in R^ixR^J
-    - reg is the regularization term
-    - batch_alpha and batch_beta are list of index
-
-    The algorithm used for solving the dual problem is the SGD algorithm
-    as proposed in [19]_ [alg.1]
-
-    Parameters
-    ----------
-
-    reg : float number,
-        Regularization term > 0
-    M : np.ndarray(ns, nt),
-        cost matrix
-    alpha : np.ndarray(ns,)
-        dual variable
-    beta : np.ndarray(nt,)
-        dual variable
-    batch_size : int number
-        size of the batch
-    batch_alpha : np.ndarray(bs,)
-        batch of index of alpha
-    batch_beta : np.ndarray(bs,)
-        batch of index of beta
-
-    Returns
-    -------
-
-    grad : np.ndarray(ns,)
-        partial grad F in alpha
-
-    Examples
-    --------
-
-    >>> n_source = 7
-    >>> n_target = 4
-    >>> reg = 1
-    >>> numItermax = 20000
-    >>> lr = 0.1
-    >>> batch_size = 3
-    >>> log = True
-    >>> a = ot.utils.unif(n_source)
-    >>> b = ot.utils.unif(n_target)
-    >>> rng = np.random.RandomState(0)
-    >>> X_source = rng.randn(n_source, 2)
-    >>> Y_target = rng.randn(n_target, 2)
-    >>> M = ot.dist(X_source, Y_target)
-    >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
-                                                          batch_size,
-                                                          numItermax, lr, log)
-    >>> print(log['alpha'], log['beta'])
-    >>> print(sgd_dual_pi)
-
-    References
-    ----------
-
-    [Seguy et al., 2018] :
-                    International Conference on Learning Representation (2018),
-                    arXiv preprint arxiv:1711.02283.
-    '''
-
-    grad_alpha = np.zeros(batch_size)
-    grad_alpha[:] = batch_size
-    for j in batch_beta:
-        grad_alpha -= np.exp((alpha[batch_alpha] + beta[j] -
-                              M[batch_alpha, j]) / reg)
-    return grad_alpha
-
-
-def batch_grad_dual_beta(M, reg, alpha, beta, batch_size, batch_alpha,
-                         batch_beta):
-    '''
-    Computes the partial gradient of F_{W_\varepsilon}
-
-    Compute the partial gradient of the dual problem:
-
-    ..math:
-        \forall j in batch_beta,
-        grad_beta_j = 1 * batch_size -
+        \forall j in batch_beta,
+        grad_beta_j = b_j * batch_size/len(alpha) -
             sum_{i in batch_alpha} exp((alpha_i + beta_j - M_{i,j})/reg)
-
+                * a_i * b_j
 
     where :
     - M is the (ns,nt) metric cost matrix
     - alpha, beta are dual variables in R^ixR^J
     - reg is the regularization term
-    - batch_alpha and batch_beta are list of index
+    - batch_alpha and batch_beta are lists of indices
+    - a and b are source and target weights (sum to 1)
 
     The algorithm used for solving the dual problem is the SGD algorithm
     as proposed in [19]_ [alg.1]
 
     Parameters
     ----------
-
+    a : np.ndarray(ns,),
+        source measure
+    b : np.ndarray(nt,),
+        target measure
     M : np.ndarray(ns, nt),
         cost matrix
     reg : float number,
@@ -561,7 +488,7 @@ def batch_grad_dual_beta(M, reg, alpha, beta, batch_size, batch_alpha,
     -------
 
     grad : np.ndarray(ns,)
-        partial grad F in beta
+        partial grad F
 
     Examples
     --------
@@ -591,19 +518,22 @@ def batch_grad_dual_beta(M, reg, alpha, beta, batch_size, batch_alpha,
     [Seguy et al., 2018] :
                     International Conference on Learning Representation (2018),
                     arXiv preprint arxiv:1711.02283.
     '''
-    grad_beta = np.zeros(batch_size)
-    grad_beta[:] = batch_size
-    for i in batch_alpha:
-        grad_beta -= np.exp((alpha[i] +
-                             beta[batch_beta] - M[i, batch_beta]) / reg)
-    return grad_beta
+    G = - (np.exp((alpha[batch_alpha, None] + beta[None, batch_beta] -
+                   M[batch_alpha, :][:, batch_beta]) / reg) *
+           a[batch_alpha, None] * b[None, batch_beta])
+    grad_beta = np.zeros(np.shape(M)[1])
+    grad_alpha = np.zeros(np.shape(M)[0])
+    grad_beta[batch_beta] = (b[batch_beta] * len(batch_alpha) / np.shape(M)[0]
+                             + G.sum(0))
+    grad_alpha[batch_alpha] = (a[batch_alpha] * len(batch_beta) /
+                               np.shape(M)[1] + G.sum(1))
+
+    return grad_alpha, grad_beta
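The vectorized gradient added above can be cross-checked against a direct loop over the docstring formula. A sketch on toy data (sizes and seed are arbitrary, and it assumes this revision of `ot.stochastic` is importable):

```python
import numpy as np
from ot.stochastic import batch_grad_dual

rng = np.random.RandomState(0)
ns, nt, reg, batch_size = 7, 4, 1.0, 3
a, b = np.full(ns, 1. / ns), np.full(nt, 1. / nt)
M = rng.rand(ns, nt)
alpha, beta = np.zeros(ns), np.zeros(nt)
batch_alpha = rng.choice(ns, batch_size, replace=False)
batch_beta = rng.choice(nt, batch_size, replace=False)

# Loop version of grad_alpha_i = a_i * len(batch_beta)/nt
#                  - sum_{j in batch_beta} exp((alpha_i+beta_j-M_ij)/reg)*a_i*b_j
grad_alpha = np.zeros(ns)
for i in batch_alpha:
    grad_alpha[i] = a[i] * len(batch_beta) / nt - sum(
        np.exp((alpha[i] + beta[j] - M[i, j]) / reg) * a[i] * b[j]
        for j in batch_beta)

vec_alpha, _ = batch_grad_dual(a, b, M, reg, alpha, beta, batch_size,
                               batch_alpha, batch_beta)
np.testing.assert_allclose(grad_alpha, vec_alpha)  # entries off-batch stay 0
```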
 
 
-def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr,
-                                alternate=True):
+def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
     '''
     Compute the sgd algorithm to solve the regularized discrete measures
         optimal transport dual problem
 
@@ -623,7 +553,10 @@
     Parameters
     ----------
-
+    a : np.ndarray(ns,),
+        source measure
+    b : np.ndarray(nt,),
+        target measure
     M : np.ndarray(ns, nt),
         cost matrix
     reg : float number,
@@ -634,8 +567,6 @@
         number of iteration
     lr : float number
         learning rate
-    alternate : bool, optional
-        alternating algorithm
 
     Returns
     -------
@@ -662,8 +593,8 @@
     >>> Y_target = rng.randn(n_target, 2)
     >>> M = ot.dist(X_source, Y_target)
     >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
-                                                         batch_size,
-                                                         numItermax, lr, log)
+                                                          batch_size,
+                                                          numItermax, lr, log)
     >>> print(log['alpha'], log['beta'])
     >>> print(sgd_dual_pi)
 
@@ -677,35 +608,17 @@
     n_source = np.shape(M)[0]
     n_target = np.shape(M)[1]
-    cur_alpha = np.random.randn(n_source)
-    cur_beta = np.random.randn(n_target)
-    if alternate:
-        for cur_iter in range(numItermax):
-            k = np.sqrt(cur_iter + 1)
-            batch_alpha = np.random.choice(n_source, batch_size, replace=False)
-            batch_beta = np.random.choice(n_target, batch_size, replace=False)
-            grad_F_alpha = batch_grad_dual_alpha(M, reg, cur_alpha, cur_beta,
-                                                 batch_size, batch_alpha,
-                                                 batch_beta)
-            cur_alpha[batch_alpha] += (lr / k) * grad_F_alpha
-            grad_F_beta = batch_grad_dual_beta(M, reg, cur_alpha, cur_beta,
-                                               batch_size, batch_alpha,
-                                               batch_beta)
-            cur_beta[batch_beta] += (lr / k) * grad_F_beta
-
-    else:
-        for cur_iter in range(numItermax):
-            k = np.sqrt(cur_iter + 1)
-            batch_alpha = np.random.choice(n_source, batch_size, replace=False)
-            batch_beta = np.random.choice(n_target, batch_size, replace=False)
-            grad_F_alpha = batch_grad_dual_alpha(M, reg, cur_alpha, cur_beta,
-                                                 batch_size, batch_alpha,
-                                                 batch_beta)
-            grad_F_beta = batch_grad_dual_beta(M, reg, cur_alpha, cur_beta,
-                                               batch_size, batch_alpha,
-                                               batch_beta)
-            cur_alpha[batch_alpha] += (lr / k) * grad_F_alpha
-            cur_beta[batch_beta] += (lr / k) * grad_F_beta
+    cur_alpha = np.zeros(n_source)
+    cur_beta = np.zeros(n_target)
+    for cur_iter in range(numItermax):
+        k = np.sqrt(cur_iter + 1)
+        batch_alpha = np.random.choice(n_source, batch_size, replace=False)
+        batch_beta = np.random.choice(n_target, batch_size, replace=False)
+        update_alpha, update_beta = batch_grad_dual(a, b, M, reg, cur_alpha,
+                                                    cur_beta, batch_size,
+                                                    batch_alpha, batch_beta)
+        cur_alpha += (lr / k) * update_alpha
+        cur_beta += (lr / k) * update_beta
 
     return cur_alpha, cur_beta
 
@@ -787,7 +700,7 @@ def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1,
                     arXiv preprint arxiv:1711.02283.
     '''
 
-    opt_alpha, opt_beta = sgd_entropic_regularization(M, reg, batch_size,
+    opt_alpha, opt_beta = sgd_entropic_regularization(a, b, M, reg, batch_size,
                                                       numItermax, lr)
     pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) *
           a[:, None] * b[None, :])
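End to end, the new `a, b` arguments flow from `solve_dual_entropic` through `sgd_entropic_regularization` into `batch_grad_dual`, and the plan is recovered as `pi = exp((alpha_i + beta_j - M_ij)/reg) * a_i * b_j`. A runnable usage sketch mirroring the docstring example above (toy sizes, hyperparameters taken from that example):

```python
import numpy as np
import ot
from ot import stochastic

rng = np.random.RandomState(0)
n_source, n_target = 7, 4
a = ot.utils.unif(n_source)  # uniform source weights
b = ot.utils.unif(n_target)  # uniform target weights
M = ot.dist(rng.randn(n_source, 2), rng.randn(n_target, 2))

sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg=1,
                                                  batch_size=3,
                                                  numItermax=20000, lr=0.1,
                                                  log=True)
print(log['alpha'], log['beta'])
print(sgd_dual_pi.sum(1))  # close to a once the SGD has converged
```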
diff --git a/test/test_stochastic.py b/test/test_stochastic.py
index f315c88..0128317 100644
--- a/test/test_stochastic.py
+++ b/test/test_stochastic.py
@@ -97,7 +97,6 @@ def test_sag_asgd_sinkhorn():
     x = rng.randn(n, 2)
     u = ot.utils.unif(n)
-    zero = np.zeros(n)
     M = ot.dist(x, x)
 
     G_asgd = ot.stochastic.solve_semi_dual_entropic(u, u, M, reg, "asgd",
@@ -108,13 +107,13 @@
 
     # check constraints
     np.testing.assert_allclose(
-        zero, (G_sag - G_sinkhorn).sum(1), atol=1e-03)  # cf convergence sag
+        G_sag.sum(1), G_sinkhorn.sum(1), atol=1e-03)
     np.testing.assert_allclose(
-        zero, (G_sag - G_sinkhorn).sum(0), atol=1e-03)  # cf convergence sag
+        G_sag.sum(0), G_sinkhorn.sum(0), atol=1e-03)
     np.testing.assert_allclose(
-        zero, (G_asgd - G_sinkhorn).sum(1), atol=1e-03)  # cf convergence asgd
+        G_asgd.sum(1), G_sinkhorn.sum(1), atol=1e-03)
     np.testing.assert_allclose(
-        zero, (G_asgd - G_sinkhorn).sum(0), atol=1e-03)  # cf convergence asgd
+        G_asgd.sum(0), G_sinkhorn.sum(0), atol=1e-03)
     np.testing.assert_allclose(
         G_sag, G_sinkhorn, atol=1e-03)  # cf convergence sag
     np.testing.assert_allclose(
         G_asgd, G_sinkhorn, atol=1e-03)  # cf convergence asgd
@@ -137,8 +136,8 @@ def test_stochastic_dual_sgd():
     # test sgd
     n = 10
     reg = 1
-    numItermax = 300000
-    batch_size = 8
+    numItermax = 15000
+    batch_size = 10
     rng = np.random.RandomState(0)
 
     x = rng.randn(n, 2)
@@ -151,9 +150,9 @@ def test_stochastic_dual_sgd():
 
     # check constraints
    np.testing.assert_allclose(
-        u, G.sum(1), atol=1e-02)  # cf convergence sgd
+        u, G.sum(1), atol=1e-03)  # cf convergence sgd
     np.testing.assert_allclose(
-        u, G.sum(0), atol=1e-02)  # cf convergence sgd
+        u, G.sum(0), atol=1e-03)  # cf convergence sgd
 
 
 #############################################################################
@@ -168,13 +167,13 @@ def test_dual_sgd_sinkhorn():
     # test all dual algorithms
     n = 10
     reg = 1
-    nb_iter = 300000
-    batch_size = 8
+    nb_iter = 150000
+    batch_size = 10
     rng = np.random.RandomState(0)
 
+# Test uniform
     x = rng.randn(n, 2)
     u = ot.utils.unif(n)
-    zero = np.zeros(n)
     M = ot.dist(x, x)
 
     G_sgd = ot.stochastic.solve_dual_entropic(u, u, M, reg, batch_size,
@@ -184,8 +183,33 @@ def test_dual_sgd_sinkhorn():
 
     # check constraints
     np.testing.assert_allclose(
-        zero, (G_sgd - G_sinkhorn).sum(1), atol=1e-02)  # cf convergence sgd
+        G_sgd.sum(1), G_sinkhorn.sum(1), atol=1e-03)
+    np.testing.assert_allclose(
+        G_sgd.sum(0), G_sinkhorn.sum(0), atol=1e-03)
+    np.testing.assert_allclose(
+        G_sgd, G_sinkhorn, atol=1e-03)  # cf convergence sgd
+
+# Test gaussian
+    n = 30
+    reg = 1
+    batch_size = 30
+
+    a = ot.datasets.make_1D_gauss(n, 15, 5)  # m = mean, s = std
+    b = ot.datasets.make_1D_gauss(n, 15, 5)
+    X_source = np.arange(n, dtype=np.float64)
+    Y_target = np.arange(n, dtype=np.float64)
+    M = ot.dist(X_source.reshape((n, 1)), Y_target.reshape((n, 1)))
+    M /= M.max()
+
+    G_sgd = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size,
+                                              numItermax=nb_iter)
+
+    G_sinkhorn = ot.sinkhorn(a, b, M, reg)
+
+    # check constraints
+    np.testing.assert_allclose(
+        G_sgd.sum(1), G_sinkhorn.sum(1), atol=1e-03)
     np.testing.assert_allclose(
-        zero, (G_sgd - G_sinkhorn).sum(0), atol=1e-02)  # cf convergence sgd
+        G_sgd.sum(0), G_sinkhorn.sum(0), atol=1e-03)
     np.testing.assert_allclose(
-        G_sgd, G_sinkhorn, atol=1e-02)  # cf convergence sgd
+        G_sgd, G_sinkhorn, atol=1e-03)  # cf convergence sgd
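The new Gaussian case can also be reproduced outside the test suite. A standalone sketch using the same data, solvers, and tolerances as the test above (the 1e-03 agreement is the test's claim at these settings):

```python
import numpy as np
import ot

n, reg, batch_size, nb_iter = 30, 1, 30, 150000
a = ot.datasets.make_1D_gauss(n, 15, 5)  # mean 15, std 5, as in the test
b = ot.datasets.make_1D_gauss(n, 15, 5)
X = np.arange(n, dtype=np.float64).reshape((n, 1))
M = ot.dist(X, X)
M /= M.max()

G_sgd = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size,
                                          numItermax=nb_iter)
G_sinkhorn = ot.sinkhorn(a, b, M, reg)

# The SGD dual plan should match Sinkhorn's marginals and entries.
np.testing.assert_allclose(G_sgd.sum(1), G_sinkhorn.sum(1), atol=1e-03)
np.testing.assert_allclose(G_sgd.sum(0), G_sinkhorn.sum(0), atol=1e-03)
np.testing.assert_allclose(G_sgd, G_sinkhorn, atol=1e-03)
```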