author     Kilian Fatras <kilianfatras@dhcp-206-12-53-210.eduroam.wireless.ubc.ca>  2018-06-18 17:56:28 -0700
committer  Kilian Fatras <kilianfatras@dhcp-206-12-53-210.eduroam.wireless.ubc.ca>  2018-06-18 17:56:28 -0700
commit     74cfe5ac77c3e964a85ef90c11d8ebffa16ddcfe (patch)
tree       cbff519da38279cc0e0cc4a4e8ab35d0169d16b5
parent     055417ee06917ff8bac5d07b2d2a17d80e5da4b6 (diff)

add sgd

-rw-r--r--  README.md                     8
-rw-r--r--  examples/plot_stochastic.py  95
-rw-r--r--  ot/stochastic.py            468
-rw-r--r--  test/test_stochastic.py     104

4 files changed, 608 insertions, 67 deletions
@@ -22,6 +22,7 @@ It provides the following solvers:
 * Linear OT [14] and Joint OT matrix and mapping estimation [8].
 * Wasserstein Discriminant Analysis [11] (requires autograd + pymanopt).
 * Gromov-Wasserstein distances and barycenters ([13] and regularized [12])
+* Stochastic Optimization for Large-scale Optimal Transport (semi-dual problem [18] and dual problem [19])
 
 Some demonstrations (both in Python and Jupyter Notebook format) are available in the examples folder.
 
@@ -149,6 +150,7 @@ The contributors to this library are:
 * [Stanislas Chambon](https://slasnista.github.io/)
 * [Antoine Rolet](https://arolet.github.io/)
 * Erwan Vautier (Gromov-Wasserstein)
+* [Kilian Fatras](https://kilianfatras.github.io/) (Stochastic optimization)
 
 This toolbox benefits a lot from open source research and we would like to thank the following persons for providing some code (in various languages):
 
@@ -213,3 +215,9 @@ You can also post bug reports and feature requests in Github issues. Make sure t
 [15] Peyré, G., & Cuturi, M. (2018). [Computational Optimal Transport](https://arxiv.org/pdf/1803.00567.pdf).
 
 [16] Agueh, M., & Carlier, G. (2011). [Barycenters in the Wasserstein space](https://hal.archives-ouvertes.fr/hal-00637399/document). SIAM Journal on Mathematical Analysis, 43(2), 904-924.
+
+[17] Blondel, M., Seguy, V., & Rolet, A. (2018). [Smooth and Sparse Optimal Transport](https://arxiv.org/pdf/1710.06276.pdf). Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics (AISTATS).
+
+[18] Genevay, A., Cuturi, M., Peyré, G., & Bach, F. (2016). [Stochastic Optimization for Large-scale Optimal Transport](https://arxiv.org/abs/1605.08527). Advances in Neural Information Processing Systems (NIPS).
+
+[19] Seguy, V., Bhushan Damodaran, B., Flamary, R., Courty, N., Rolet, A., & Blondel, M. (2018). [Large-scale Optimal Transport and Mapping Estimation](https://arxiv.org/pdf/1711.02283.pdf). International Conference on Learning Representations (ICLR).
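The two solvers advertised in the README hunk above correspond to the entry points `ot.stochastic.solve_semi_dual_entropic` (semi-dual, [18]) and `ot.stochastic.solve_dual_entropic` (dual, [19]) introduced later in this patch. A minimal usage sketch written against the signatures added below; the sizes and hyperparameters are illustrative, not recommendations:

```python
import numpy as np
import ot

# two small uniform measures and a squared-Euclidean cost matrix
rng = np.random.RandomState(0)
a, b = ot.utils.unif(7), ot.utils.unif(4)
M = ot.dist(rng.randn(7, 2), rng.randn(4, 2))

# semi-dual solver of [18]: method is "SAG" or "ASGD"
pi_semi = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg=1, method="SAG",
                                                 numItermax=10000, lr=0.1)

# dual solver of [19]: mini-batch SGD on both potentials
pi_dual = ot.stochastic.solve_dual_entropic(a, b, M, reg=1, batch_size=3,
                                            numItermax=10000, lr=0.1)
```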
method = "SAG" -sag_pi = ot.stochastic.transportation_matrix_entropic(a, b, M, reg, method, - numItermax, lr) +sag_pi = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method, + numItermax, lr) print(sag_pi) ############################################################################# @@ -68,8 +68,9 @@ print(sag_pi) n_source = 7 n_target = 4 reg = 1 -numItermax = 500000 +numItermax = 100000 lr = 1 +log = True a = ot.utils.unif(n_source) b = ot.utils.unif(n_target) @@ -85,12 +86,13 @@ M = ot.dist(X_source, Y_target) # case # --------------------------------------------- # -# Define the method "ASGD", call ot.transportation_matrix_entropic and plot the +# Define the method "ASGD", call ot.solve_semi_dual_entropic and plot the # results. method = "ASGD" -asgd_pi = ot.stochastic.transportation_matrix_entropic(a, b, M, reg, method, - numItermax, lr) +asgd_pi, log = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method, + numItermax, lr, log) +print(log['alpha'], log['beta']) print(asgd_pi) ############################################################################# @@ -100,7 +102,7 @@ print(asgd_pi) # # Call the Sinkhorn algorithm from POT -sinkhorn_pi = ot.sinkhorn(a, b, M, 1) +sinkhorn_pi = ot.sinkhorn(a, b, M, reg) print(sinkhorn_pi) @@ -113,7 +115,7 @@ print(sinkhorn_pi) # ---------------- pl.figure(4, figsize=(5, 5)) -ot.plot.plot1D_mat(a, b, sag_pi, 'OT matrix SAG') +ot.plot.plot1D_mat(a, b, sag_pi, 'semi-dual : OT matrix SAG') pl.show() @@ -122,7 +124,76 @@ pl.show() # ----------------- pl.figure(4, figsize=(5, 5)) -ot.plot.plot1D_mat(a, b, asgd_pi, 'OT matrix ASGD') +ot.plot.plot1D_mat(a, b, asgd_pi, 'semi-dual : OT matrix ASGD') +pl.show() + + +############################################################################## +# Plot Sinkhorn results +# --------------------- + +pl.figure(4, figsize=(5, 5)) +ot.plot.plot1D_mat(a, b, sinkhorn_pi, 'OT matrix Sinkhorn') +pl.show() + +############################################################################# +# COMPUTE TRANSPORTATION MATRIX FOR DUAL PROBLEM +############################################################################# +print("------------DUAL PROBLEM------------") +############################################################################# +# SEMICONTINOUS CASE +# Sample one general measure a, one discrete measures b for the semicontinous +# case +# --------------------------------------------- +# +# Define one general measure a, one discrete measures b, the points where +# are defined the source and the target measures and finally the cost matrix c. + +n_source = 7 +n_target = 4 +reg = 1 +numItermax = 100000 +lr = 0.1 +batch_size = 3 +log = True + +a = ot.utils.unif(n_source) +b = ot.utils.unif(n_target) + +rng = np.random.RandomState(0) +X_source = rng.randn(n_source, 2) +Y_target = rng.randn(n_target, 2) +M = ot.dist(X_source, Y_target) + +############################################################################# +# +# Call the "SGD" dual method to find the transportation matrix in the semicontinous +# case +# --------------------------------------------- +# +# Call ot.solve_dual_entropic and plot the results. 
+
+#############################################################################
+# COMPUTE TRANSPORTATION MATRIX FOR DUAL PROBLEM
+#############################################################################
+print("------------DUAL PROBLEM------------")
+#############################################################################
+# SEMI-CONTINUOUS CASE
+# Sample one general measure a, one discrete measure b for the
+# semi-continuous case
+# ---------------------------------------------
+#
+# Define one general measure a, one discrete measure b, the points where
+# the source and the target measures are defined, and finally the cost
+# matrix c.
+
+n_source = 7
+n_target = 4
+reg = 1
+numItermax = 100000
+lr = 0.1
+batch_size = 3
+log = True
+
+a = ot.utils.unif(n_source)
+b = ot.utils.unif(n_target)
+
+rng = np.random.RandomState(0)
+X_source = rng.randn(n_source, 2)
+Y_target = rng.randn(n_target, 2)
+M = ot.dist(X_source, Y_target)
+
+#############################################################################
+#
+# Call the "SGD" dual method to find the transportation matrix in the
+# semi-continuous case
+# ---------------------------------------------
+#
+# Call ot.solve_dual_entropic and plot the results.
+
+sgd_dual_pi, log = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size,
+                                                     numItermax, lr, log)
+print(log['alpha'], log['beta'])
+print(sgd_dual_pi)
+
+#############################################################################
+#
+# Compare the results with the Sinkhorn algorithm
+# ---------------------------------------------
+#
+# Call the Sinkhorn algorithm from POT
+
+sinkhorn_pi = ot.sinkhorn(a, b, M, reg)
+print(sinkhorn_pi)
+
+##############################################################################
+# Plot SGD results
+# -----------------
+
+pl.figure(4, figsize=(5, 5))
+ot.plot.plot1D_mat(a, b, sgd_dual_pi, 'dual : OT matrix SGD')
 pl.show()
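Both `solve_semi_dual_entropic` and `solve_dual_entropic` turn their learned dual potentials into a primal plan with the same closed form, pi_ij = exp((alpha_i + beta_j - M_ij) / reg) * a_i * b_j. A standalone rendering of that step (the helper name is ours, not the library's):

```python
import numpy as np

def plan_from_potentials(alpha, beta, M, a, b, reg):
    # pi_ij = exp((alpha_i + beta_j - M_ij) / reg) * a_i * b_j,
    # exactly the reconstruction used at the end of both solvers below
    return (np.exp((alpha[:, None] + beta[None, :] - M) / reg)
            * a[:, None] * b[None, :])
```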
diff --git a/ot/stochastic.py b/ot/stochastic.py
index 9912223..31c99be 100644
--- a/ot/stochastic.py
+++ b/ot/stochastic.py
@@ -5,7 +5,10 @@ import numpy as np
 
 
-def coordinate_gradient(b, M, reg, v, i):
+##############################################################################
+# Optimization toolbox for SEMI-DUAL problem
+##############################################################################
+def coordinate_gradient(b, M, reg, beta, i):
     '''
     Compute the coordinate gradient update for regularized discrete
     distributions for (i, :)
 
@@ -59,7 +62,7 @@ def coordinate_gradient(b, M, reg, v, i):
     >>> Y_target = rng.randn(n_target, 2)
     >>> M = ot.dist(X_source, Y_target)
     >>> method = "ASGD"
-    >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg,
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
                                                       method, numItermax, lr)
     >>> print(asgd_pi)
 
@@ -74,9 +77,9 @@ def coordinate_gradient(b, M, reg, v, i):
     '''
 
-    r = M[i, :] - v
-    exp_v = np.exp(-r / reg) * b
-    khi = exp_v / (np.sum(exp_v))
+    r = M[i, :] - beta
+    exp_beta = np.exp(-r / reg) * b
+    khi = exp_beta / (np.sum(exp_beta))
     return b - khi
 
@@ -137,7 +140,7 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=0.1):
     >>> Y_target = rng.randn(n_target, 2)
     >>> M = ot.dist(X_source, Y_target)
     >>> method = "SAG"
-    >>> sag_pi = stochastic.transportation_matrix_entropic(a, b, M, reg,
+    >>> sag_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
                                                      method, numItermax, lr)
     >>> print(sag_pi)
 
@@ -153,16 +156,16 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=0.1):
 
     n_source = np.shape(M)[0]
     n_target = np.shape(M)[1]
-    v = np.zeros(n_target)
+    cur_beta = np.zeros(n_target)
     stored_gradient = np.zeros((n_source, n_target))
     sum_stored_gradient = np.zeros(n_target)
     for _ in range(numItermax):
         i = np.random.randint(n_source)
-        cur_coord_grad = a[i] * coordinate_gradient(b, M, reg, v, i)
+        cur_coord_grad = a[i] * coordinate_gradient(b, M, reg, cur_beta, i)
         sum_stored_gradient += (cur_coord_grad - stored_gradient[i])
         stored_gradient[i] = cur_coord_grad
-        v += lr * (1. / n_source) * sum_stored_gradient
-    return v
+        cur_beta += lr * (1. / n_source) * sum_stored_gradient
+    return cur_beta
 
 
 def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1):
@@ -170,19 +173,19 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1):
     Compute the ASGD algorithm to solve the regularized semi-continuous
     measures optimal transport max problem
 
-    The function solves the following optimization problem:
-    .. math::
-        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
-        s.t. \gamma 1 = a
-             \gamma^T 1 = b
-             \gamma \geq 0
-    where :
-    - M is the (ns,nt) metric cost matrix
-    - :math:`\Omega` is the entropic regularization term
-      :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
-    - a and b are source and target weights (sum to 1)
-    The algorithm used for solving the problem is the ASGD algorithm
-    as proposed in [18]_ [alg.2]
+    The function solves the following optimization problem:
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+        s.t. \gamma 1 = a
+             \gamma^T 1 = b
+             \gamma \geq 0
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term
+      :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+    The algorithm used for solving the problem is the ASGD algorithm
+    as proposed in [18]_ [alg.2]
 
 
     Parameters
     ----------
 
@@ -221,7 +224,7 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1):
     >>> Y_target = rng.randn(n_target, 2)
     >>> M = ot.dist(X_source, Y_target)
     >>> method = "ASGD"
-    >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg,
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
                                                       method, numItermax, lr)
     >>> print(asgd_pi)
 
@@ -237,18 +240,18 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1):
 
     n_source = np.shape(M)[0]
     n_target = np.shape(M)[1]
-    cur_v = np.zeros(n_target)
-    ave_v = np.zeros(n_target)
+    cur_beta = np.zeros(n_target)
+    ave_beta = np.zeros(n_target)
     for cur_iter in range(numItermax):
         k = cur_iter + 1
         i = np.random.randint(n_source)
-        cur_coord_grad = coordinate_gradient(b, M, reg, cur_v, i)
-        cur_v += (lr / np.sqrt(k)) * cur_coord_grad
-        ave_v = (1. / k) * cur_v + (1 - 1. / k) * ave_v
-    return ave_v
+        cur_coord_grad = coordinate_gradient(b, M, reg, cur_beta, i)
+        cur_beta += (lr / np.sqrt(k)) * cur_coord_grad
+        ave_beta = (1. / k) * cur_beta + (1 - 1. / k) * ave_beta
+    return ave_beta
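In words, `coordinate_gradient` returns b minus a b-weighted softmax of (beta - M_i) / reg. The same quantity in isolation, with a max-shift added for numerical safety (the shift is our addition; the patch only applies it later, in `c_transform_entropic`):

```python
import numpy as np

def semi_dual_coordinate_grad(b, M, reg, beta, i):
    # chi_j is proportional to b_j * exp((beta_j - M_ij) / reg);
    # the semi-dual coordinate gradient for row i is b - chi
    r = M[i, :] - beta
    w = np.exp(-(r - r.min()) / reg) * b  # shifted exponent, same normalized chi
    return b - w / w.sum()
```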
 
 
-def c_transform_entropic(b, M, reg, v):
+def c_transform_entropic(b, M, reg, beta):
     '''
     The goal is to recover u from the c-transform.
 
@@ -298,7 +301,7 @@ def c_transform_entropic(b, M, reg, v):
     >>> Y_target = rng.randn(n_target, 2)
     >>> M = ot.dist(X_source, Y_target)
     >>> method = "ASGD"
-    >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg,
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
                                                       method, numItermax, lr)
     >>> print(asgd_pi)
 
@@ -313,16 +316,17 @@ def c_transform_entropic(b, M, reg, v):
     '''
 
     n_source = np.shape(M)[0]
-    u = np.zeros(n_source)
+    alpha = np.zeros(n_source)
     for i in range(n_source):
-        r = M[i, :] - v
-        exp_v = np.exp(-r / reg) * b
-        u[i] = - reg * np.log(np.sum(exp_v))
-    return u
+        r = M[i, :] - beta
+        min_r = np.min(r)
+        exp_beta = np.exp(-(r - min_r) / reg) * b
+        alpha[i] = min_r - reg * np.log(np.sum(exp_beta))
+    return alpha
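The rewritten loop is a max-shifted log-sum-exp: since -reg * log(sum_j b_j * exp(-r_j / reg)) = min_j r_j - reg * log(sum_j b_j * exp(-(r_j - min r) / reg)), subtracting the minimum keeps every exponent non-positive and avoids overflow without changing the value. The identity on its own (helper name is ours):

```python
import numpy as np

def stabilized_soft_min(r, b, reg):
    # equals -reg * log(sum_j b_j * exp(-r_j / reg)), computed without overflow
    m = np.min(r)
    return m - reg * np.log(np.sum(np.exp(-(r - m) / reg) * b))
```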
 
 
-def transportation_matrix_entropic(a, b, M, reg, method, numItermax=10000,
-                                   lr=0.1):
+def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=0.1,
+                             log=False):
     '''
     Compute the transportation matrix to solve the regularized discrete
     measures optimal transport max problem
 
@@ -363,12 +368,16 @@ def transportation_matrix_entropic(a, b, M, reg, method, numItermax=10000,
         size of the source measure
     n_target : int number
         size of the target measure
+    log : bool, optional
+        record log if True
 
 
     Returns
     -------
 
     pi : np.ndarray(ns, nt)
         transportation matrix
+    log : dict
+        log dictionary returned only if log==True in parameters
 
     Examples
     --------
 
@@ -385,7 +394,7 @@ def transportation_matrix_entropic(a, b, M, reg, method, numItermax=10000,
     >>> Y_target = rng.randn(n_target, 2)
     >>> M = ot.dist(X_source, Y_target)
     >>> method = "ASGD"
-    >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg,
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
                                                       method, numItermax, lr)
     >>> print(asgd_pi)
 
@@ -398,15 +407,382 @@ def transportation_matrix_entropic(a, b, M, reg, method, numItermax=10000,
     Advances in Neural Information Processing Systems (2016),
     arXiv preprint arxiv:1605.08527.
 
     '''
 
     if method.lower() == "sag":
-        opt_v = sag_entropic_transport(a, b, M, reg, numItermax, lr)
+        opt_beta = sag_entropic_transport(a, b, M, reg, numItermax, lr)
     elif method.lower() == "asgd":
-        opt_v = averaged_sgd_entropic_transport(b, M, reg, numItermax, lr)
+        opt_beta = averaged_sgd_entropic_transport(b, M, reg, numItermax, lr)
     else:
         print("Please select your method between SAG and ASGD")
         return None
 
-    opt_u = c_transform_entropic(b, M, reg, opt_v)
-    pi = (np.exp((opt_u[:, None] + opt_v[None, :] - M[:, :]) / reg) *
+    opt_alpha = c_transform_entropic(b, M, reg, opt_beta)
+    pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) *
           a[:, None] * b[None, :])
-    return pi
+
+    if log:
+        log = {}
+        log['alpha'] = opt_alpha
+        log['beta'] = opt_beta
+        return pi, log
+    else:
+        return pi
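With `log=True` the learned potentials come back alongside the plan; a short sketch of that call path, using the same sizes as the examples above:

```python
import numpy as np
import ot

rng = np.random.RandomState(0)
a, b = ot.utils.unif(7), ot.utils.unif(4)
M = ot.dist(rng.randn(7, 2), rng.randn(4, 2))

pi, log = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg=1, method="ASGD",
                                                 numItermax=100000, lr=1,
                                                 log=True)
print(log['alpha'], log['beta'])  # dual potentials, shapes (ns,) and (nt,)
```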
+
+
+##############################################################################
+# Optimization toolbox for DUAL problem
+##############################################################################
+
+
+def grad_dF_dalpha(M, reg, alpha, beta, batch_size, batch_alpha, batch_beta):
+    '''
+    Compute the partial gradient of the dual objective F_{W_varepsilon}
+    with respect to alpha:
+    .. math::
+        \forall i \in batch_alpha,
+        grad_alpha_i = batch_size -
+            \sum_{j \in batch_beta} \exp((alpha_i + beta_j - M_{i,j}) / reg)
+
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - alpha, beta are dual variables in R^ns x R^nt
+    - reg is the regularization term
+    - batch_alpha and batch_beta are lists of indices
+
+    The algorithm used for solving the dual problem is the SGD algorithm
+    as proposed in [19]_ [alg.1]
+
+    Parameters
+    ----------
+
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float number,
+        Regularization term > 0
+    alpha : np.ndarray(ns,)
+        dual variable
+    beta : np.ndarray(nt,)
+        dual variable
+    batch_size : int number
+        size of the batch
+    batch_alpha : np.ndarray(bs,)
+        batch of indices of alpha
+    batch_beta : np.ndarray(bs,)
+        batch of indices of beta
+
+    Returns
+    -------
+
+    grad : np.ndarray(bs,)
+        partial gradient of F w.r.t. alpha on the batch
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 20000
+    >>> lr = 0.1
+    >>> batch_size = 3
+    >>> log = True
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+                                                          batch_size,
+                                                          numItermax, lr, log)
+    >>> print(log['alpha'], log['beta'])
+    >>> print(sgd_dual_pi)
+
+    References
+    ----------
+
+    [Seguy et al., 2018] :
+        International Conference on Learning Representations (2018),
+        arXiv preprint arxiv:1711.02283.
+    '''
+
+    grad_alpha = np.zeros(batch_size)
+    grad_alpha[:] = batch_size
+    for j in batch_beta:
+        grad_alpha -= np.exp((alpha[batch_alpha] + beta[j]
+                              - M[batch_alpha, j]) / reg)
+    return grad_alpha
+
+
+def grad_dF_dbeta(M, reg, alpha, beta, batch_size, batch_alpha, batch_beta):
+    '''
+    Compute the partial gradient of the dual objective F_{W_varepsilon}
+    with respect to beta:
+    .. math::
+        \forall j \in batch_beta,
+        grad_beta_j = batch_size -
+            \sum_{i \in batch_alpha} \exp((alpha_i + beta_j - M_{i,j}) / reg)
+
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - alpha, beta are dual variables in R^ns x R^nt
+    - reg is the regularization term
+    - batch_alpha and batch_beta are lists of indices
+
+    The algorithm used for solving the dual problem is the SGD algorithm
+    as proposed in [19]_ [alg.1]
+
+    Parameters
+    ----------
+
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float number,
+        Regularization term > 0
+    alpha : np.ndarray(ns,)
+        dual variable
+    beta : np.ndarray(nt,)
+        dual variable
+    batch_size : int number
+        size of the batch
+    batch_alpha : np.ndarray(bs,)
+        batch of indices of alpha
+    batch_beta : np.ndarray(bs,)
+        batch of indices of beta
+
+    Returns
+    -------
+
+    grad : np.ndarray(bs,)
+        partial gradient of F w.r.t. beta on the batch
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 20000
+    >>> lr = 0.1
+    >>> batch_size = 3
+    >>> log = True
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+                                                          batch_size,
+                                                          numItermax, lr, log)
+    >>> print(log['alpha'], log['beta'])
+    >>> print(sgd_dual_pi)
+
+    References
+    ----------
+
+    [Seguy et al., 2018] :
+        International Conference on Learning Representations (2018),
+        arXiv preprint arxiv:1711.02283.
+    '''
+
+    grad_beta = np.zeros(batch_size)
+    grad_beta[:] = batch_size
+    for i in batch_alpha:
+        grad_beta -= np.exp((alpha[i]
+                             + beta[batch_beta] - M[i, batch_beta]) / reg)
+    return grad_beta
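Each gradient helper loops over one batch dimension; both quantities can also be read off one vectorized batch submatrix. A sketch, not part of the patch (`np.ix_` builds the index grid):

```python
import numpy as np

def batch_dual_grads(M, reg, alpha, beta, batch_alpha, batch_beta):
    # K_ij = exp((alpha_i + beta_j - M_ij) / reg) restricted to the batch grid
    K = np.exp((alpha[batch_alpha][:, None] + beta[batch_beta][None, :]
                - M[np.ix_(batch_alpha, batch_beta)]) / reg)
    bs = len(batch_alpha)
    # same values as grad_dF_dalpha and grad_dF_dbeta above
    return bs - K.sum(axis=1), bs - K.sum(axis=0)
```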
+
+
+def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr,
+                                alternate=True):
+    '''
+    Compute the SGD algorithm to solve the regularized discrete measures
+    optimal transport dual problem
+
+    The function solves the following optimization problem:
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+        s.t. \gamma 1 = a
+             \gamma^T 1 = b
+             \gamma \geq 0
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term
+      :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+
+    Parameters
+    ----------
+
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float number,
+        Regularization term > 0
+    batch_size : int number
+        size of the batch
+    numItermax : int number
+        number of iterations
+    lr : float number
+        learning rate
+    alternate : bool, optional
+        if True, alternate the updates of alpha and beta
+
+    Returns
+    -------
+
+    alpha : np.ndarray(ns,)
+        dual variable
+    beta : np.ndarray(nt,)
+        dual variable
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 20000
+    >>> lr = 0.1
+    >>> batch_size = 3
+    >>> log = True
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+                                                          batch_size,
+                                                          numItermax, lr, log)
+    >>> print(log['alpha'], log['beta'])
+    >>> print(sgd_dual_pi)
+
+    References
+    ----------
+
+    [Seguy et al., 2018] :
+        International Conference on Learning Representations (2018),
+        arXiv preprint arxiv:1711.02283.
+    '''
+
+    n_source = np.shape(M)[0]
+    n_target = np.shape(M)[1]
+    cur_alpha = np.random.randn(n_source)
+    cur_beta = np.random.randn(n_target)
+    if alternate:
+        for cur_iter in range(numItermax):
+            k = np.sqrt(cur_iter + 1)
+            batch_alpha = np.random.choice(n_source, batch_size, replace=False)
+            batch_beta = np.random.choice(n_target, batch_size, replace=False)
+            grad_F_alpha = grad_dF_dalpha(M, reg, cur_alpha, cur_beta,
+                                          batch_size, batch_alpha, batch_beta)
+            cur_alpha[batch_alpha] += (lr / k) * grad_F_alpha
+            grad_F_beta = grad_dF_dbeta(M, reg, cur_alpha, cur_beta,
+                                        batch_size, batch_alpha, batch_beta)
+            cur_beta[batch_beta] += (lr / k) * grad_F_beta
+
+    else:
+        for cur_iter in range(numItermax):
+            k = np.sqrt(cur_iter + 1)
+            batch_alpha = np.random.choice(n_source, batch_size, replace=False)
+            batch_beta = np.random.choice(n_target, batch_size, replace=False)
+            grad_F_alpha = grad_dF_dalpha(M, reg, cur_alpha, cur_beta,
+                                          batch_size, batch_alpha, batch_beta)
+            grad_F_beta = grad_dF_dbeta(M, reg, cur_alpha, cur_beta,
+                                        batch_size, batch_alpha, batch_beta)
+            cur_alpha[batch_alpha] += (lr / k) * grad_F_alpha
+            cur_beta[batch_beta] += (lr / k) * grad_F_beta
+
+    return cur_alpha, cur_beta
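The `alternate` flag controls whether the beta update sees the freshly updated alpha (a Gauss-Seidel-style sweep) or the pre-update one (Jacobi-style); both branches use the same lr / sqrt(k) step size. A direct usage sketch returning the raw potentials, with illustrative hyperparameters:

```python
import numpy as np
import ot

rng = np.random.RandomState(0)
M = ot.dist(rng.randn(7, 2), rng.randn(4, 2))

# raw dual potentials, without the primal reconstruction step
alpha, beta = ot.stochastic.sgd_entropic_regularization(M, reg=1, batch_size=3,
                                                        numItermax=20000,
                                                        lr=0.1, alternate=True)
```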
+
+
+def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1,
+                        log=False):
+    '''
+    Compute the transportation matrix to solve the regularized discrete
+    measures optimal transport dual problem
+
+    The function solves the following optimization problem:
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+        s.t. \gamma 1 = a
+             \gamma^T 1 = b
+             \gamma \geq 0
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term
+      :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+
+    Parameters
+    ----------
+
+    a : np.ndarray(ns,),
+        source measure
+    b : np.ndarray(nt,),
+        target measure
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float number,
+        Regularization term > 0
+    batch_size : int number
+        size of the batch
+    numItermax : int number
+        number of iterations
+    lr : float number
+        learning rate
+    log : bool, optional
+        record log if True
+
+    Returns
+    -------
+
+    pi : np.ndarray(ns, nt)
+        transportation matrix
+    log : dict
+        log dictionary returned only if log==True in parameters
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 20000
+    >>> lr = 0.1
+    >>> batch_size = 3
+    >>> log = True
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+                                                          batch_size,
+                                                          numItermax, lr, log)
+    >>> print(log['alpha'], log['beta'])
+    >>> print(sgd_dual_pi)
+
+    References
+    ----------
+
+    [Seguy et al., 2018] :
+        International Conference on Learning Representations (2018),
+        arXiv preprint arxiv:1711.02283.
+    '''
+
+    opt_alpha, opt_beta = sgd_entropic_regularization(M, reg, batch_size,
+                                                      numItermax, lr)
+    pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) *
+          a[:, None] * b[None, :])
+    if log:
+        log = {}
+        log['alpha'] = opt_alpha
+        log['beta'] = opt_beta
+        return pi, log
+    else:
+        return pi
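The tests below assert that this dual plan matches Sinkhorn at atol=1e-02; the same check in script form, mirroring `test_dual_sgd_sinkhorn`:

```python
import numpy as np
import ot

n, reg, nb_iter, batch_size = 10, 1, 300000, 8
rng = np.random.RandomState(0)
x = rng.randn(n, 2)
u = ot.utils.unif(n)
M = ot.dist(x, x)

G_sgd = ot.stochastic.solve_dual_entropic(u, u, M, reg, batch_size,
                                          numItermax=nb_iter)
G_sinkhorn = ot.sinkhorn(u, u, M, reg)
print(np.abs(G_sgd - G_sinkhorn).max())  # expected below ~1e-02
```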
diff --git a/test/test_stochastic.py b/test/test_stochastic.py
index 5fe5ea9..bc0cebb 100644
--- a/test/test_stochastic.py
+++ b/test/test_stochastic.py
@@ -15,6 +15,11 @@ for discrete and semi-continuous measures from the POT library.
 
 import numpy as np
 import ot
 
+
+#############################################################################
+# COMPUTE TEST FOR SEMI-DUAL PROBLEM
+#############################################################################
+
 #############################################################################
 #
 # TEST SAG algorithm
@@ -35,8 +40,8 @@ def test_stochastic_sag():
 
     M = ot.dist(x, x)
 
-    G = ot.stochastic.transportation_matrix_entropic(u, u, M, reg, "sag",
-                                                     numItermax=numItermax)
+    G = ot.stochastic.solve_semi_dual_entropic(u, u, M, reg, "sag",
+                                               numItermax=numItermax)
 
     # check constraints
     np.testing.assert_allclose(
@@ -65,9 +70,9 @@ def test_stochastic_asgd():
 
     M = ot.dist(x, x)
 
-    G = ot.stochastic.transportation_matrix_entropic(u, u, M, reg, "asgd",
-                                                     numItermax=numItermax,
-                                                     lr=lr)
+    G = ot.stochastic.solve_semi_dual_entropic(u, u, M, reg, "asgd",
+                                               numItermax=numItermax,
+                                               lr=lr)
 
     # check constraints
     np.testing.assert_allclose(
@@ -89,6 +94,7 @@ def test_sag_asgd_sinkhorn():
     n = 15
     reg = 1
     nb_iter = 300000
+    lr = 1
     rng = np.random.RandomState(0)
 
     x = rng.randn(n, 2)
@@ -96,11 +102,10 @@ def test_sag_asgd_sinkhorn():
     zero = np.zeros(n)
     M = ot.dist(x, x)
 
-    G_asgd = ot.stochastic.transportation_matrix_entropic(u, u, M, reg, "asgd",
-                                                          numItermax=nb_iter,
-                                                          lr=1)
-    G_sag = ot.stochastic.transportation_matrix_entropic(u, u, M, reg, "sag",
-                                                         numItermax=nb_iter)
+    G_asgd = ot.stochastic.solve_semi_dual_entropic(u, u, M, reg, "asgd",
+                                                    numItermax=nb_iter, lr=lr)
+    G_sag = ot.stochastic.solve_semi_dual_entropic(u, u, M, reg, "sag",
+                                                   numItermax=nb_iter)
     G_sinkhorn = ot.sinkhorn(u, u, M, reg)
 
     # check constraints
@@ -112,3 +117,82 @@ def test_sag_asgd_sinkhorn():
         zero, (G_asgd - G_sinkhorn).sum(1), atol=1e-03)  # cf convergence asgd
     np.testing.assert_allclose(
         zero, (G_asgd - G_sinkhorn).sum(0), atol=1e-03)  # cf convergence asgd
+    np.testing.assert_allclose(
+        G_sag, G_sinkhorn, atol=1e-03)  # cf convergence sag
+    np.testing.assert_allclose(
+        G_asgd, G_sinkhorn, atol=1e-03)  # cf convergence asgd
+
+
+#############################################################################
+# COMPUTE TEST FOR DUAL PROBLEM
+#############################################################################
+
+#############################################################################
+#
+# TEST SGD algorithm
+# ---------------------------------------------
+# 2 identical discrete measures u defined on the same space with a
+# regularization term, a batch_size and a number of iterations
+
+def test_stochastic_dual_sgd():
+    # test sgd
+    print("SGD")
+    n = 10
+    reg = 1
+    numItermax = 300000
+    batch_size = 8
+    rng = np.random.RandomState(0)
+
+    x = rng.randn(n, 2)
+    u = ot.utils.unif(n)
+
+    M = ot.dist(x, x)
+
+    G = ot.stochastic.solve_dual_entropic(u, u, M, reg, batch_size,
+                                          numItermax=numItermax)
+
+    # check constraints
+    np.testing.assert_allclose(
+        u, G.sum(1), atol=1e-02)  # cf convergence sgd
+    np.testing.assert_allclose(
+        u, G.sum(0), atol=1e-02)  # cf convergence sgd
+
+
+#############################################################################
+#
+# TEST Convergence SGD toward Sinkhorn's solution
+# --------------------------------------------------------
+# 2 identical discrete measures u defined on the same space with a
+# regularization term, a batch_size and a number of iterations
+
+
+def test_dual_sgd_sinkhorn():
+    # test all dual algorithms
+    print("SGD vs Sinkhorn")
+    n = 10
+    reg = 1
+    nb_iter = 300000
+    batch_size = 8
+    rng = np.random.RandomState(0)
+
+    x = rng.randn(n, 2)
+    u = ot.utils.unif(n)
+    zero = np.zeros(n)
+    M = ot.dist(x, x)
+
+    G_sgd = ot.stochastic.solve_dual_entropic(u, u, M, reg, batch_size,
+                                              numItermax=nb_iter)
+
+    G_sinkhorn = ot.sinkhorn(u, u, M, reg)
+
+    # check constraints
+    np.testing.assert_allclose(
+        zero, (G_sgd - G_sinkhorn).sum(1), atol=1e-02)  # cf convergence sgd
+    np.testing.assert_allclose(
+        zero, (G_sgd - G_sinkhorn).sum(0), atol=1e-02)  # cf convergence sgd
+    np.testing.assert_allclose(
+        G_sgd, G_sinkhorn, atol=1e-02)  # cf convergence sgd
+
+
+if __name__ == '__main__':
+    test_stochastic_dual_sgd()
+    test_dual_sgd_sinkhorn()
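Only the two dual tests run under the `__main__` guard; to pick up the semi-dual tests as well, the whole module can be run through pytest (assuming pytest is installed, as is usual for POT's test layout):

```python
# from the repository root; -q keeps the output quiet
import pytest

pytest.main(["-q", "test/test_stochastic.py"])
```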