author      Kilian Fatras <kilianfatras@dhcp-206-12-53-210.eduroam.wireless.ubc.ca>    2018-06-18 17:56:28 -0700
committer   Kilian Fatras <kilianfatras@dhcp-206-12-53-210.eduroam.wireless.ubc.ca>    2018-06-18 17:56:28 -0700
commit      74cfe5ac77c3e964a85ef90c11d8ebffa16ddcfe (patch)
tree        cbff519da38279cc0e0cc4a4e8ab35d0169d16b5
parent      055417ee06917ff8bac5d07b2d2a17d80e5da4b6 (diff)
add sgd
-rw-r--r--    README.md                        8
-rw-r--r--    examples/plot_stochastic.py     95
-rw-r--r--    ot/stochastic.py               468
-rw-r--r--    test/test_stochastic.py        104
4 files changed, 608 insertions, 67 deletions
diff --git a/README.md b/README.md
index 466c09c..8e8dcd4 100644
--- a/README.md
+++ b/README.md
@@ -22,6 +22,7 @@ It provides the following solvers:
* Linear OT [14] and Joint OT matrix and mapping estimation [8].
* Wasserstein Discriminant Analysis [11] (requires autograd + pymanopt).
* Gromov-Wasserstein distances and barycenters ([13] and regularized [12])
+* Stochastic Optimization for Large-scale Optimal Transport (semi-dual problem [18] and dual problem [19])
Some demonstrations (both in Python and Jupyter Notebook format) are available in the examples folder.
@@ -149,6 +150,7 @@ The contributors to this library are:
* [Stanislas Chambon](https://slasnista.github.io/)
* [Antoine Rolet](https://arolet.github.io/)
* Erwan Vautier (Gromov-Wasserstein)
+* [Kilian Fatras](https://kilianfatras.github.io/) (Stochastic optimization)
This toolbox benefits a lot from open source research and we would like to thank the following persons for providing some code (in various languages):
@@ -213,3 +215,9 @@ You can also post bug reports and feature requests in Github issues. Make sure t
[15] Peyré, G., & Cuturi, M. (2018). [Computational Optimal Transport](https://arxiv.org/pdf/1803.00567.pdf) .
[16] Agueh, M., & Carlier, G. (2011). [Barycenters in the Wasserstein space](https://hal.archives-ouvertes.fr/hal-00637399/document). SIAM Journal on Mathematical Analysis, 43(2), 904-924.
+
+[17] Blondel, M., Seguy, V., & Rolet, A. (2018). [Smooth and Sparse Optimal Transport](https://arxiv.org/pdf/1710.06276.pdf). Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics (AISTATS).
+
+[18] Genevay, A., Cuturi, M., Peyré, G., & Bach, F. (2016). [Stochastic Optimization for Large-scale Optimal Transport](https://arxiv.org/pdf/1605.08527.pdf). Advances in Neural Information Processing Systems (NIPS).
+
+[19] Seguy, V., Bhushan Damodaran, B., Flamary, R., Courty, N., Rolet, A., & Blondel, M. (2018). [Large-scale Optimal Transport and Mapping Estimation](https://arxiv.org/pdf/1711.02283.pdf). International Conference on Learning Representations (ICLR).
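
A minimal usage sketch of the two solvers this commit adds (function names and signatures as introduced in ot/stochastic.py below; problem sizes and hyperparameters are illustrative, mirroring the example script):

    import numpy as np
    import ot

    # small illustrative problem: uniform marginals, squared-distance cost
    n_source, n_target, reg = 7, 4, 1
    a = ot.utils.unif(n_source)
    b = ot.utils.unif(n_target)
    rng = np.random.RandomState(0)
    M = ot.dist(rng.randn(n_source, 2), rng.randn(n_target, 2))

    # semi-dual solver [18]: method is "SAG" or "ASGD"
    pi_semi = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, "SAG",
                                                     numItermax=10000, lr=0.1)
    # dual solver [19]: minibatch SGD on both dual variables
    pi_dual = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size=3,
                                                numItermax=10000, lr=0.1)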
diff --git a/examples/plot_stochastic.py b/examples/plot_stochastic.py
index 9071ddb..3fc1955 100644
--- a/examples/plot_stochastic.py
+++ b/examples/plot_stochastic.py
@@ -18,9 +18,9 @@ import ot
#############################################################################
-# COMPUTE TRANSPORTATION MATRIX
+# COMPUTE TRANSPORTATION MATRIX FOR SEMI-DUAL PROBLEM
#############################################################################
-
+print("------------SEMI-DUAL PROBLEM------------")
#############################################################################
# DISCRETE CASE
# Sample two discrete measures for the discrete case
@@ -48,12 +48,12 @@ M = ot.dist(X_source, Y_target)
# Call the "SAG" method to find the transportation matrix in the discrete case
# ---------------------------------------------
#
-# Define the method "SAG", call ot.transportation_matrix_entropic and plot the
+# Define the method "SAG", call ot.solve_semi_dual_entropic and plot the
# results.
method = "SAG"
-sag_pi = ot.stochastic.transportation_matrix_entropic(a, b, M, reg, method,
- numItermax, lr)
+sag_pi = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method,
+ numItermax, lr)
print(sag_pi)
#############################################################################
@@ -68,8 +68,9 @@ print(sag_pi)
n_source = 7
n_target = 4
reg = 1
-numItermax = 500000
+numItermax = 100000
lr = 1
+log = True
a = ot.utils.unif(n_source)
b = ot.utils.unif(n_target)
@@ -85,12 +86,13 @@ M = ot.dist(X_source, Y_target)
# case
# ---------------------------------------------
#
-# Define the method "ASGD", call ot.transportation_matrix_entropic and plot the
+# Define the method "ASGD", call ot.solve_semi_dual_entropic and plot the
# results.
method = "ASGD"
-asgd_pi = ot.stochastic.transportation_matrix_entropic(a, b, M, reg, method,
- numItermax, lr)
+asgd_pi, log = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, method,
+ numItermax, lr, log)
+print(log['alpha'], log['beta'])
print(asgd_pi)
#############################################################################
@@ -100,7 +102,7 @@ print(asgd_pi)
#
# Call the Sinkhorn algorithm from POT
-sinkhorn_pi = ot.sinkhorn(a, b, M, 1)
+sinkhorn_pi = ot.sinkhorn(a, b, M, reg)
print(sinkhorn_pi)
@@ -113,7 +115,7 @@ print(sinkhorn_pi)
# ----------------
pl.figure(4, figsize=(5, 5))
-ot.plot.plot1D_mat(a, b, sag_pi, 'OT matrix SAG')
+ot.plot.plot1D_mat(a, b, sag_pi, 'semi-dual: OT matrix SAG')
pl.show()
@@ -122,7 +124,76 @@ pl.show()
# -----------------
pl.figure(4, figsize=(5, 5))
-ot.plot.plot1D_mat(a, b, asgd_pi, 'OT matrix ASGD')
+ot.plot.plot1D_mat(a, b, asgd_pi, 'semi-dual: OT matrix ASGD')
+pl.show()
+
+
+##############################################################################
+# Plot Sinkhorn results
+# ---------------------
+
+pl.figure(4, figsize=(5, 5))
+ot.plot.plot1D_mat(a, b, sinkhorn_pi, 'OT matrix Sinkhorn')
+pl.show()
+
+#############################################################################
+# COMPUTE TRANSPORTATION MATRIX FOR DUAL PROBLEM
+#############################################################################
+print("------------DUAL PROBLEM------------")
+#############################################################################
+# SEMICONTINUOUS CASE
+# Sample one general measure a and one discrete measure b for the
+# semicontinuous case
+# ---------------------------------------------
+#
+# Define one general measure a, one discrete measure b, the points on which
+# the source and target measures are defined, and finally the cost matrix M.
+
+n_source = 7
+n_target = 4
+reg = 1
+numItermax = 100000
+lr = 0.1
+batch_size = 3
+log = True
+
+a = ot.utils.unif(n_source)
+b = ot.utils.unif(n_target)
+
+rng = np.random.RandomState(0)
+X_source = rng.randn(n_source, 2)
+Y_target = rng.randn(n_target, 2)
+M = ot.dist(X_source, Y_target)
+
+#############################################################################
+#
+# Call the "SGD" dual method to find the transportation matrix in the
+# semicontinuous case
+# ---------------------------------------------
+#
+# Call ot.solve_dual_entropic and plot the results.
+
+sgd_dual_pi, log = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size,
+ numItermax, lr, log)
+print(log['alpha'], log['beta'])
+print(sgd_dual_pi)
+
+#############################################################################
+#
+# Compare the results with the Sinkhorn algorithm
+# ---------------------------------------------
+#
+# Call the Sinkhorn algorithm from POT
+
+sinkhorn_pi = ot.sinkhorn(a, b, M, reg)
+print(sinkhorn_pi)
+
+##############################################################################
+# Plot SGD results
+# -----------------
+
+pl.figure(4, figsize=(5, 5))
+ot.plot.plot1D_mat(a, b, sgd_dual_pi, 'dual: OT matrix SGD')
pl.show()
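
Beyond visually comparing the plotted matrices, the agreement with Sinkhorn can be quantified; a small illustrative addition to the script above (it assumes, as in the script, that all plans were computed with the same a, b and M):

    # maximum entrywise deviation from the Sinkhorn plan; with enough
    # iterations the stochastic solvers should come close
    print("SAG  vs Sinkhorn:", np.max(np.abs(sag_pi - sinkhorn_pi)))
    print("ASGD vs Sinkhorn:", np.max(np.abs(asgd_pi - sinkhorn_pi)))
    print("SGD  vs Sinkhorn:", np.max(np.abs(sgd_dual_pi - sinkhorn_pi)))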
diff --git a/ot/stochastic.py b/ot/stochastic.py
index 9912223..31c99be 100644
--- a/ot/stochastic.py
+++ b/ot/stochastic.py
@@ -5,7 +5,10 @@
import numpy as np
-def coordinate_gradient(b, M, reg, v, i):
+##############################################################################
+# Optimization toolbox for SEMI-DUAL problem
+##############################################################################
+def coordinate_gradient(b, M, reg, beta, i):
'''
Compute the coordinate gradient update for regularized discrete
distributions for (i, :)
@@ -59,7 +62,7 @@ def coordinate_gradient(b, M, reg, v, i):
>>> Y_target = rng.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
>>> method = "ASGD"
- >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg,
+ >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
method, numItermax,
lr)
>>> print(asgd_pi)
@@ -74,9 +77,9 @@ def coordinate_gradient(b, M, reg, v, i):
'''
- r = M[i, :] - v
- exp_v = np.exp(-r / reg) * b
- khi = exp_v / (np.sum(exp_v))
+ r = M[i, :] - beta
+ exp_beta = np.exp(-r/reg) * b
+ khi = exp_beta/(np.sum(exp_beta))
return b - khi
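
Since khi is a Gibbs (softmax) distribution over the targets, the returned gradient b - khi always sums to zero, reflecting the invariance of the semi-dual objective under constant shifts of beta. A quick illustrative check, with hypothetical values:

    import numpy as np

    b = np.full(4, 0.25)                      # uniform target weights
    r = np.array([0.3, 1.2, 0.7, 0.1])        # stands in for M[i, :] - beta
    khi = np.exp(-r / 1.0) * b                # reg = 1
    khi /= khi.sum()                          # softmax normalization
    assert abs((b - khi).sum()) < 1e-12       # gradient sums to zero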
@@ -137,7 +140,7 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=0.1):
>>> Y_target = rng.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
>>> method = "SAG"
- >>> sag_pi = stochastic.transportation_matrix_entropic(a, b, M, reg,
+ >>> sag_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
method, numItermax,
lr)
>>> print(asgd_pi)
@@ -153,16 +156,16 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=0.1):
n_source = np.shape(M)[0]
n_target = np.shape(M)[1]
- v = np.zeros(n_target)
+ cur_beta = np.zeros(n_target)
stored_gradient = np.zeros((n_source, n_target))
sum_stored_gradient = np.zeros(n_target)
for _ in range(numItermax):
i = np.random.randint(n_source)
- cur_coord_grad = a[i] * coordinate_gradient(b, M, reg, v, i)
+ cur_coord_grad = a[i] * coordinate_gradient(b, M, reg, cur_beta, i)
sum_stored_gradient += (cur_coord_grad - stored_gradient[i])
stored_gradient[i] = cur_coord_grad
- v += lr * (1. / n_source) * sum_stored_gradient
- return v
+ cur_beta += lr * (1./n_source) * sum_stored_gradient
+ return cur_beta
def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1):
@@ -170,19 +173,19 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1):
Compute the ASGD algorithm to solve the regularized semi-continuous measures
optimal transport max problem
- The function solves the following optimization problem:
- .. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
- s.t. \gamma 1 = a
- \gamma^T 1= b
- \gamma \geq 0
- where :
- - M is the (ns,nt) metric cost matrix
- - :math:`\Omega` is the entropic regularization term
- :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are source and target weights (sum to 1)
- The algorithm used for solving the problem is the ASGD algorithm
- as proposed in [18]_ [alg.2]
+ The function solves the following optimization problem:
+ .. math::
+ \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ s.t. \gamma 1 = a
+ \gamma^T 1= b
+ \gamma \geq 0
+ where :
+ - M is the (ns,nt) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term
+ :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target weights (sum to 1)
+ The algorithm used for solving the problem is the ASGD algorithm
+ as proposed in [18]_ [alg.2]
Parameters
@@ -221,7 +224,7 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1):
>>> Y_target = rng.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
>>> method = "ASGD"
- >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg,
+ >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
method, numItermax,
lr)
>>> print(asgd_pi)
@@ -237,18 +240,18 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1):
n_source = np.shape(M)[0]
n_target = np.shape(M)[1]
- cur_v = np.zeros(n_target)
- ave_v = np.zeros(n_target)
+ cur_beta = np.zeros(n_target)
+ ave_beta = np.zeros(n_target)
for cur_iter in range(numItermax):
k = cur_iter + 1
i = np.random.randint(n_source)
- cur_coord_grad = coordinate_gradient(b, M, reg, cur_v, i)
- cur_v += (lr / np.sqrt(k)) * cur_coord_grad
- ave_v = (1. / k) * cur_v + (1 - 1. / k) * ave_v
- return ave_v
+ cur_coord_grad = coordinate_gradient(b, M, reg, cur_beta, i)
+ cur_beta += (lr/np.sqrt(k)) * cur_coord_grad
+ ave_beta = (1./k) * cur_beta + (1 - 1./k) * ave_beta
+ return ave_beta
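
The ave_beta recurrence above is the Polyak-Ruppert average: after k iterations it equals the plain mean of the iterates, which is what gives ASGD its stability. An illustrative check of the recurrence alone (not part of the commit):

    import numpy as np

    iterates = np.random.randn(100)           # stand-ins for cur_beta values
    ave = 0.0
    for k, x in enumerate(iterates, start=1):
        ave = (1. / k) * x + (1 - 1. / k) * ave
    assert np.allclose(ave, iterates.mean())  # running average == plain mean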
-def c_transform_entropic(b, M, reg, v):
+def c_transform_entropic(b, M, reg, beta):
'''
The goal is to recover the dual variable alpha from the c-transform.
@@ -298,7 +301,7 @@ def c_transform_entropic(b, M, reg, v):
>>> Y_target = rng.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
>>> method = "ASGD"
- >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg,
+ >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
method, numItermax,
lr)
>>> print(asgd_pi)
@@ -313,16 +316,18 @@ def c_transform_entropic(b, M, reg, v):
'''
n_source = np.shape(M)[0]
- u = np.zeros(n_source)
+ alpha = np.zeros(n_source)
for i in range(n_source):
- r = M[i, :] - v
- exp_v = np.exp(-r / reg) * b
- u[i] = - reg * np.log(np.sum(exp_v))
- return u
+ r = M[i, :] - beta
+ min_r = np.min(r)
+ exp_beta = np.exp(-(r - min_r)/reg) * b
+ alpha[i] = min_r - reg * np.log(np.sum(exp_beta))
+ return alpha
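
The min_r shift above is the usual log-sum-exp stabilization: it leaves alpha[i] mathematically unchanged, but avoids exp underflow (and the resulting infinite logarithm) when the costs are large relative to reg. An illustrative comparison, with hypothetical values:

    import numpy as np

    b = np.full(3, 1. / 3)
    r = np.array([1000., 1001., 999.])        # stands in for M[i, :] - beta
    reg = 0.1

    # naive: every exp(-r/reg) underflows to 0, so the log returns -inf
    naive = -reg * np.log(np.sum(np.exp(-r / reg) * b))
    # stabilized, as in c_transform_entropic above
    min_r = np.min(r)
    stable = min_r - reg * np.log(np.sum(np.exp(-(r - min_r) / reg) * b))
    print(naive, stable)                      # inf versus roughly 999.11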
-def transportation_matrix_entropic(a, b, M, reg, method, numItermax=10000,
- lr=0.1):
+def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=0.1,
+ log=False):
'''
Compute the transportation matrix to solve the regularized discrete
measures optimal transport max problem
@@ -363,12 +368,16 @@ def transportation_matrix_entropic(a, b, M, reg, method, numItermax=10000,
size of the source measure
n_target : int number
size of the target measure
+ log : bool, optional
+ record log if True
Returns
-------
pi : np.ndarray(ns, nt)
transportation matrix
+ log : dict
+        log dictionary returned only if log==True in parameters
Examples
--------
@@ -385,7 +394,7 @@ def transportation_matrix_entropic(a, b, M, reg, method, numItermax=10000,
>>> Y_target = rng.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
>>> method = "ASGD"
- >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg,
+ >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
method, numItermax,
lr)
>>> print(asgd_pi)
@@ -398,15 +407,384 @@ def transportation_matrix_entropic(a, b, M, reg, method, numItermax=10000,
Advances in Neural Information Processing Systems (2016),
arXiv preprint arxiv:1605.08527.
'''
if method.lower() == "sag":
- opt_v = sag_entropic_transport(a, b, M, reg, numItermax, lr)
+ opt_beta = sag_entropic_transport(a, b, M, reg, numItermax, lr)
elif method.lower() == "asgd":
- opt_v = averaged_sgd_entropic_transport(b, M, reg, numItermax, lr)
+ opt_beta = averaged_sgd_entropic_transport(b, M, reg, numItermax, lr)
else:
print("Please, select your method between SAG and ASGD")
return None
- opt_u = c_transform_entropic(b, M, reg, opt_v)
- pi = (np.exp((opt_u[:, None] + opt_v[None, :] - M[:, :]) / reg) *
+ opt_alpha = c_transform_entropic(b, M, reg, opt_beta)
+ pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :])/reg) *
a[:, None] * b[None, :])
- return pi
+
+ if log:
+ log = {}
+ log['alpha'] = opt_alpha
+ log['beta'] = opt_beta
+ return pi, log
+ else:
+ return pi
+
+
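Because the plan is built from approximate dual variables, its marginals match a and b only up to the optimization error; a quick illustrative check, reusing a, b, M and reg from the sketch after the README diff (the tolerance is arbitrary, chosen to mirror the new tests below):

    pi = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg, "SAG",
                                                numItermax=100000, lr=0.1)
    np.testing.assert_allclose(a, pi.sum(1), atol=1e-02)  # rows ~ source
    np.testing.assert_allclose(b, pi.sum(0), atol=1e-02)  # columns ~ target
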
+##############################################################################
+# Optimization toolbox for DUAL problem
+##############################################################################
+
+
+def grad_dF_dalpha(M, reg, alpha, beta, batch_size, batch_alpha, batch_beta):
+ '''
+    Computes the partial gradient of :math:`F_{W_\varepsilon}` with
+    respect to alpha
+
+    Compute the partial gradient of the dual problem:
+    .. math::
+        \forall i \in batch\_alpha,
+        \nabla_{\alpha_i} F = batch\_size -
+            \sum_{j \in batch\_beta}
+            \exp\left(\frac{\alpha_i + \beta_j - M_{i,j}}{reg}\right)
+
+    where :
+        - M is the (ns,nt) metric cost matrix
+        - alpha and beta are dual variables in :math:`\mathbb{R}^{ns} \times \mathbb{R}^{nt}`
+        - reg is the regularization term
+        - batch_alpha and batch_beta are lists of indices
+
+ The algorithm used for solving the dual problem is the SGD algorithm
+ as proposed in [19]_ [alg.1]
+
+ Parameters
+ ----------
+
+ reg : float number,
+ Regularization term > 0
+ M : np.ndarray(ns, nt),
+ cost matrix
+ alpha : np.ndarray(ns,)
+ dual variable
+ beta : np.ndarray(nt,)
+ dual variable
+ batch_size : int number
+ size of the batch
+ batch_alpha : np.ndarray(bs,)
+ batch of index of alpha
+ batch_beta : np.ndarray(bs,)
+ batch of index of beta
+
+ Returns
+ -------
+
+    grad : np.ndarray(bs,)
+        partial gradient of F with respect to alpha on the batch
+
+ Examples
+ --------
+
+ >>> n_source = 7
+ >>> n_target = 4
+ >>> reg = 1
+ >>> numItermax = 20000
+ >>> lr = 0.1
+ >>> batch_size = 3
+ >>> log = True
+ >>> a = ot.utils.unif(n_source)
+ >>> b = ot.utils.unif(n_target)
+ >>> rng = np.random.RandomState(0)
+ >>> X_source = rng.randn(n_source, 2)
+ >>> Y_target = rng.randn(n_target, 2)
+ >>> M = ot.dist(X_source, Y_target)
+ >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+ batch_size,
+ numItermax, lr, log)
+ >>> print(log['alpha'], log['beta'])
+ >>> print(sgd_dual_pi)
+
+ References
+ ----------
+
+ [Seguy et al., 2018] :
+        International Conference on Learning Representations (2018),
+ arXiv preprint arxiv:1711.02283.
+ '''
+
+ grad_alpha = np.zeros(batch_size)
+ grad_alpha[:] = batch_size
+ for j in batch_beta:
+ grad_alpha -= np.exp((alpha[batch_alpha] + beta[j]
+ - M[batch_alpha, j])/reg)
+ return grad_alpha
+
+
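
The per-index loop in grad_dF_dalpha can equivalently be written as a single vectorized NumPy expression; an illustrative rewrite, not part of the commit (grad_dF_dbeta below admits the symmetric form, summing over batch_alpha instead):

    def grad_dF_dalpha_vec(M, reg, alpha, beta, batch_size,
                           batch_alpha, batch_beta):
        # exponential terms for the whole (batch_alpha x batch_beta) block
        K = np.exp((alpha[batch_alpha][:, None] + beta[batch_beta][None, :]
                    - M[np.ix_(batch_alpha, batch_beta)]) / reg)
        return batch_size - K.sum(axis=1)     # same output as the loop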
+def grad_dF_dbeta(M, reg, alpha, beta, batch_size, batch_alpha, batch_beta):
+ '''
+    Computes the partial gradient of :math:`F_{W_\varepsilon}` with
+    respect to beta
+
+    Compute the partial gradient of the dual problem:
+    .. math::
+        \forall j \in batch\_beta,
+        \nabla_{\beta_j} F = batch\_size -
+            \sum_{i \in batch\_alpha}
+            \exp\left(\frac{\alpha_i + \beta_j - M_{i,j}}{reg}\right)
+
+    where :
+        - M is the (ns,nt) metric cost matrix
+        - alpha and beta are dual variables in :math:`\mathbb{R}^{ns} \times \mathbb{R}^{nt}`
+        - reg is the regularization term
+        - batch_alpha and batch_beta are lists of indices
+
+ The algorithm used for solving the dual problem is the SGD algorithm
+ as proposed in [19]_ [alg.1]
+
+ Parameters
+ ----------
+
+ M : np.ndarray(ns, nt),
+ cost matrix
+ reg : float number,
+ Regularization term > 0
+ alpha : np.ndarray(ns,)
+ dual variable
+ beta : np.ndarray(nt,)
+ dual variable
+ batch_size : int number
+ size of the batch
+ batch_alpha : np.ndarray(bs,)
+ batch of index of alpha
+ batch_beta : np.ndarray(bs,)
+ batch of index of beta
+
+ Returns
+ -------
+
+    grad : np.ndarray(bs,)
+        partial gradient of F with respect to beta on the batch
+
+ Examples
+ --------
+
+ >>> n_source = 7
+ >>> n_target = 4
+ >>> reg = 1
+ >>> numItermax = 20000
+ >>> lr = 0.1
+ >>> batch_size = 3
+ >>> log = True
+ >>> a = ot.utils.unif(n_source)
+ >>> b = ot.utils.unif(n_target)
+ >>> rng = np.random.RandomState(0)
+ >>> X_source = rng.randn(n_source, 2)
+ >>> Y_target = rng.randn(n_target, 2)
+ >>> M = ot.dist(X_source, Y_target)
+ >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+ batch_size,
+ numItermax, lr, log)
+ >>> print(log['alpha'], log['beta'])
+ >>> print(sgd_dual_pi)
+
+ References
+ ----------
+
+ [Seguy et al., 2018] :
+        International Conference on Learning Representations (2018),
+ arXiv preprint arxiv:1711.02283.
+
+ '''
+
+ grad_beta = np.zeros(batch_size)
+ grad_beta[:] = batch_size
+ for i in batch_alpha:
+ grad_beta -= np.exp((alpha[i] +
+ beta[batch_beta] - M[i, batch_beta])/reg)
+ return grad_beta
+
+
+def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr,
+ alternate=True):
+ '''
+ Compute the sgd algorithm to solve the regularized discrete measures
+ optimal transport dual problem
+
+ The function solves the following optimization problem:
+ .. math::
+ \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ s.t. \gamma 1 = a
+ \gamma^T 1= b
+ \gamma \geq 0
+ where :
+ - M is the (ns,nt) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term
+ :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target weights (sum to 1)
+
+ Parameters
+ ----------
+
+ M : np.ndarray(ns, nt),
+ cost matrix
+ reg : float number,
+ Regularization term > 0
+ batch_size : int number
+ size of the batch
+    numItermax : int number
+        number of iterations
+    lr : float number
+        learning rate
+    alternate : bool, optional
+        if True, alternate the updates: alpha is updated first and the
+        beta gradient is computed with the freshly updated alpha
+
+ Returns
+ -------
+
+ alpha : np.ndarray(ns,)
+ dual variable
+ beta : np.ndarray(nt,)
+ dual variable
+
+ Examples
+ --------
+
+ >>> n_source = 7
+ >>> n_target = 4
+ >>> reg = 1
+ >>> numItermax = 20000
+ >>> lr = 0.1
+ >>> batch_size = 3
+ >>> log = True
+ >>> a = ot.utils.unif(n_source)
+ >>> b = ot.utils.unif(n_target)
+ >>> rng = np.random.RandomState(0)
+ >>> X_source = rng.randn(n_source, 2)
+ >>> Y_target = rng.randn(n_target, 2)
+ >>> M = ot.dist(X_source, Y_target)
+ >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+ batch_size,
+ numItermax, lr, log)
+ >>> print(log['alpha'], log['beta'])
+ >>> print(sgd_dual_pi)
+
+ References
+ ----------
+
+ [Seguy et al., 2018] :
+        International Conference on Learning Representations (2018),
+ arXiv preprint arxiv:1711.02283.
+ '''
+
+ n_source = np.shape(M)[0]
+ n_target = np.shape(M)[1]
+ cur_alpha = np.random.randn(n_source)
+ cur_beta = np.random.randn(n_target)
+ if alternate:
+ for cur_iter in range(numItermax):
+ k = np.sqrt(cur_iter + 1)
+ batch_alpha = np.random.choice(n_source, batch_size, replace=False)
+ batch_beta = np.random.choice(n_target, batch_size, replace=False)
+ grad_F_alpha = grad_dF_dalpha(M, reg, cur_alpha, cur_beta,
+ batch_size, batch_alpha, batch_beta)
+ cur_alpha[batch_alpha] += (lr/k) * grad_F_alpha
+ grad_F_beta = grad_dF_dbeta(M, reg, cur_alpha, cur_beta,
+ batch_size, batch_alpha, batch_beta)
+ cur_beta[batch_beta] += (lr/k) * grad_F_beta
+
+ else:
+ for cur_iter in range(numItermax):
+ k = np.sqrt(cur_iter + 1)
+ batch_alpha = np.random.choice(n_source, batch_size, replace=False)
+ batch_beta = np.random.choice(n_target, batch_size, replace=False)
+ grad_F_alpha = grad_dF_dalpha(M, reg, cur_alpha, cur_beta,
+ batch_size, batch_alpha, batch_beta)
+ grad_F_beta = grad_dF_dbeta(M, reg, cur_alpha, cur_beta,
+ batch_size, batch_alpha, batch_beta)
+ cur_alpha[batch_alpha] += (lr/k) * grad_F_alpha
+ cur_beta[batch_beta] += (lr/k) * grad_F_beta
+
+ return cur_alpha, cur_beta
+
+
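A short usage sketch (hyperparameters illustrative). Note the design choice in the two branches above: the alternate branch performs a Gauss-Seidel style sweep, computing the beta gradient with the freshly updated alpha, whereas the else branch updates both variables from the same iterate, Jacobi style:

    alpha, beta = sgd_entropic_regularization(M, reg=1, batch_size=3,
                                              numItermax=10000, lr=0.1,
                                              alternate=True)
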
+def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1,
+ log=False):
+ '''
+ Compute the transportation matrix to solve the regularized discrete measures
+ optimal transport dual problem
+
+ The function solves the following optimization problem:
+ .. math::
+ \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ s.t. \gamma 1 = a
+ \gamma^T 1= b
+ \gamma \geq 0
+ where :
+ - M is the (ns,nt) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term
+ :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target weights (sum to 1)
+
+ Parameters
+ ----------
+
+ a : np.ndarray(ns,),
+ source measure
+ b : np.ndarray(nt,),
+ target measure
+ M : np.ndarray(ns, nt),
+ cost matrix
+ reg : float number,
+ Regularization term > 0
+ batch_size : int number
+ size of the batch
+ numItermax : int number
+        number of iterations
+ lr : float number
+ learning rate
+ log : bool, optional
+ record log if True
+
+ Returns
+ -------
+
+ pi : np.ndarray(ns, nt)
+ transportation matrix
+ log : dict
+        log dictionary returned only if log==True in parameters
+
+ Examples
+ --------
+
+ >>> n_source = 7
+ >>> n_target = 4
+ >>> reg = 1
+ >>> numItermax = 20000
+ >>> lr = 0.1
+ >>> batch_size = 3
+ >>> log = True
+ >>> a = ot.utils.unif(n_source)
+ >>> b = ot.utils.unif(n_target)
+ >>> rng = np.random.RandomState(0)
+ >>> X_source = rng.randn(n_source, 2)
+ >>> Y_target = rng.randn(n_target, 2)
+ >>> M = ot.dist(X_source, Y_target)
+ >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+ batch_size,
+ numItermax, lr, log)
+ >>> print(log['alpha'], log['beta'])
+ >>> print(sgd_dual_pi)
+
+ References
+ ----------
+
+ [Seguy et al., 2018] :
+        International Conference on Learning Representations (2018),
+ arXiv preprint arxiv:1711.02283.
+ '''
+
+ opt_alpha, opt_beta = sgd_entropic_regularization(M, reg, batch_size,
+ numItermax, lr)
+ pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :])/reg) *
+ a[:, None] * b[None, :])
+ if log:
+ log = {}
+ log['alpha'] = opt_alpha
+ log['beta'] = opt_beta
+ return pi, log
+ else:
+ return pi
diff --git a/test/test_stochastic.py b/test/test_stochastic.py
index 5fe5ea9..bc0cebb 100644
--- a/test/test_stochastic.py
+++ b/test/test_stochastic.py
@@ -15,6 +15,11 @@ for discrete and semi-continuous measures from the POT library.
import numpy as np
import ot
+
+#############################################################################
+# COMPUTE TEST FOR SEMI-DUAL PROBLEM
+#############################################################################
+
#############################################################################
#
# TEST SAG algorithm
@@ -35,8 +40,8 @@ def test_stochastic_sag():
M = ot.dist(x, x)
- G = ot.stochastic.transportation_matrix_entropic(u, u, M, reg, "sag",
- numItermax=numItermax)
+ G = ot.stochastic.solve_semi_dual_entropic(u, u, M, reg, "sag",
+ numItermax=numItermax)
# check constraints
np.testing.assert_allclose(
@@ -65,9 +70,9 @@ def test_stochastic_asgd():
M = ot.dist(x, x)
- G = ot.stochastic.transportation_matrix_entropic(u, u, M, reg, "asgd",
- numItermax=numItermax,
- lr=lr)
+ G = ot.stochastic.solve_semi_dual_entropic(u, u, M, reg, "asgd",
+ numItermax=numItermax,
+ lr=lr)
# check constraints
np.testing.assert_allclose(
@@ -89,6 +94,7 @@ def test_sag_asgd_sinkhorn():
n = 15
reg = 1
nb_iter = 300000
+ lr = 1
rng = np.random.RandomState(0)
x = rng.randn(n, 2)
@@ -96,11 +102,10 @@ def test_sag_asgd_sinkhorn():
zero = np.zeros(n)
M = ot.dist(x, x)
- G_asgd = ot.stochastic.transportation_matrix_entropic(u, u, M, reg, "asgd",
- numItermax=nb_iter,
- lr=1)
- G_sag = ot.stochastic.transportation_matrix_entropic(u, u, M, reg, "sag",
- numItermax=nb_iter)
+ G_asgd = ot.stochastic.solve_semi_dual_entropic(u, u, M, reg, "asgd",
+ numItermax=nb_iter, lr=lr)
+ G_sag = ot.stochastic.solve_semi_dual_entropic(u, u, M, reg, "sag",
+ numItermax=nb_iter)
G_sinkhorn = ot.sinkhorn(u, u, M, reg)
# check constraints
@@ -112,3 +117,82 @@ def test_sag_asgd_sinkhorn():
zero, (G_asgd - G_sinkhorn).sum(1), atol=1e-03) # cf convergence asgd
np.testing.assert_allclose(
zero, (G_asgd - G_sinkhorn).sum(0), atol=1e-03) # cf convergence asgd
+ np.testing.assert_allclose(
+ G_sag, G_sinkhorn, atol=1e-03) # cf convergence sag
+ np.testing.assert_allclose(
+ G_asgd, G_sinkhorn, atol=1e-03) # cf convergence asgd
+
+
+#############################################################################
+# COMPUTE TEST FOR DUAL PROBLEM
+#############################################################################
+
+#############################################################################
+#
+# TEST SGD algorithm
+# ---------------------------------------------
+# 2 identical discrete measures u defined on the same space with a
+# regularization term, a batch_size and a number of iterations
+
+def test_stochastic_dual_sgd():
+ # test sgd
+ print("SGD")
+ n = 10
+ reg = 1
+ numItermax = 300000
+ batch_size = 8
+ rng = np.random.RandomState(0)
+
+ x = rng.randn(n, 2)
+ u = ot.utils.unif(n)
+
+ M = ot.dist(x, x)
+
+ G = ot.stochastic.solve_dual_entropic(u, u, M, reg, batch_size,
+ numItermax=numItermax)
+
+    # check constraints
+ np.testing.assert_allclose(
+ u, G.sum(1), atol=1e-02) # cf convergence sgd
+ np.testing.assert_allclose(
+ u, G.sum(0), atol=1e-02) # cf convergence sgd
+
+#############################################################################
+#
+# TEST Convergence SGD toward Sinkhorn's solution
+# --------------------------------------------------------
+# 2 identical discrete measures u defined on the same space with a
+# regularization term, a batch_size and a number of iterations
+
+
+def test_dual_sgd_sinkhorn():
+ # test all dual algorithms
+ print("SGD vs Sinkhorn")
+ n = 10
+ reg = 1
+ nb_iter = 300000
+ batch_size = 8
+ rng = np.random.RandomState(0)
+
+ x = rng.randn(n, 2)
+ u = ot.utils.unif(n)
+ zero = np.zeros(n)
+ M = ot.dist(x, x)
+
+ G_sgd = ot.stochastic.solve_dual_entropic(u, u, M, reg, batch_size,
+ numItermax=nb_iter)
+
+ G_sinkhorn = ot.sinkhorn(u, u, M, reg)
+
+    # check constraints
+ np.testing.assert_allclose(
+ zero, (G_sgd - G_sinkhorn).sum(1), atol=1e-02) # cf convergence sgd
+ np.testing.assert_allclose(
+ zero, (G_sgd - G_sinkhorn).sum(0), atol=1e-02) # cf convergence sgd
+ np.testing.assert_allclose(
+ G_sgd, G_sinkhorn, atol=1e-02) # cf convergence sgd
+
+
+if __name__ == '__main__':
+ test_stochastic_dual_sgd()
+ test_dual_sgd_sinkhorn()