author    Nicolas Courty <ncourty@irisa.fr>  2018-09-07 14:36:18 +0200
committer GitHub <noreply@github.com>        2018-09-07 14:36:18 +0200
commit    dd200d544393cb6538f789141758bdbe8251bffc (patch)
tree      103d2c807405263b60e239dcf2a11b1012e0a209
parent    e8c6d2fc9c6b08bbed11628326711ab29c155bac (diff)
parent    da5d07b4949877148f1582a9f0649c34282afa30 (diff)
Merge branch 'master' into convolution
-rw-r--r--  README.md               |   8
-rw-r--r--  ot/stochastic.py        | 179
-rw-r--r--  test/test_stochastic.py |  54
3 files changed, 89 insertions, 152 deletions
diff --git a/README.md b/README.md
index 1105362..5f37ad6 100644
--- a/README.md
+++ b/README.md
@@ -17,7 +17,6 @@ It provides the following solvers:
* Entropic regularization OT solver with Sinkhorn Knopp Algorithm [2] and stabilized version [9][10] with optional GPU implementation (requires cudamat).
* Smooth optimal transport solvers (dual and semi-dual) for KL and squared L2 regularizations [17].
* Non regularized Wasserstein barycenters [16] with LP solver (only small scale).
-* Non regularized free support Wasserstein barycenters [20].
* Bregman projections for Wasserstein barycenter [3] and unmixing [4].
* Optimal transport for domain adaptation with group lasso regularization [5]
* Conditional gradient [6] and Generalized conditional gradient for regularized OT [7].
@@ -25,6 +24,7 @@ It provides the following solvers:
* Wasserstein Discriminant Analysis [11] (requires autograd + pymanopt).
* Gromov-Wasserstein distances and barycenters ([13] and regularized [12])
* Stochastic Optimization for Large-scale Optimal Transport (semi-dual problem [18] and dual problem [19])
+* Non regularized free support Wasserstein barycenters [20].
Some demonstrations (both in Python and Jupyter Notebook format) are available in the examples folder.
@@ -164,7 +164,7 @@ The contributors to this library are:
* [Stanislas Chambon](https://slasnista.github.io/)
* [Antoine Rolet](https://arolet.github.io/)
* Erwan Vautier (Gromov-Wasserstein)
-* [Kilian Fatras](https://kilianfatras.github.io/) (Stochastic optimization)
+* [Kilian Fatras](https://kilianfatras.github.io/)
This toolbox benefits a lot from open source research and we would like to thank the following persons for providing some code (in various languages):
@@ -223,10 +223,10 @@ You can also post bug reports and feature requests in Github issues. Make sure t
[17] Blondel, M., Seguy, V., & Rolet, A. (2018). [Smooth and Sparse Optimal Transport](https://arxiv.org/abs/1710.06276). Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics (AISTATS).
-[18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. (2016) [Stochastic Optimization for Large-scale Optimal Transport](arXiv preprint arxiv:1605.08527). Advances in Neural Information Processing Systems (2016).
+[18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. (2016) [Stochastic Optimization for Large-scale Optimal Transport](https://arxiv.org/abs/1605.08527). Advances in Neural Information Processing Systems (2016).
[19] Seguy, V., Bhushan Damodaran, B., Flamary, R., Courty, N., Rolet, A.& Blondel, M. [Large-scale Optimal Transport and Mapping Estimation](https://arxiv.org/pdf/1711.02283.pdf). International Conference on Learning Representation (2018)
[20] Cuturi, M. and Doucet, A. (2014) [Fast Computation of Wasserstein Barycenters](http://proceedings.mlr.press/v32/cuturi14.html). International Conference in Machine Learning
-[21] Solomon, J., De Goes, F., Peyré, G., Cuturi, M., Butscher, A., Nguyen, A. & Guibas, L. (2015). [Convolutional wasserstein distances: Efficient optimal transportation on geometric domains](https://dl.acm.org/citation.cfm?id=2766963). ACM Transactions on Graphics (TOG), 34(4), 66. \ No newline at end of file
+[21] Solomon, J., De Goes, F., Peyré, G., Cuturi, M., Butscher, A., Nguyen, A. & Guibas, L. (2015). [Convolutional wasserstein distances: Efficient optimal transportation on geometric domains](https://dl.acm.org/citation.cfm?id=2766963). ACM Transactions on Graphics (TOG), 34(4), 66.
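
The README hunks above reorder the solver list and fix the reference links; the stochastic solvers they mention (semi-dual [18], dual [19]) are the ones reworked in ot/stochastic.py below. A minimal usage sketch, not part of this merge (sizes and hyper-parameters are illustrative; the function names are those of the ot.stochastic module at this commit):

import numpy as np
import ot
import ot.stochastic

n = 10
rng = np.random.RandomState(0)
x = rng.randn(n, 2)
a = ot.utils.unif(n)   # uniform source weights
b = ot.utils.unif(n)   # uniform target weights
M = ot.dist(x, x)      # squared Euclidean cost matrix

# semi-dual solver [18] (SAG) and dual solver [19] (SGD on both potentials)
G_semi = ot.stochastic.solve_semi_dual_entropic(a, b, M, reg=1, method="sag",
                                                numItermax=10000)
G_dual = ot.stochastic.solve_dual_entropic(a, b, M, reg=1, batch_size=5,
                                           numItermax=20000, lr=0.1)
print(G_dual.sum(1))   # row marginals should be close to a
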
diff --git a/ot/stochastic.py b/ot/stochastic.py
index 5e8206e..a369ba8 100644
--- a/ot/stochastic.py
+++ b/ot/stochastic.py
@@ -435,8 +435,8 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
##############################################################################
-def batch_grad_dual_alpha(M, reg, alpha, beta, batch_size, batch_alpha,
- batch_beta):
+def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
+ batch_beta):
'''
    Computes the partial gradient of F_{W_varepsilon}
@@ -444,104 +444,31 @@ def batch_grad_dual_alpha(M, reg, alpha, beta, batch_size, batch_alpha,
..math:
\forall i in batch_alpha,
- grad_alpha_i = 1 * batch_size -
- sum_{j in batch_beta} exp((alpha_i + beta_j - M_{i,j})/reg)
+                grad_alpha_i = a_i * batch_size/len(beta) -
+ sum_{j in batch_beta} exp((alpha_i + beta_j - M_{i,j})/reg)
+ * a_i * b_j
- where :
- - M is the (ns,nt) metric cost matrix
- - alpha, beta are dual variables in R^ixR^J
- - reg is the regularization term
- - batch_alpha and batch_beta are list of index
-
- The algorithm used for solving the dual problem is the SGD algorithm
- as proposed in [19]_ [alg.1]
-
- Parameters
- ----------
-
- reg : float number,
- Regularization term > 0
- M : np.ndarray(ns, nt),
- cost matrix
- alpha : np.ndarray(ns,)
- dual variable
- beta : np.ndarray(nt,)
- dual variable
- batch_size : int number
- size of the batch
- batch_alpha : np.ndarray(bs,)
- batch of index of alpha
- batch_beta : np.ndarray(bs,)
- batch of index of beta
-
- Returns
- -------
-
- grad : np.ndarray(ns,)
- partial grad F in alpha
-
- Examples
- --------
-
- >>> n_source = 7
- >>> n_target = 4
- >>> reg = 1
- >>> numItermax = 20000
- >>> lr = 0.1
- >>> batch_size = 3
- >>> log = True
- >>> a = ot.utils.unif(n_source)
- >>> b = ot.utils.unif(n_target)
- >>> rng = np.random.RandomState(0)
- >>> X_source = rng.randn(n_source, 2)
- >>> Y_target = rng.randn(n_target, 2)
- >>> M = ot.dist(X_source, Y_target)
- >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
- batch_size,
- numItermax, lr, log)
- >>> print(log['alpha'], log['beta'])
- >>> print(sgd_dual_pi)
-
- References
- ----------
-
- [Seguy et al., 2018] :
- International Conference on Learning Representation (2018),
- arXiv preprint arxiv:1711.02283.
- '''
-
- grad_alpha = np.zeros(batch_size)
- grad_alpha[:] = batch_size
- for j in batch_beta:
- grad_alpha -= np.exp((alpha[batch_alpha] + beta[j] -
- M[batch_alpha, j]) / reg)
- return grad_alpha
-
-
-def batch_grad_dual_beta(M, reg, alpha, beta, batch_size, batch_alpha,
- batch_beta):
- '''
- Computes the partial gradient of F_\W_varepsilon
-
- Compute the partial gradient of the dual problem:
-
- ..math:
- \forall j in batch_beta,
- grad_beta_j = 1 * batch_size -
+                \forall j in batch_beta,
+                grad_beta_j = b_j * batch_size/len(alpha) -
sum_{i in batch_alpha} exp((alpha_i + beta_j - M_{i,j})/reg)
-
+ * a_i * b_j
where :
- M is the (ns,nt) metric cost matrix
            - alpha, beta are dual variables in R^ns x R^nt
- reg is the regularization term
- - batch_alpha and batch_beta are list of index
+            - batch_alpha and batch_beta are lists of indices
+ - a and b are source and target weights (sum to 1)
+
The algorithm used for solving the dual problem is the SGD algorithm
as proposed in [19]_ [alg.1]
Parameters
----------
-
+ a : np.ndarray(ns,),
+ source measure
+ b : np.ndarray(nt,),
+ target measure
M : np.ndarray(ns, nt),
cost matrix
reg : float number,
@@ -561,7 +488,7 @@ def batch_grad_dual_beta(M, reg, alpha, beta, batch_size, batch_alpha,
-------
grad : np.ndarray(ns,)
- partial grad F in beta
+ partial grad F
Examples
--------
@@ -591,19 +518,22 @@ def batch_grad_dual_beta(M, reg, alpha, beta, batch_size, batch_alpha,
[Seguy et al., 2018] :
International Conference on Learning Representation (2018),
arXiv preprint arxiv:1711.02283.
-
'''
- grad_beta = np.zeros(batch_size)
- grad_beta[:] = batch_size
- for i in batch_alpha:
- grad_beta -= np.exp((alpha[i] +
- beta[batch_beta] - M[i, batch_beta]) / reg)
- return grad_beta
+ G = - (np.exp((alpha[batch_alpha, None] + beta[None, batch_beta] -
+ M[batch_alpha, :][:, batch_beta]) / reg) *
+ a[batch_alpha, None] * b[None, batch_beta])
+ grad_beta = np.zeros(np.shape(M)[1])
+ grad_alpha = np.zeros(np.shape(M)[0])
+ grad_beta[batch_beta] = (b[batch_beta] * len(batch_alpha) / np.shape(M)[0] +
+ G.sum(0))
+ grad_alpha[batch_alpha] = (a[batch_alpha] * len(batch_beta) /
+ np.shape(M)[1] + G.sum(1))
+
+ return grad_alpha, grad_beta
-def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr,
- alternate=True):
+def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
'''
    Run the SGD algorithm to solve the regularized discrete measures
        optimal transport dual problem
@@ -623,7 +553,10 @@ def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr,
Parameters
----------
-
+ a : np.ndarray(ns,),
+ source measure
+ b : np.ndarray(nt,),
+ target measure
M : np.ndarray(ns, nt),
cost matrix
reg : float number,
@@ -634,8 +567,6 @@ def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr,
number of iteration
lr : float number
learning rate
- alternate : bool, optional
- alternating algorithm
Returns
-------
@@ -662,8 +593,8 @@ def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr,
>>> Y_target = rng.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
>>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
- batch_size,
- numItermax, lr, log)
+ batch_size,
+ numItermax, lr, log)
>>> print(log['alpha'], log['beta'])
>>> print(sgd_dual_pi)
@@ -677,35 +608,17 @@ def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr,
n_source = np.shape(M)[0]
n_target = np.shape(M)[1]
- cur_alpha = np.random.randn(n_source)
- cur_beta = np.random.randn(n_target)
- if alternate:
- for cur_iter in range(numItermax):
- k = np.sqrt(cur_iter + 1)
- batch_alpha = np.random.choice(n_source, batch_size, replace=False)
- batch_beta = np.random.choice(n_target, batch_size, replace=False)
- grad_F_alpha = batch_grad_dual_alpha(M, reg, cur_alpha, cur_beta,
- batch_size, batch_alpha,
- batch_beta)
- cur_alpha[batch_alpha] += (lr / k) * grad_F_alpha
- grad_F_beta = batch_grad_dual_beta(M, reg, cur_alpha, cur_beta,
- batch_size, batch_alpha,
- batch_beta)
- cur_beta[batch_beta] += (lr / k) * grad_F_beta
-
- else:
- for cur_iter in range(numItermax):
- k = np.sqrt(cur_iter + 1)
- batch_alpha = np.random.choice(n_source, batch_size, replace=False)
- batch_beta = np.random.choice(n_target, batch_size, replace=False)
- grad_F_alpha = batch_grad_dual_alpha(M, reg, cur_alpha, cur_beta,
- batch_size, batch_alpha,
- batch_beta)
- grad_F_beta = batch_grad_dual_beta(M, reg, cur_alpha, cur_beta,
- batch_size, batch_alpha,
- batch_beta)
- cur_alpha[batch_alpha] += (lr / k) * grad_F_alpha
- cur_beta[batch_beta] += (lr / k) * grad_F_beta
+ cur_alpha = np.zeros(n_source)
+ cur_beta = np.zeros(n_target)
+ for cur_iter in range(numItermax):
+ k = np.sqrt(cur_iter + 1)
+ batch_alpha = np.random.choice(n_source, batch_size, replace=False)
+ batch_beta = np.random.choice(n_target, batch_size, replace=False)
+ update_alpha, update_beta = batch_grad_dual(a, b, M, reg, cur_alpha,
+ cur_beta, batch_size,
+ batch_alpha, batch_beta)
+ cur_alpha += (lr / k) * update_alpha
+ cur_beta += (lr / k) * update_beta
return cur_alpha, cur_beta
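
The hunks above collapse the two alternating helpers into a single vectorized batch_grad_dual and a plain SGD loop with a 1/sqrt(k) step decay starting from zero potentials. A rough loop-based reference for the same gradients, useful only to sanity-check the broadcasting (the helper name batch_grad_dual_naive is hypothetical and not part of the library):

import numpy as np

def batch_grad_dual_naive(a, b, M, reg, alpha, beta, batch_alpha, batch_beta):
    # same quantities as batch_grad_dual above, computed with explicit loops
    n_s, n_t = M.shape
    grad_alpha = np.zeros(n_s)
    grad_beta = np.zeros(n_t)
    for i in batch_alpha:
        s = sum(np.exp((alpha[i] + beta[j] - M[i, j]) / reg) * a[i] * b[j]
                for j in batch_beta)
        grad_alpha[i] = a[i] * len(batch_beta) / n_t - s
    for j in batch_beta:
        s = sum(np.exp((alpha[i] + beta[j] - M[i, j]) / reg) * a[i] * b[j]
                for i in batch_alpha)
        grad_beta[j] = b[j] * len(batch_alpha) / n_s - s
    return grad_alpha, grad_beta
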
@@ -787,7 +700,7 @@ def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1,
arXiv preprint arxiv:1711.02283.
'''
- opt_alpha, opt_beta = sgd_entropic_regularization(M, reg, batch_size,
+ opt_alpha, opt_beta = sgd_entropic_regularization(a, b, M, reg, batch_size,
numItermax, lr)
pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) *
a[:, None] * b[None, :])
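
The plan recovered above, pi_ij = exp((alpha_i + beta_j - M_ij)/reg) * a_i * b_j, is the usual entropic parametrization: with u = a * exp(alpha/reg), v = b * exp(beta/reg) and K = exp(-M/reg) it equals diag(u) K diag(v), the same form Sinkhorn returns, which is why the tests below compare its marginals directly against ot.sinkhorn. A small sketch making that rewriting explicit (plan_from_duals is illustrative, not a library function):

import numpy as np

def plan_from_duals(a, b, M, reg, alpha, beta):
    # pi = diag(a * exp(alpha/reg)) K diag(b * exp(beta/reg)), with K = exp(-M/reg)
    K = np.exp(-M / reg)
    u = a * np.exp(alpha / reg)
    v = b * np.exp(beta / reg)
    return u[:, None] * K * v[None, :]
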
diff --git a/test/test_stochastic.py b/test/test_stochastic.py
index f315c88..0128317 100644
--- a/test/test_stochastic.py
+++ b/test/test_stochastic.py
@@ -97,7 +97,6 @@ def test_sag_asgd_sinkhorn():
x = rng.randn(n, 2)
u = ot.utils.unif(n)
- zero = np.zeros(n)
M = ot.dist(x, x)
G_asgd = ot.stochastic.solve_semi_dual_entropic(u, u, M, reg, "asgd",
@@ -108,13 +107,13 @@ def test_sag_asgd_sinkhorn():
    # check constraints
np.testing.assert_allclose(
- zero, (G_sag - G_sinkhorn).sum(1), atol=1e-03) # cf convergence sag
+ G_sag.sum(1), G_sinkhorn.sum(1), atol=1e-03)
np.testing.assert_allclose(
- zero, (G_sag - G_sinkhorn).sum(0), atol=1e-03) # cf convergence sag
+ G_sag.sum(0), G_sinkhorn.sum(0), atol=1e-03)
np.testing.assert_allclose(
- zero, (G_asgd - G_sinkhorn).sum(1), atol=1e-03) # cf convergence asgd
+ G_asgd.sum(1), G_sinkhorn.sum(1), atol=1e-03)
np.testing.assert_allclose(
- zero, (G_asgd - G_sinkhorn).sum(0), atol=1e-03) # cf convergence asgd
+ G_asgd.sum(0), G_sinkhorn.sum(0), atol=1e-03)
np.testing.assert_allclose(
G_sag, G_sinkhorn, atol=1e-03) # cf convergence sag
np.testing.assert_allclose(
@@ -137,8 +136,8 @@ def test_stochastic_dual_sgd():
# test sgd
n = 10
reg = 1
- numItermax = 300000
- batch_size = 8
+ numItermax = 15000
+ batch_size = 10
rng = np.random.RandomState(0)
x = rng.randn(n, 2)
@@ -151,9 +150,9 @@ def test_stochastic_dual_sgd():
    # check constraints
np.testing.assert_allclose(
- u, G.sum(1), atol=1e-02) # cf convergence sgd
+ u, G.sum(1), atol=1e-03) # cf convergence sgd
np.testing.assert_allclose(
- u, G.sum(0), atol=1e-02) # cf convergence sgd
+ u, G.sum(0), atol=1e-03) # cf convergence sgd
#############################################################################
@@ -168,13 +167,13 @@ def test_dual_sgd_sinkhorn():
# test all dual algorithms
n = 10
reg = 1
- nb_iter = 300000
- batch_size = 8
+ nb_iter = 150000
+ batch_size = 10
rng = np.random.RandomState(0)
+# Test uniform
x = rng.randn(n, 2)
u = ot.utils.unif(n)
- zero = np.zeros(n)
M = ot.dist(x, x)
G_sgd = ot.stochastic.solve_dual_entropic(u, u, M, reg, batch_size,
@@ -184,8 +183,33 @@ def test_dual_sgd_sinkhorn():
    # check constraints
np.testing.assert_allclose(
- zero, (G_sgd - G_sinkhorn).sum(1), atol=1e-02) # cf convergence sgd
+ G_sgd.sum(1), G_sinkhorn.sum(1), atol=1e-03)
+ np.testing.assert_allclose(
+ G_sgd.sum(0), G_sinkhorn.sum(0), atol=1e-03)
+ np.testing.assert_allclose(
+ G_sgd, G_sinkhorn, atol=1e-03) # cf convergence sgd
+
+# Test gaussian
+ n = 30
+ reg = 1
+ batch_size = 30
+
+ a = ot.datasets.make_1D_gauss(n, 15, 5) # m= mean, s= std
+ b = ot.datasets.make_1D_gauss(n, 15, 5)
+ X_source = np.arange(n, dtype=np.float64)
+ Y_target = np.arange(n, dtype=np.float64)
+ M = ot.dist(X_source.reshape((n, 1)), Y_target.reshape((n, 1)))
+ M /= M.max()
+
+ G_sgd = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size,
+ numItermax=nb_iter)
+
+ G_sinkhorn = ot.sinkhorn(a, b, M, reg)
+
+    # check constraints
+ np.testing.assert_allclose(
+ G_sgd.sum(1), G_sinkhorn.sum(1), atol=1e-03)
np.testing.assert_allclose(
- zero, (G_sgd - G_sinkhorn).sum(0), atol=1e-02) # cf convergence sgd
+ G_sgd.sum(0), G_sinkhorn.sum(0), atol=1e-03)
np.testing.assert_allclose(
- G_sgd, G_sinkhorn, atol=1e-02) # cf convergence sgd
+ G_sgd, G_sinkhorn, atol=1e-03) # cf convergence sgd
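
Note that the solver draws its batch indices from the global NumPy generator (np.random.choice in the hunks above), so exact values differ slightly between runs; the approximate 1e-03 tolerances account for that. To run only the updated dual-solver tests in isolation, assuming the repository's usual pytest setup:

import pytest

# select the dual-solver tests touched by this merge
pytest.main(["test/test_stochastic.py", "-k", "dual", "-v"])
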