author    Kilian Fatras <kilianfatras@dhcp-206-12-53-20.eduroam.wireless.ubc.ca>  2018-08-28 17:24:07 -0700
committer Kilian Fatras <kilianfatras@dhcp-206-12-53-20.eduroam.wireless.ubc.ca>  2018-08-28 17:24:07 -0700
commit    e885d78cc9608d791a9d1561d2f4e0b783ba0761 (patch)
tree      e03a553873f110d1b8e0f15cc6f9248c916a405c /ot
parent    77b68901c5415ddc5d9ab5215a6fa97723de3de9 (diff)

debug sgd dual
Diffstat (limited to 'ot')
-rw-r--r--  ot/datasets.py    |  60
-rw-r--r--  ot/gromov.py      |   1
-rw-r--r--  ot/lp/__init__.py |   2
-rw-r--r--  ot/stochastic.py  | 437
-rw-r--r--  ot/utils.py       |  20
5 files changed, 505 insertions(+), 15 deletions(-)
diff --git a/ot/datasets.py b/ot/datasets.py
index e4fe118..362a89b 100644
--- a/ot/datasets.py
+++ b/ot/datasets.py
@@ -9,9 +9,10 @@ Simple example datasets for OT
import numpy as np
import scipy as sp
+from .utils import check_random_state, deprecated
-def get_1D_gauss(n, m, s):
+def make_1D_gauss(n, m, s):
"""return a 1D histogram for a gaussian distribution (n bins, mean m and std s)
Parameters
@@ -36,19 +37,29 @@ def get_1D_gauss(n, m, s):
return h / h.sum()
-def get_2D_samples_gauss(n, m, sigma):
+@deprecated()
+def get_1D_gauss(n, m, sigma):
+    """ Deprecated: see make_1D_gauss """
+    return make_1D_gauss(n, m, sigma)
+
+
+def make_2D_samples_gauss(n, m, sigma, random_state=None):
"""return n samples drawn from 2D gaussian N(m,sigma)
Parameters
----------
n : int
- number of bins in the histogram
+ number of samples to make
m : np.array (2,)
mean value of the gaussian distribution
sigma : np.array (2,2)
covariance matrix of the gaussian distribution
-
+ random_state : int, RandomState instance or None, optional (default=None)
+ If int, random_state is the seed used by the random number generator;
+ If RandomState instance, random_state is the random number generator;
+ If None, the random number generator is the RandomState instance used
+ by `np.random`.
Returns
-------
@@ -56,17 +67,25 @@ def get_2D_samples_gauss(n, m, sigma):
n samples drawn from N(m,sigma)
"""
+
+ generator = check_random_state(random_state)
if np.isscalar(sigma):
sigma = np.array([sigma, ])
if len(sigma) > 1:
P = sp.linalg.sqrtm(sigma)
- res = np.random.randn(n, 2).dot(P) + m
+ res = generator.randn(n, 2).dot(P) + m
else:
- res = np.random.randn(n, 2) * np.sqrt(sigma) + m
+ res = generator.randn(n, 2) * np.sqrt(sigma) + m
return res
-def get_data_classif(dataset, n, nz=.5, theta=0, **kwargs):
+@deprecated()
+def get_2D_samples_gauss(n, m, sigma, random_state=None):
+ """ Deprecated see make_2D_samples_gauss """
+ return make_2D_samples_gauss(n, m, sigma, random_state=None)
+
+
+def make_data_classif(dataset, n, nz=.5, theta=0, random_state=None, **kwargs):
""" dataset generation for classification problems
Parameters
@@ -78,7 +97,11 @@ def get_data_classif(dataset, n, nz=.5, theta=0, **kwargs):
number of training samples
nz : float
noise level (>0)
-
+ random_state : int, RandomState instance or None, optional (default=None)
+ If int, random_state is the seed used by the random number generator;
+ If RandomState instance, random_state is the random number generator;
+ If None, the random number generator is the RandomState instance used
+ by `np.random`.
Returns
-------
@@ -88,6 +111,9 @@ def get_data_classif(dataset, n, nz=.5, theta=0, **kwargs):
labels of the samples
"""
+
+ generator = check_random_state(random_state)
+
if dataset.lower() == '3gauss':
y = np.floor((np.arange(n) * 1.0 / n * 3)) + 1
x = np.zeros((n, 2))
@@ -99,8 +125,8 @@ def get_data_classif(dataset, n, nz=.5, theta=0, **kwargs):
x[y == 3, 0] = 1.
x[y == 3, 1] = 0
- x[y != 3, :] += 1.5 * nz * np.random.randn(sum(y != 3), 2)
- x[y == 3, :] += 2 * nz * np.random.randn(sum(y == 3), 2)
+ x[y != 3, :] += 1.5 * nz * generator.randn(sum(y != 3), 2)
+ x[y == 3, :] += 2 * nz * generator.randn(sum(y == 3), 2)
elif dataset.lower() == '3gauss2':
y = np.floor((np.arange(n) * 1.0 / n * 3)) + 1
@@ -114,8 +140,8 @@ def get_data_classif(dataset, n, nz=.5, theta=0, **kwargs):
x[y == 3, 0] = 2.
x[y == 3, 1] = 0
- x[y != 3, :] += nz * np.random.randn(sum(y != 3), 2)
- x[y == 3, :] += 2 * nz * np.random.randn(sum(y == 3), 2)
+ x[y != 3, :] += nz * generator.randn(sum(y != 3), 2)
+ x[y == 3, :] += 2 * nz * generator.randn(sum(y == 3), 2)
elif dataset.lower() == 'gaussrot':
rot = np.array(
@@ -127,8 +153,8 @@ def get_data_classif(dataset, n, nz=.5, theta=0, **kwargs):
n2 = np.sum(y == 2)
x = np.zeros((n, 2))
- x[y == 1, :] = get_2D_samples_gauss(n1, m1, nz)
- x[y == 2, :] = get_2D_samples_gauss(n2, m2, nz)
+    x[y == 1, :] = make_2D_samples_gauss(n1, m1, nz, random_state=generator)
+    x[y == 2, :] = make_2D_samples_gauss(n2, m2, nz, random_state=generator)
x = x.dot(rot)
@@ -138,3 +164,9 @@ def get_data_classif(dataset, n, nz=.5, theta=0, **kwargs):
print("unknown dataset")
return x, y.astype(int)
+
+
+@deprecated()
+def get_data_classif(dataset, n, nz=.5, theta=0, random_state=None, **kwargs):
+ """ Deprecated see make_data_classif """
+ return make_data_classif(dataset, n, nz=.5, theta=0, random_state=None, **kwargs)
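
For a quick sense of the new random_state plumbing, here is a minimal sketch (not part of the commit; it assumes the ot package is installed with the generators added above):

# Sketch: seeded calls to the renamed generators are reproducible.
import numpy as np
from ot import datasets

m = np.array([0., 0.])
sigma = np.array([[1., 0.], [0., 1.]])

xs1 = datasets.make_2D_samples_gauss(50, m, sigma, random_state=42)
xs2 = datasets.make_2D_samples_gauss(50, m, sigma, random_state=42)
assert np.allclose(xs1, xs2)  # same seed, identical samples

x, y = datasets.make_data_classif('3gauss', 100, nz=0.5, random_state=0)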
diff --git a/ot/gromov.py b/ot/gromov.py
index 65b2e29..0278e99 100644
--- a/ot/gromov.py
+++ b/ot/gromov.py
@@ -2,7 +2,6 @@
# -*- coding: utf-8 -*-
"""
Gromov-Wasserstein transport method
-===================================
"""
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 5dda82a..4c0d170 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -18,6 +18,8 @@ from .emd_wrap import emd_c, check_result
from ..utils import parmap
from .cvx import barycenter
+__all__ = ['emd', 'emd2', 'barycenter', 'cvx']
+
def emd(a, b, M, numItermax=100000, log=False):
"""Solves the Earth Movers distance problem and returns the OT matrix
diff --git a/ot/stochastic.py b/ot/stochastic.py
index 0788f61..f3d1bb5 100644
--- a/ot/stochastic.py
+++ b/ot/stochastic.py
@@ -435,6 +435,443 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
##############################################################################
+# Author: Kilian Fatras <kilian.fatras@gmail.com>
+#
+# License: MIT License
+
+import numpy as np
+
+
+##############################################################################
+# Optimization toolbox for SEMI - DUAL problems
+##############################################################################
+
+
+def coordinate_grad_semi_dual(b, M, reg, beta, i):
+ '''
+    Compute the coordinate gradient update of the semi-dual problem for
+    the i-th source point.
+
+    The function computes the gradient of the semi-dual problem:
+
+    .. math::
+        W_\varepsilon(a, b) = \max_v \sum_i \left(\sum_j v_j b_j
+        - \mathrm{reg} \log\left(\sum_j \exp\left((v_j - M_{i,j})/\mathrm{reg}\right) b_j\right)\right) a_i
+
+    where:
+    - M is the (ns,nt) metric cost matrix
+    - v is a dual variable in R^J
+    - reg is the regularization term
+    - a and b are source and target weights (sum to 1)
+
+    This gradient is used by the SAG and ASGD algorithms proposed
+    in [18]_ (alg. 1 and alg. 2).
+
+
+ Parameters
+ ----------
+
+    b : np.ndarray(nt,)
+        target measure
+    M : np.ndarray(ns, nt)
+        cost matrix
+    reg : float
+        regularization term > 0
+    beta : np.ndarray(nt,)
+        current dual variable
+    i : int
+        index of the picked source sample
+
+ Returns
+ -------
+
+ coordinate gradient : np.ndarray(nt,)
+
+ Examples
+ --------
+
+    >>> import numpy as np
+    >>> import ot
+    >>> from ot import stochastic
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "ASGD"
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+    ...                                               method, numItermax)
+    >>> print(asgd_pi)
+
+ References
+ ----------
+
+    .. [18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F.,
+        Stochastic Optimization for Large-scale Optimal Transport,
+        Advances in Neural Information Processing Systems (2016),
+        arXiv preprint arXiv:1605.08527.
+
+ '''
+
+ r = M[i, :] - beta
+ exp_beta = np.exp(-r / reg) * b
+ khi = exp_beta / (np.sum(exp_beta))
+ return b - khi
+
+
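
Since khi is a probability vector, the returned gradient b - khi always sums to zero when b is a histogram; a small standalone check of that property (illustrative values only, not part of the commit):

# Sketch: the semi-dual coordinate gradient sums to zero for a normalized b.
import numpy as np

b = np.ones(4) / 4                       # uniform target histogram
M_row = np.array([0.5, 1.0, 0.2, 0.7])   # one row M[i, :] of the cost matrix
beta = np.zeros(4)                       # current dual iterate
reg = 1.0

r = M_row - beta
khi = np.exp(-r / reg) * b
khi /= khi.sum()
print((b - khi).sum())  # ~0.0 up to floating point error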
+def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None):
+ '''
+    Solve the regularized discrete-measure optimal transport problem
+    with the SAG algorithm.
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = \arg\min_\gamma \langle \gamma, M \rangle_F
+        + \mathrm{reg} \cdot \Omega(\gamma)
+
+        s.t. \ \gamma \mathbf{1} = a, \quad \gamma^T \mathbf{1} = b,
+        \quad \gamma \geq 0
+
+    where:
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term
+      :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+
+    The problem is solved with the SAG algorithm proposed in [18]_ (alg. 1).
+
+
+ Parameters
+ ----------
+
+    a : np.ndarray(ns,)
+        source measure
+    b : np.ndarray(nt,)
+        target measure
+    M : np.ndarray(ns, nt)
+        cost matrix
+    reg : float
+        regularization term > 0
+    numItermax : int
+        number of iterations
+    lr : float
+        learning rate
+
+ Returns
+ -------
+
+ v : np.ndarray(nt,)
+ dual variable
+
+ Examples
+ --------
+
+    >>> import numpy as np
+    >>> import ot
+    >>> from ot import stochastic
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "SAG"
+    >>> sag_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+    ...                                              method, numItermax)
+    >>> print(sag_pi)
+
+ References
+ ----------
+
+    .. [18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F.,
+        Stochastic Optimization for Large-scale Optimal Transport,
+        Advances in Neural Information Processing Systems (2016),
+        arXiv preprint arXiv:1605.08527.
+ '''
+
+ if lr is None:
+ lr = 1. / max(a / reg)
+ n_source = np.shape(M)[0]
+ n_target = np.shape(M)[1]
+ cur_beta = np.zeros(n_target)
+ stored_gradient = np.zeros((n_source, n_target))
+ sum_stored_gradient = np.zeros(n_target)
+ for _ in range(numItermax):
+ i = np.random.randint(n_source)
+ cur_coord_grad = a[i] * coordinate_grad_semi_dual(b, M, reg,
+ cur_beta, i)
+ sum_stored_gradient += (cur_coord_grad - stored_gradient[i])
+ stored_gradient[i] = cur_coord_grad
+ cur_beta += lr * (1. / n_source) * sum_stored_gradient
+ return cur_beta
+
+
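
As a usage sketch (not part of the commit), SAG can be called directly to get the dual vector v; the plan itself is assembled later by solve_semi_dual_entropic:

# Sketch: run SAG on a small random problem and inspect the dual vector.
import numpy as np
import ot
from ot import stochastic

rng = np.random.RandomState(0)
a = ot.utils.unif(7)
b = ot.utils.unif(4)
M = ot.dist(rng.randn(7, 2), rng.randn(4, 2))

v = stochastic.sag_entropic_transport(a, b, M, reg=1., numItermax=5000)
print(v.shape)  # (4,): one dual value per target point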
+def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None):
+ '''
+    Solve the regularized semi-continuous measures optimal transport
+    problem with the ASGD algorithm.
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = \arg\min_\gamma \langle \gamma, M \rangle_F
+        + \mathrm{reg} \cdot \Omega(\gamma)
+
+        s.t. \ \gamma \mathbf{1} = a, \quad \gamma^T \mathbf{1} = b,
+        \quad \gamma \geq 0
+
+    where:
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term
+      :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+
+    The problem is solved with the ASGD algorithm proposed in [18]_ (alg. 2).
+
+
+ Parameters
+ ----------
+
+    a : np.ndarray(ns,)
+        source measure
+    b : np.ndarray(nt,)
+        target measure
+    M : np.ndarray(ns, nt)
+        cost matrix
+    reg : float
+        regularization term > 0
+    numItermax : int
+        number of iterations
+    lr : float
+        learning rate
+
+
+ Returns
+ -------
+
+    ave_v : np.ndarray(nt,)
+        averaged dual variable
+
+ Examples
+ --------
+
+    >>> import numpy as np
+    >>> import ot
+    >>> from ot import stochastic
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "ASGD"
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+    ...                                               method, numItermax)
+    >>> print(asgd_pi)
+
+ References
+ ----------
+
+    .. [18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F.,
+        Stochastic Optimization for Large-scale Optimal Transport,
+        Advances in Neural Information Processing Systems (2016),
+        arXiv preprint arXiv:1605.08527.
+ '''
+
+ if lr is None:
+ lr = 1. / max(a / reg)
+ n_source = np.shape(M)[0]
+ n_target = np.shape(M)[1]
+ cur_beta = np.zeros(n_target)
+ ave_beta = np.zeros(n_target)
+ for cur_iter in range(numItermax):
+ k = cur_iter + 1
+ i = np.random.randint(n_source)
+ cur_coord_grad = coordinate_grad_semi_dual(b, M, reg, cur_beta, i)
+ cur_beta += (lr / np.sqrt(k)) * cur_coord_grad
+ ave_beta = (1. / k) * cur_beta + (1 - 1. / k) * ave_beta
+ return ave_beta
+
+
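
The update ave_beta = (1/k) * cur_beta + (1 - 1/k) * ave_beta is the online form of a plain average over all iterates (Polyak-Ruppert averaging); a quick standalone check of that identity (not part of the commit):

# Sketch: the running average over k steps equals the mean of the iterates.
import numpy as np

rng = np.random.RandomState(0)
iterates = rng.randn(10, 3)  # stand-ins for successive cur_beta vectors

ave = np.zeros(3)
for k, cur in enumerate(iterates, start=1):
    ave = (1. / k) * cur + (1 - 1. / k) * ave

print(np.allclose(ave, iterates.mean(axis=0)))  # True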
+def c_transform_entropic(b, M, reg, beta):
+ '''
+    Recover the dual variable u from v through the (smoothed) c-transform.
+
+    The function computes the c-transform of a dual variable from the other
+    dual variable:
+
+    .. math::
+        u_i = v^{c,reg}(x_i) = -\mathrm{reg} \log\left(\sum_j
+        \exp\left((v_j - M_{i,j})/\mathrm{reg}\right) b_j\right)
+
+    where:
+    - M is the (ns,nt) metric cost matrix
+    - u, v are dual variables in R^I x R^J
+    - reg is the regularization term
+
+    It is used to recover an optimal u from an optimal v solving the semi-dual
+    problem, see Proposition 2.1 of [18]_.
+
+
+ Parameters
+ ----------
+
+ b : np.ndarray(nt,)
+ target measure
+ M : np.ndarray(ns, nt)
+ cost matrix
+ reg : float
+ regularization term > 0
+    beta : np.ndarray(nt,)
+ dual variable
+
+ Returns
+ -------
+
+ u : np.ndarray(ns,)
+
+ Examples
+ --------
+
+    >>> import numpy as np
+    >>> import ot
+    >>> from ot import stochastic
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "ASGD"
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+    ...                                               method, numItermax)
+    >>> print(asgd_pi)
+
+ References
+ ----------
+
+    .. [18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F.,
+        Stochastic Optimization for Large-scale Optimal Transport,
+        Advances in Neural Information Processing Systems (2016),
+        arXiv preprint arXiv:1605.08527.
+ '''
+
+ n_source = np.shape(M)[0]
+ alpha = np.zeros(n_source)
+ for i in range(n_source):
+ r = M[i, :] - beta
+ min_r = np.min(r)
+ exp_beta = np.exp(-(r - min_r) / reg) * b
+ alpha[i] = min_r - reg * np.log(np.sum(exp_beta))
+ return alpha
+
+
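
The min_r shift inside the loop is the standard log-sum-exp stabilization: since min_r - reg*log(sum(exp(-(r - min_r)/reg) * b)) = -reg*log(sum(exp(-r/reg) * b)), the value is unchanged while the exponentials stay bounded. A small check with illustrative values (not part of the commit):

# Sketch: shifted and naive log-sum-exp agree where the naive form is finite.
import numpy as np

reg = 0.05
r = np.array([0.10, 0.20, 0.05])
b = np.ones(3) / 3

naive = -reg * np.log(np.sum(np.exp(-r / reg) * b))
min_r = np.min(r)
stable = min_r - reg * np.log(np.sum(np.exp(-(r - min_r) / reg) * b))
print(np.isclose(naive, stable))  # True; only the stable form survives small reg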
+def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
+ log=False):
+ '''
+    Compute the transport plan solving the regularized discrete-measure
+    optimal transport problem.
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = \arg\min_\gamma \langle \gamma, M \rangle_F
+        + \mathrm{reg} \cdot \Omega(\gamma)
+
+        s.t. \ \gamma \mathbf{1} = a, \quad \gamma^T \mathbf{1} = b,
+        \quad \gamma \geq 0
+
+    where:
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term
+      :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+
+    The problem is solved with the SAG or ASGD algorithms proposed in [18]_.
+
+
+ Parameters
+ ----------
+
+    a : np.ndarray(ns,)
+        source measure
+    b : np.ndarray(nt,)
+        target measure
+    M : np.ndarray(ns, nt)
+        cost matrix
+    reg : float
+        regularization term > 0
+    method : str
+        used method ("SAG" or "ASGD")
+    numItermax : int
+        number of iterations
+    lr : float
+        learning rate
+    log : bool, optional
+        record log if True
+
+ Returns
+ -------
+
+ pi : np.ndarray(ns, nt)
+ transportation matrix
+ log : dict
+        log dictionary, returned only if log is True
+
+ Examples
+ --------
+
+    >>> import numpy as np
+    >>> import ot
+    >>> from ot import stochastic
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "ASGD"
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+    ...                                               method, numItermax)
+    >>> print(asgd_pi)
+
+ References
+ ----------
+
+    .. [18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F.,
+        Stochastic Optimization for Large-scale Optimal Transport,
+        Advances in Neural Information Processing Systems (2016),
+        arXiv preprint arXiv:1605.08527.
+ '''
+
+ if method.lower() == "sag":
+ opt_beta = sag_entropic_transport(a, b, M, reg, numItermax, lr)
+ elif method.lower() == "asgd":
+ opt_beta = averaged_sgd_entropic_transport(a, b, M, reg, numItermax, lr)
+    else:
+        raise ValueError("method must be 'SAG' or 'ASGD', got '%s'" % method)
+
+ opt_alpha = c_transform_entropic(b, M, reg, opt_beta)
+ pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) *
+ a[:, None] * b[None, :])
+
+ if log:
+ log = {}
+ log['alpha'] = opt_alpha
+ log['beta'] = opt_beta
+ return pi, log
+ else:
+ return pi
+
+
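
End to end, the semi-dual solver can be sanity-checked by comparing the marginals of the returned plan with a and b; a sketch (not part of the commit; the error shrinks as numItermax grows):

# Sketch: solve with ASGD and check the plan's marginals.
import numpy as np
import ot
from ot import stochastic

rng = np.random.RandomState(0)
a = ot.utils.unif(7)
b = ot.utils.unif(4)
M = ot.dist(rng.randn(7, 2), rng.randn(4, 2))

pi = stochastic.solve_semi_dual_entropic(a, b, M, reg=1., method="ASGD",
                                         numItermax=100000)
print(np.abs(pi.sum(axis=1) - a).max())  # row marginals vs a
print(np.abs(pi.sum(axis=0) - b).max())  # column marginals vs b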
+##############################################################################
+# Optimization toolbox for DUAL problems
+##############################################################################
+
+
def batch_grad_dual(M, reg, a, b, alpha, beta, batch_size, batch_alpha,
batch_beta):
'''
diff --git a/ot/utils.py b/ot/utils.py
index 17983f2..7dac283 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -225,6 +225,26 @@ def check_params(**kwargs):
return check
+def check_random_state(seed):
+ """Turn seed into a np.random.RandomState instance
+ Parameters
+ ----------
+ seed : None | int | instance of RandomState
+ If seed is None, return the RandomState singleton used by np.random.
+ If seed is an int, return a new RandomState instance seeded with seed.
+ If seed is already a RandomState instance, return it.
+ Otherwise raise ValueError.
+ """
+ if seed is None or seed is np.random:
+ return np.random.mtrand._rand
+ if isinstance(seed, (int, np.integer)):
+ return np.random.RandomState(seed)
+ if isinstance(seed, np.random.RandomState):
+ return seed
+ raise ValueError('{} cannot be used to seed a numpy.random.RandomState'
+ ' instance'.format(seed))
+
+
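
check_random_state follows the scikit-learn convention, so the three accepted inputs can be exercised directly; a minimal sketch (not part of the commit):

# Sketch: None, an int seed, and a RandomState instance are all accepted.
import numpy as np
from ot.utils import check_random_state

g_none = check_random_state(None)                      # global np.random state
g_int = check_random_state(0)                          # fresh seeded generator
g_inst = check_random_state(np.random.RandomState(0))  # returned unchanged
print(np.allclose(g_int.randn(3), g_inst.randn(3)))    # True: same seed, same stream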
class deprecated(object):
"""Decorator to mark a function or class as deprecated.