debug sgd dual

author: Kilian Fatras <kilianfatras@dhcp-206-12-53-20.eduroam.wireless.ubc.ca> 2018-08-28 17:24:07 -0700
committer: Kilian Fatras <kilianfatras@dhcp-206-12-53-20.eduroam.wireless.ubc.ca> 2018-08-28 17:24:07 -0700
commit: e885d78cc9608d791a9d1561d2f4e0b783ba0761 (patch)
tree: e03a553873f110d1b8e0f15cc6f9248c916a405c /ot/stochastic.py
parent: 77b68901c5415ddc5d9ab5215a6fa97723de3de9 (diff)
1 files changed, 437 insertions, 0 deletions
diff --git a/ot/stochastic.py b/ot/stochastic.py
index 0788f61..f3d1bb5 100644
--- a/ot/stochastic.py
+++ b/ot/stochastic.py
@@ -435,6 +435,443 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
 ##############################################################################
 
 
+# Author: Kilian Fatras <kilian.fatras@gmail.com>
+#
+# License: MIT License
+
+import numpy as np
+
+
+##############################################################################
+# Optimization toolbox for SEMI - DUAL problems
+##############################################################################
+
+
+def coordinate_grad_semi_dual(b, M, reg, beta, i):
+    '''
+    Compute the coordinate gradient update for regularized discrete
+        distributions for (i, :)
+
+    The function computes the gradient of the semi dual problem:
+
+    .. math::
+        \W_\varepsilon(a, b) = \max_\v \sum_i (\sum_j v_j * b_j
+            - \reg log(\sum_j exp((v_j - M_{i,j})/reg) * b_j)) * a_i
+
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - v is a dual variable in R^J
+    - reg is the regularization term
+    - a and b are source and target weights (sum to 1)
+
+    The algorithm used for solving the problem is the ASGD & SAG algorithms
+    as proposed in [18]_ [alg.1 & alg.2]
+
+
+    Parameters
+    ----------
+
+    b : np.ndarray(nt,),
+        target measure
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float nu,
+        Regularization term > 0
+    v : np.ndarray(nt,),
+        optimization vector
+    i : number int,
+        picked number i
+
+    Returns
+    -------
+
+    coordinate gradient : np.ndarray(nt,)
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "ASGD"
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+                                                      method, numItermax)
+    >>> print(asgd_pi)
+
+    References
+    ----------
+
+    [Genevay et al., 2016] :
+                    Stochastic Optimization for Large-scale Optimal Transport,
+                     Advances in Neural Information Processing Systems (2016),
+                      arXiv preprint arxiv:1605.08527.
+
+    '''
+
+    r = M[i, :] - beta
+    exp_beta = np.exp(-r / reg) * b
+    khi = exp_beta / (np.sum(exp_beta))
+    return b - khi
+
+
+def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None):
+    '''
+    Compute the SAG algorithm to solve the regularized discrete measures
+        optimal transport max problem
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+        s.t. \gamma 1 = a
+             \gamma^T 1= b
+             \gamma \geq 0
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term
+        :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+    The algorithm used for solving the problem is the SAG algorithm
+    as proposed in [18]_ [alg.1]
+
+
+    Parameters
+    ----------
+
+    a : np.ndarray(ns,),
+        source measure
+    b : np.ndarray(nt,),
+        target measure
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float number,
+        Regularization term > 0
+    numItermax : int number
+        number of iteration
+    lr : float number
+        learning rate
+
+    Returns
+    -------
+
+    v : np.ndarray(nt,)
+        dual variable
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "ASGD"
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+                                                      method, numItermax)
+    >>> print(asgd_pi)
+
+    References
+    ----------
+
+    [Genevay et al., 2016] :
+                    Stochastic Optimization for Large-scale Optimal Transport,
+                     Advances in Neural Information Processing Systems (2016),
+                      arXiv preprint arxiv:1605.08527.
+    '''
+
+    if lr is None:
+        lr = 1. / max(a / reg)
+    n_source = np.shape(M)[0]
+    n_target = np.shape(M)[1]
+    cur_beta = np.zeros(n_target)
+    stored_gradient = np.zeros((n_source, n_target))
+    sum_stored_gradient = np.zeros(n_target)
+    for _ in range(numItermax):
+        i = np.random.randint(n_source)
+        cur_coord_grad = a[i] * coordinate_grad_semi_dual(b, M, reg,
+                                                          cur_beta, i)
+        sum_stored_gradient += (cur_coord_grad - stored_gradient[i])
+        stored_gradient[i] = cur_coord_grad
+        cur_beta += lr * (1. / n_source) * sum_stored_gradient
+    return cur_beta
+
+
+def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None):
+    '''
+    Compute the ASGD algorithm to solve the regularized semi contibous measures
+        optimal transport max problem
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+        s.t. \gamma 1 = a
+             \gamma^T 1= b
+             \gamma \geq 0
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term
+        :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+    The algorithm used for solving the problem is the ASGD algorithm
+    as proposed in [18]_ [alg.2]
+
+
+    Parameters
+    ----------
+
+    b : np.ndarray(nt,),
+        target measure
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float number,
+        Regularization term > 0
+    numItermax : int number
+        number of iteration
+    lr : float number
+        learning rate
+
+
+    Returns
+    -------
+
+    ave_v : np.ndarray(nt,)
+        optimization vector
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "ASGD"
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+                                                      method, numItermax)
+    >>> print(asgd_pi)
+
+    References
+    ----------
+
+    [Genevay et al., 2016] :
+                    Stochastic Optimization for Large-scale Optimal Transport,
+                     Advances in Neural Information Processing Systems (2016),
+                      arXiv preprint arxiv:1605.08527.
+    '''
+
+    if lr is None:
+        lr = 1. / max(a / reg)
+    n_source = np.shape(M)[0]
+    n_target = np.shape(M)[1]
+    cur_beta = np.zeros(n_target)
+    ave_beta = np.zeros(n_target)
+    for cur_iter in range(numItermax):
+        k = cur_iter + 1
+        i = np.random.randint(n_source)
+        cur_coord_grad = coordinate_grad_semi_dual(b, M, reg, cur_beta, i)
+        cur_beta += (lr / np.sqrt(k)) * cur_coord_grad
+        ave_beta = (1. / k) * cur_beta + (1 - 1. / k) * ave_beta
+    return ave_beta
+
+
+def c_transform_entropic(b, M, reg, beta):
+    '''
+    The goal is to recover u from the c-transform.
+
+    The function computes the c_transform of a dual variable from the other
+    dual variable:
+
+    .. math::
+        u = v^{c,reg} = -reg \sum_j exp((v - M)/reg) b_j
+
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - u, v are dual variables in R^IxR^J
+    - reg is the regularization term
+
+    It is used to recover an optimal u from optimal v solving the semi dual
+    problem, see Proposition 2.1 of [18]_
+
+
+    Parameters
+    ----------
+
+    b : np.ndarray(nt,)
+        target measure
+    M : np.ndarray(ns, nt)
+        cost matrix
+    reg : float
+        regularization term > 0
+    v : np.ndarray(nt,)
+        dual variable
+
+    Returns
+    -------
+
+    u : np.ndarray(ns,)
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "ASGD"
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+                                                      method, numItermax)
+    >>> print(asgd_pi)
+
+    References
+    ----------
+
+    [Genevay et al., 2016] :
+                    Stochastic Optimization for Large-scale Optimal Transport,
+                     Advances in Neural Information Processing Systems (2016),
+                      arXiv preprint arxiv:1605.08527.
+    '''
+
+    n_source = np.shape(M)[0]
+    alpha = np.zeros(n_source)
+    for i in range(n_source):
+        r = M[i, :] - beta
+        min_r = np.min(r)
+        exp_beta = np.exp(-(r - min_r) / reg) * b
+        alpha[i] = min_r - reg * np.log(np.sum(exp_beta))
+    return alpha
+
+
+def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
+                                log=False):
+    '''
+    Compute the transportation matrix to solve the regularized discrete
+        measures optimal transport max problem
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+        s.t. \gamma 1 = a
+             \gamma^T 1= b
+             \gamma \geq 0
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term
+        :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+    The algorithm used for solving the problem is the SAG or ASGD algorithms
+    as proposed in [18]_
+
+
+    Parameters
+    ----------
+
+    a : np.ndarray(ns,),
+        source measure
+    b : np.ndarray(nt,),
+        target measure
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float number,
+        Regularization term > 0
+    methode : str,
+        used method (SAG or ASGD)
+    numItermax : int number
+        number of iteration
+    lr : float number
+        learning rate
+    n_source : int number
+        size of the source measure
+    n_target : int number
+        size of the target measure
+    log : bool, optional
+        record log if True
+
+    Returns
+    -------
+
+    pi : np.ndarray(ns, nt)
+        transportation matrix
+    log : dict
+        log dictionary return only if log==True in parameters
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 300000
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> method = "ASGD"
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
+                                                      method, numItermax)
+    >>> print(asgd_pi)
+
+    References
+    ----------
+
+    [Genevay et al., 2016] :
+                    Stochastic Optimization for Large-scale Optimal Transport,
+                     Advances in Neural Information Processing Systems (2016),
+                      arXiv preprint arxiv:1605.08527.
+    '''
+
+    if method.lower() == "sag":
+        opt_beta = sag_entropic_transport(a, b, M, reg, numItermax, lr)
+    elif method.lower() == "asgd":
+        opt_beta = averaged_sgd_entropic_transport(a, b, M, reg, numItermax, lr)
+    else:
+        print("Please, select your method between SAG and ASGD")
+        return None
+
+    opt_alpha = c_transform_entropic(b, M, reg, opt_beta)
+    pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) *
+          a[:, None] * b[None, :])
+
+    if log:
+        log = {}
+        log['alpha'] = opt_alpha
+        log['beta'] = opt_beta
+        return pi, log
+    else:
+        return pi
+
+
+##############################################################################
+# Optimization toolbox for DUAL problems
+##############################################################################
+
+
 def batch_grad_dual(M, reg, a, b, alpha, beta, batch_size, batch_alpha,
                     batch_beta):
     '''
author	Kilian Fatras <kilianfatras@dhcp-206-12-53-20.eduroam.wireless.ubc.ca>	2018-08-28 17:24:07 -0700
committer	Kilian Fatras <kilianfatras@dhcp-206-12-53-20.eduroam.wireless.ubc.ca>	2018-08-28 17:24:07 -0700
commit	e885d78cc9608d791a9d1561d2f4e0b783ba0761 (patch)
tree	e03a553873f110d1b8e0f15cc6f9248c916a405c /ot/stochastic.py
parent	77b68901c5415ddc5d9ab5215a6fa97723de3de9 (diff)