From 74cfe5ac77c3e964a85ef90c11d8ebffa16ddcfe Mon Sep 17 00:00:00 2001
From: Kilian Fatras
Date: Mon, 18 Jun 2018 17:56:28 -0700
Subject: add sgd

---
 ot/stochastic.py | 468 +++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 423 insertions(+), 45 deletions(-)

diff --git a/ot/stochastic.py b/ot/stochastic.py
index 9912223..31c99be 100644
--- a/ot/stochastic.py
+++ b/ot/stochastic.py
@@ -5,7 +5,10 @@
 import numpy as np
 
 
-def coordinate_gradient(b, M, reg, v, i):
+##############################################################################
+# Optimization toolbox for SEMI-DUAL problem
+##############################################################################
+def coordinate_gradient(b, M, reg, beta, i):
     '''
     Compute the coordinate gradient update for regularized discrete
         distributions for (i, :)
@@ -59,7 +62,7 @@ def coordinate_gradient(b, M, reg, v, i):
     >>> Y_target = rng.randn(n_target, 2)
     >>> M = ot.dist(X_source, Y_target)
     >>> method = "ASGD"
-    >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg,
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
                                                       method, numItermax, lr)
     >>> print(asgd_pi)
@@ -74,9 +77,9 @@ def coordinate_gradient(b, M, reg, v, i):
 
     '''
 
-    r = M[i, :] - v
-    exp_v = np.exp(-r / reg) * b
-    khi = exp_v / (np.sum(exp_v))
+    r = M[i, :] - beta
+    exp_beta = np.exp(-r / reg) * b
+    khi = exp_beta / (np.sum(exp_beta))
     return b - khi
 
 
@@ -137,7 +140,7 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=0.1):
     >>> Y_target = rng.randn(n_target, 2)
     >>> M = ot.dist(X_source, Y_target)
     >>> method = "SAG"
-    >>> sag_pi = stochastic.transportation_matrix_entropic(a, b, M, reg,
+    >>> sag_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
                                                       method, numItermax, lr)
     >>> print(sag_pi)
@@ -153,16 +156,16 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=0.1):
 
     n_source = np.shape(M)[0]
     n_target = np.shape(M)[1]
-    v = np.zeros(n_target)
+    cur_beta = np.zeros(n_target)
     stored_gradient = np.zeros((n_source, n_target))
     sum_stored_gradient = np.zeros(n_target)
     for _ in range(numItermax):
         i = np.random.randint(n_source)
-        cur_coord_grad = a[i] * coordinate_gradient(b, M, reg, v, i)
+        cur_coord_grad = a[i] * coordinate_gradient(b, M, reg, cur_beta, i)
         sum_stored_gradient += (cur_coord_grad - stored_gradient[i])
         stored_gradient[i] = cur_coord_grad
-        v += lr * (1. / n_source) * sum_stored_gradient
-    return v
+        cur_beta += lr * (1. / n_source) * sum_stored_gradient
+    return cur_beta
 
 
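The update returned by coordinate_gradient is the gradient of the per-sample
semi-dual objective of [18]. Below is a quick finite-difference check of that
claim (an editor's sketch, not part of the patch; only NumPy is assumed, and
h_i is written out from the formulas above):

    import numpy as np

    # Per-sample semi-dual objective for a fixed source index i:
    #   h_i(beta) = <beta, b> - reg * log(sum_j b_j * exp((beta_j - M[i, j]) / reg))
    # coordinate_gradient(b, M, reg, beta, i) should return its gradient, b - khi.
    rng = np.random.RandomState(0)
    ns, nt, reg, i = 7, 4, 1.0, 2
    b = np.full(nt, 1. / nt)
    M = rng.rand(ns, nt)
    beta = rng.randn(nt)

    def h_i(beta):
        return beta.dot(b) - reg * np.log(np.sum(b * np.exp((beta - M[i, :]) / reg)))

    def coordinate_gradient(b, M, reg, beta, i):
        r = M[i, :] - beta
        exp_beta = np.exp(-r / reg) * b
        khi = exp_beta / np.sum(exp_beta)
        return b - khi

    # Central finite differences along each coordinate of beta.
    eps = 1e-6
    fd_grad = np.array([(h_i(beta + eps * e) - h_i(beta - eps * e)) / (2 * eps)
                        for e in np.eye(nt)])
    assert np.allclose(fd_grad, coordinate_gradient(b, M, reg, beta, i), atol=1e-5)
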
 def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1):
@@ -170,19 +173,19 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1):
     '''
     Compute the ASGD algorithm to solve the regularized semi-continuous
         measures optimal transport max problem
 
-    The function solves the following optimization problem:
-    .. math::
-        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
-        s.t. \gamma 1 = a
-             \gamma^T 1= b
-             \gamma \geq 0
-    where :
-    - M is the (ns,nt) metric cost matrix
-    - :math:`\Omega` is the entropic regularization term
-        :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
-    - a and b are source and target weights (sum to 1)
-    The algorithm used for solving the problem is the ASGD algorithm
-    as proposed in [18]_ [alg.2]
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+        s.t. \gamma 1 = a
+             \gamma^T 1 = b
+             \gamma \geq 0
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term
+        :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+    The algorithm used for solving the problem is the ASGD algorithm
+    as proposed in [18]_ [alg.2]
 
 
     Parameters
@@ -221,7 +224,7 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1):
     >>> Y_target = rng.randn(n_target, 2)
     >>> M = ot.dist(X_source, Y_target)
     >>> method = "ASGD"
-    >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg,
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
                                                       method, numItermax, lr)
     >>> print(asgd_pi)
@@ -237,18 +240,18 @@ def averaged_sgd_entropic_transport(b, M, reg, numItermax=300000, lr=1):
 
     n_source = np.shape(M)[0]
     n_target = np.shape(M)[1]
-    cur_v = np.zeros(n_target)
-    ave_v = np.zeros(n_target)
+    cur_beta = np.zeros(n_target)
+    ave_beta = np.zeros(n_target)
     for cur_iter in range(numItermax):
         k = cur_iter + 1
         i = np.random.randint(n_source)
-        cur_coord_grad = coordinate_gradient(b, M, reg, cur_v, i)
-        cur_v += (lr / np.sqrt(k)) * cur_coord_grad
-        ave_v = (1. / k) * cur_v + (1 - 1. / k) * ave_v
-    return ave_v
+        cur_coord_grad = coordinate_gradient(b, M, reg, cur_beta, i)
+        cur_beta += (lr / np.sqrt(k)) * cur_coord_grad
+        ave_beta = (1. / k) * cur_beta + (1 - 1. / k) * ave_beta
+    return ave_beta
 
 
-def c_transform_entropic(b, M, reg, v):
+def c_transform_entropic(b, M, reg, beta):
     '''
     The goal is to recover alpha from the c-transform.
 
@@ -298,7 +301,7 @@ def c_transform_entropic(b, M, reg, v):
     >>> Y_target = rng.randn(n_target, 2)
     >>> M = ot.dist(X_source, Y_target)
     >>> method = "ASGD"
-    >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg,
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
                                                       method, numItermax, lr)
     >>> print(asgd_pi)
@@ -313,16 +316,18 @@ def c_transform_entropic(b, M, reg, v):
     '''
 
     n_source = np.shape(M)[0]
-    u = np.zeros(n_source)
+    alpha = np.zeros(n_source)
     for i in range(n_source):
-        r = M[i, :] - v
-        exp_v = np.exp(-r / reg) * b
-        u[i] = - reg * np.log(np.sum(exp_v))
-    return u
+        r = M[i, :] - beta
+        min_r = np.min(r)
+        exp_beta = np.exp(-(r - min_r) / reg) * b
+        alpha[i] = min_r - reg * np.log(np.sum(exp_beta))
+    return alpha
 
 
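The min_r shift introduced in c_transform_entropic above is a log-sum-exp
stabilization. A small sketch (not part of the patch; the values are made up
to force the failure mode) of why the naive form breaks when costs are large
relative to reg:

    import numpy as np

    # Costs shifted by beta can be large; with reg = 1, exp(-r / reg) underflows
    # to 0 and the naive form takes the log of 0.
    b = np.full(4, 0.25)
    r = np.array([800., 810., 805., 820.])
    reg = 1.0

    naive = -reg * np.log(np.sum(np.exp(-r / reg) * b))    # log(0) -> inf
    min_r = np.min(r)
    stable = min_r - reg * np.log(np.sum(np.exp(-(r - min_r) / reg) * b))
    print(naive, stable)   # inf vs. a finite value close to min_r
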
-def transportation_matrix_entropic(a, b, M, reg, method, numItermax=10000,
-                                   lr=0.1):
+def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=0.1,
+                             log=False):
     '''
     Compute the transportation matrix to solve the regularized discrete
         measures optimal transport max problem
@@ -363,12 +368,16 @@
         size of the source measure
     n_target : int number
         size of the target measure
+    log : bool, optional
+        record log if True
 
     Returns
     -------
 
     pi : np.ndarray(ns, nt)
         transportation matrix
+    log : dict
+        log dictionary returned only if log==True in parameters
 
     Examples
     --------
@@ -385,7 +394,7 @@
     >>> Y_target = rng.randn(n_target, 2)
     >>> M = ot.dist(X_source, Y_target)
     >>> method = "ASGD"
-    >>> asgd_pi = stochastic.transportation_matrix_entropic(a, b, M, reg,
+    >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
                                                       method, numItermax, lr)
     >>> print(asgd_pi)
@@ -398,15 +407,384 @@
         Advances in Neural Information Processing Systems
         (2016), arXiv preprint arxiv:1605.08527.
     '''
 
     if method.lower() == "sag":
-        opt_v = sag_entropic_transport(a, b, M, reg, numItermax, lr)
+        opt_beta = sag_entropic_transport(a, b, M, reg, numItermax, lr)
     elif method.lower() == "asgd":
-        opt_v = averaged_sgd_entropic_transport(b, M, reg, numItermax, lr)
+        opt_beta = averaged_sgd_entropic_transport(b, M, reg, numItermax, lr)
     else:
         print("Please select your method between SAG and ASGD")
         return None
-    opt_u = c_transform_entropic(b, M, reg, opt_v)
-    pi = (np.exp((opt_u[:, None] + opt_v[None, :] - M[:, :]) / reg) *
+    opt_alpha = c_transform_entropic(b, M, reg, opt_beta)
+    pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) *
           a[:, None] * b[None, :])
-    return pi
+
+    if log:
+        log = {}
+        log['alpha'] = opt_alpha
+        log['beta'] = opt_beta
+        return pi, log
+    else:
+        return pi
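A minimal usage sketch of the renamed entry point (assumes POT with this
patch applied; sizes follow the docstring examples). A converged plan should
approximately satisfy both marginal constraints:

    import numpy as np
    import ot
    from ot import stochastic

    n_source, n_target, reg = 7, 4, 1.0
    a = ot.utils.unif(n_source)
    b = ot.utils.unif(n_target)
    rng = np.random.RandomState(0)
    M = ot.dist(rng.randn(n_source, 2), rng.randn(n_target, 2))

    asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg, "ASGD",
                                                  numItermax=100000, lr=1)
    print(np.max(np.abs(asgd_pi.sum(axis=1) - a)))  # row marginals close to a
    print(np.max(np.abs(asgd_pi.sum(axis=0) - b)))  # column marginals close to b
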
+
+
+##############################################################################
+# Optimization toolbox for DUAL problem
+##############################################################################
+
+
+def grad_dF_dalpha(M, reg, alpha, beta, batch_size, batch_alpha, batch_beta):
+    '''
+    Computes the partial gradient of the dual objective F w.r.t. alpha
+
+    Compute the partial gradient of the dual problem:
+
+    .. math::
+        \forall i \in batch_alpha,
+
+        grad_alpha_i = batch_size - \sum_{j \in batch_beta}
+                       \exp((alpha_i + beta_j - M_{i,j}) / reg)
+
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - alpha, beta are dual variables in R^I and R^J respectively
+    - reg is the regularization term
+    - batch_alpha and batch_beta are lists of indices
+
+    The algorithm used for solving the dual problem is the SGD algorithm
+    as proposed in [19]_ [alg.1]
+
+    Parameters
+    ----------
+
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float number,
+        Regularization term > 0
+    alpha : np.ndarray(ns,)
+        dual variable
+    beta : np.ndarray(nt,)
+        dual variable
+    batch_size : int number
+        size of the batch
+    batch_alpha : np.ndarray(bs,)
+        batch of indices of alpha
+    batch_beta : np.ndarray(bs,)
+        batch of indices of beta
+
+    Returns
+    -------
+
+    grad : np.ndarray(bs,)
+        partial grad F w.r.t. alpha
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 20000
+    >>> lr = 0.1
+    >>> batch_size = 3
+    >>> log = True
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+                                                          batch_size,
+                                                          numItermax, lr, log)
+    >>> print(log['alpha'], log['beta'])
+    >>> print(sgd_dual_pi)
+
+    References
+    ----------
+
+    [Seguy et al., 2018] :
+        International Conference on Learning Representations (2018),
+        arXiv preprint arxiv:1711.02283.
+    '''
+
+    grad_alpha = np.zeros(batch_size)
+    grad_alpha[:] = batch_size
+    for j in batch_beta:
+        grad_alpha -= np.exp((alpha[batch_alpha] + beta[j]
+                              - M[batch_alpha, j]) / reg)
+    return grad_alpha
+
+
+def grad_dF_dbeta(M, reg, alpha, beta, batch_size, batch_alpha, batch_beta):
+    '''
+    Computes the partial gradient of the dual objective F w.r.t. beta
+
+    Compute the partial gradient of the dual problem:
+
+    .. math::
+        \forall j \in batch_beta,
+
+        grad_beta_j = batch_size - \sum_{i \in batch_alpha}
+                      \exp((alpha_i + beta_j - M_{i,j}) / reg)
+
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - alpha, beta are dual variables in R^I and R^J respectively
+    - reg is the regularization term
+    - batch_alpha and batch_beta are lists of indices
+
+    The algorithm used for solving the dual problem is the SGD algorithm
+    as proposed in [19]_ [alg.1]
+
+    Parameters
+    ----------
+
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float number,
+        Regularization term > 0
+    alpha : np.ndarray(ns,)
+        dual variable
+    beta : np.ndarray(nt,)
+        dual variable
+    batch_size : int number
+        size of the batch
+    batch_alpha : np.ndarray(bs,)
+        batch of indices of alpha
+    batch_beta : np.ndarray(bs,)
+        batch of indices of beta
+
+    Returns
+    -------
+
+    grad : np.ndarray(bs,)
+        partial grad F w.r.t. beta
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 20000
+    >>> lr = 0.1
+    >>> batch_size = 3
+    >>> log = True
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+                                                          batch_size,
+                                                          numItermax, lr, log)
+    >>> print(log['alpha'], log['beta'])
+    >>> print(sgd_dual_pi)
+
+    References
+    ----------
+
+    [Seguy et al., 2018] :
+        International Conference on Learning Representations (2018),
+        arXiv preprint arxiv:1711.02283.
+    '''
+
+    grad_beta = np.zeros(batch_size)
+    grad_beta[:] = batch_size
+    for i in batch_alpha:
+        grad_beta -= np.exp((alpha[i]
+                             + beta[batch_beta] - M[i, batch_beta]) / reg)
+    return grad_beta
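Both batch gradients can equivalently be computed in one vectorized block,
which avoids the Python loop when batch_size grows. batch_grads below is a
hypothetical helper, not part of the patch; it reproduces grad_dF_dalpha and
grad_dF_dbeta on the same batches (len(batch_alpha) == len(batch_beta) ==
batch_size in the code above):

    import numpy as np

    def batch_grads(M, reg, alpha, beta, batch_alpha, batch_beta):
        # Block of exp((alpha_i + beta_j - M_ij) / reg) restricted to the batches.
        K = np.exp((alpha[batch_alpha][:, None] + beta[batch_beta][None, :]
                    - M[np.ix_(batch_alpha, batch_beta)]) / reg)
        grad_alpha = len(batch_beta) - K.sum(axis=1)   # matches grad_dF_dalpha
        grad_beta = len(batch_alpha) - K.sum(axis=0)   # matches grad_dF_dbeta
        return grad_alpha, grad_beta

The loop over batch_beta in grad_dF_dalpha is simply the sum over the second
axis of this K block, and symmetrically for grad_dF_dbeta.
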
+
+
+def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr,
+                                alternate=True):
+    '''
+    Compute the sgd algorithm to solve the regularized discrete measures
+        optimal transport dual problem
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+        s.t. \gamma 1 = a
+             \gamma^T 1 = b
+             \gamma \geq 0
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term
+        :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+
+    Parameters
+    ----------
+
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float number,
+        Regularization term > 0
+    batch_size : int number
+        size of the batch
+    numItermax : int number
+        number of iterations
+    lr : float number
+        learning rate
+    alternate : bool, optional
+        if True, alternate the updates of alpha and beta at each iteration
+
+    Returns
+    -------
+
+    alpha : np.ndarray(ns,)
+        dual variable
+    beta : np.ndarray(nt,)
+        dual variable
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 20000
+    >>> lr = 0.1
+    >>> batch_size = 3
+    >>> log = True
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+                                                          batch_size,
+                                                          numItermax, lr, log)
+    >>> print(log['alpha'], log['beta'])
+    >>> print(sgd_dual_pi)
+
+    References
+    ----------
+
+    [Seguy et al., 2018] :
+        International Conference on Learning Representations (2018),
+        arXiv preprint arxiv:1711.02283.
+    '''
+
+    n_source = np.shape(M)[0]
+    n_target = np.shape(M)[1]
+    cur_alpha = np.random.randn(n_source)
+    cur_beta = np.random.randn(n_target)
+    if alternate:
+        for cur_iter in range(numItermax):
+            k = np.sqrt(cur_iter + 1)
+            batch_alpha = np.random.choice(n_source, batch_size,
+                                           replace=False)
+            batch_beta = np.random.choice(n_target, batch_size,
+                                          replace=False)
+            grad_F_alpha = grad_dF_dalpha(M, reg, cur_alpha, cur_beta,
+                                          batch_size, batch_alpha, batch_beta)
+            cur_alpha[batch_alpha] += (lr / k) * grad_F_alpha
+            grad_F_beta = grad_dF_dbeta(M, reg, cur_alpha, cur_beta,
+                                        batch_size, batch_alpha, batch_beta)
+            cur_beta[batch_beta] += (lr / k) * grad_F_beta
+
+    else:
+        for cur_iter in range(numItermax):
+            k = np.sqrt(cur_iter + 1)
+            batch_alpha = np.random.choice(n_source, batch_size,
+                                           replace=False)
+            batch_beta = np.random.choice(n_target, batch_size,
+                                          replace=False)
+            grad_F_alpha = grad_dF_dalpha(M, reg, cur_alpha, cur_beta,
+                                          batch_size, batch_alpha, batch_beta)
+            grad_F_beta = grad_dF_dbeta(M, reg, cur_alpha, cur_beta,
+                                        batch_size, batch_alpha, batch_beta)
+            cur_alpha[batch_alpha] += (lr / k) * grad_F_alpha
+            cur_beta[batch_beta] += (lr / k) * grad_F_beta
+
+    return cur_alpha, cur_beta
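A minimal usage sketch of the dual SGD routine (assumes the patch is applied
and importable as ot.stochastic). Since batches are drawn with replace=False,
batch_size must not exceed min(n_source, n_target):

    import numpy as np
    import ot
    from ot import stochastic

    rng = np.random.RandomState(0)
    # 7 source points, 4 target points, so batch_size <= 4 here.
    M = ot.dist(rng.randn(7, 2), rng.randn(4, 2))
    alpha, beta = stochastic.sgd_entropic_regularization(M, reg=1, batch_size=3,
                                                         numItermax=20000, lr=0.1)
    print(alpha.shape, beta.shape)  # (7,) (4,)
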
+
+
+def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1,
+                        log=False):
+    '''
+    Compute the transportation matrix to solve the regularized discrete
+        measures optimal transport dual problem
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+        s.t. \gamma 1 = a
+             \gamma^T 1 = b
+             \gamma \geq 0
+    where :
+    - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega` is the entropic regularization term
+        :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+    - a and b are source and target weights (sum to 1)
+
+    Parameters
+    ----------
+
+    a : np.ndarray(ns,),
+        source measure
+    b : np.ndarray(nt,),
+        target measure
+    M : np.ndarray(ns, nt),
+        cost matrix
+    reg : float number,
+        Regularization term > 0
+    batch_size : int number
+        size of the batch
+    numItermax : int number
+        number of iterations
+    lr : float number
+        learning rate
+    log : bool, optional
+        record log if True
+
+    Returns
+    -------
+
+    pi : np.ndarray(ns, nt)
+        transportation matrix
+    log : dict
+        log dictionary returned only if log==True in parameters
+
+    Examples
+    --------
+
+    >>> n_source = 7
+    >>> n_target = 4
+    >>> reg = 1
+    >>> numItermax = 20000
+    >>> lr = 0.1
+    >>> batch_size = 3
+    >>> log = True
+    >>> a = ot.utils.unif(n_source)
+    >>> b = ot.utils.unif(n_target)
+    >>> rng = np.random.RandomState(0)
+    >>> X_source = rng.randn(n_source, 2)
+    >>> Y_target = rng.randn(n_target, 2)
+    >>> M = ot.dist(X_source, Y_target)
+    >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
+                                                          batch_size,
+                                                          numItermax, lr, log)
+    >>> print(log['alpha'], log['beta'])
+    >>> print(sgd_dual_pi)
+
+    References
+    ----------
+
+    [Seguy et al., 2018] :
+        International Conference on Learning Representations (2018),
+        arXiv preprint arxiv:1711.02283.
+    '''
+
+    opt_alpha, opt_beta = sgd_entropic_regularization(M, reg, batch_size,
+                                                      numItermax, lr)
+    pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) *
+          a[:, None] * b[None, :])
+    if log:
+        log = {}
+        log['alpha'] = opt_alpha
+        log['beta'] = opt_beta
+        return pi, log
+    else:
+        return pi
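An end-to-end sanity check (a sketch assuming POT with this patch): both
stochastic solvers target the same entropic-regularized plan as ot.sinkhorn,
so for moderate reg and enough iterations the three plans should roughly
agree:

    import numpy as np
    import ot
    from ot import stochastic

    n_source, n_target, reg = 7, 4, 1.0
    a = ot.utils.unif(n_source)
    b = ot.utils.unif(n_target)
    rng = np.random.RandomState(0)
    M = ot.dist(rng.randn(n_source, 2), rng.randn(n_target, 2))

    sinkhorn_pi = ot.sinkhorn(a, b, M, reg)
    sag_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg, "SAG",
                                                 numItermax=100000, lr=0.1)
    sgd_dual_pi = stochastic.solve_dual_entropic(a, b, M, reg, batch_size=3,
                                                 numItermax=100000, lr=0.1)
    print(np.max(np.abs(sinkhorn_pi - sag_pi)))      # small for converged runs
    print(np.max(np.abs(sinkhorn_pi - sgd_dual_pi)))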