author    Rémi Flamary <remi.flamary@gmail.com>    2018-09-24 10:37:30 +0200
committer Rémi Flamary <remi.flamary@gmail.com>    2018-09-24 10:37:30 +0200
commit    9fb56beed1ca54bf3913f6a9ea589173b2a5097f (patch)
tree      edb1f325f78c6fe01daf7ae64caf99e6b162f4fc /ot
parent    697bd55a152d6318e292cffdac2ec18ac4528d30 (diff)
parent    c9b99df8fffec1dcc6802ef43b6192774817c5fb (diff)
Merge branch 'master' into prV0.5
Diffstat (limited to 'ot')
-rw-r--r--  ot/bregman.py     125
-rw-r--r--  ot/da.py          284
-rw-r--r--  ot/stochastic.py  179
3 files changed, 162 insertions, 426 deletions
diff --git a/ot/bregman.py b/ot/bregman.py
index c8e69ce..35e51f8 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -350,7 +350,6 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
np.exp(K, out=K)
# print(np.min(K))
- tmp = np.empty(K.shape, dtype=M.dtype)
tmp2 = np.empty(b.shape, dtype=M.dtype)
Kp = (1 / a).reshape(-1, 1) * K
@@ -359,6 +358,7 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
while (err > stopThr and cpt < numItermax):
uprev = u
vprev = v
+
KtransposeU = np.dot(K.T, u)
v = np.divide(b, KtransposeU)
u = 1. / np.dot(Kp, v)
@@ -379,11 +379,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
err = np.sum((u - uprev)**2) / np.sum((u)**2) + \
np.sum((v - vprev)**2) / np.sum((v)**2)
else:
- np.multiply(u.reshape(-1, 1), K, out=tmp)
- np.multiply(tmp, v.reshape(1, -1), out=tmp)
- np.sum(tmp, axis=0, out=tmp2)
- tmp2 -= b
- err = np.linalg.norm(tmp2)**2
+ # compute right marginal tmp2 = (diag(u) K diag(v))^T 1
+ np.einsum('i,ij,j->j', u, K, v, out=tmp2)
+ err = np.linalg.norm(tmp2 - b)**2 # violation of marginal
if log:
log['err'].append(err)
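The einsum introduced above computes the right marginal of diag(u) K diag(v) without allocating the temporary arrays used before. A minimal sketch (synthetic shapes, not part of the patch) checking that it matches the explicit computation it replaces:

# Equivalence check: 'i,ij,j->j' is the column sum of diag(u) K diag(v).
import numpy as np

rng = np.random.RandomState(0)
u = rng.rand(5)        # left Sinkhorn scaling
v = rng.rand(4)        # right Sinkhorn scaling
K = rng.rand(5, 4)     # Gibbs kernel exp(-M / reg)

marginal_einsum = np.einsum('i,ij,j->j', u, K, v)
marginal_explicit = (u.reshape(-1, 1) * K * v.reshape(1, -1)).sum(axis=0)
assert np.allclose(marginal_einsum, marginal_explicit)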
@@ -398,10 +396,7 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
log['v'] = v
if nbb: # return only loss
- res = np.zeros((nbb))
- for i in range(nbb):
- res[i] = np.sum(
- u[:, i].reshape((-1, 1)) * K * v[:, i].reshape((1, -1)) * M)
+ res = np.einsum('ik,ij,jk,ij->k', u, K, v, M)
if log:
return res, log
else:
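The batched einsum above replaces the per-column loop over the nbb Sinkhorn problems. A small sketch (illustrative data only, not part of the patch) comparing the two computations:

# u and v hold one scaling vector per problem (column k); K and M are shared.
import numpy as np

rng = np.random.RandomState(0)
ns, nt, nbb = 5, 4, 3
u, v = rng.rand(ns, nbb), rng.rand(nt, nbb)
K, M = rng.rand(ns, nt), rng.rand(ns, nt)

res_einsum = np.einsum('ik,ij,jk,ij->k', u, K, v, M)
res_loop = np.array([np.sum(u[:, k].reshape((-1, 1)) * K *
                            v[:, k].reshape((1, -1)) * M)
                     for k in range(nbb)])
assert np.allclose(res_einsum, res_loop)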
@@ -924,6 +919,116 @@ def barycenter(A, M, reg, weights=None, numItermax=1000,
return geometricBar(weights, UKv)
+def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1e-9, stabThr=1e-30, verbose=False, log=False):
+ """Compute the entropic regularized wasserstein barycenter of distributions A
+ where A is a collection of 2D images.
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \mathbf{a} = arg\min_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
+
+ where :
+
+ - :math:`W_{reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance (see ot.bregman.sinkhorn)
+ - :math:`\mathbf{a}_i` are training distributions (2D images) in the last two dimensions of matrix :math:`\mathbf{A}`
+ - reg is the regularization strength scalar value
+
+ The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [21]_
+
+ Parameters
+ ----------
+ A : np.ndarray (n,w,h)
+ n distributions (2D images) of size w x h
+ reg : float
+ Regularization term >0
+ weights : np.ndarray (n,)
+ Weights of each image on the simplex (barycentric coordinates)
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+ Stop threshold on error (>0)
+ stabThr : float, optional
+ Stabilization threshold to avoid numerical precision issues
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ a : (w,h) ndarray
+ 2D Wasserstein barycenter
+ log : dict
+ log dictionary returned only if log==True in parameters
+
+
+ References
+ ----------
+
+ .. [21] Solomon, J., De Goes, F., Peyré, G., Cuturi, M., Butscher, A., Nguyen, A. & Guibas, L. (2015).
+ Convolutional Wasserstein distances: Efficient optimal transportation on geometric domains
+ ACM Transactions on Graphics (TOG), 34(4), 66
+
+
+ """
+
+ if weights is None:
+ weights = np.ones(A.shape[0]) / A.shape[0]
+ else:
+ assert(len(weights) == A.shape[0])
+
+ if log:
+ log = {'err': []}
+
+ b = np.zeros_like(A[0, :, :])
+ U = np.ones_like(A)
+ KV = np.ones_like(A)
+
+ cpt = 0
+ err = 1
+
+ # build the convolution operator
+ t = np.linspace(0, 1, A.shape[1])
+ [Y, X] = np.meshgrid(t, t)
+ xi1 = np.exp(-(X - Y)**2 / reg)
+
+ def K(x):
+ return np.dot(np.dot(xi1, x), xi1)
+
+ while (err > stopThr and cpt < numItermax):
+
+ bold = b
+ cpt = cpt + 1
+
+ b = np.zeros_like(A[0, :, :])
+ for r in range(A.shape[0]):
+ KV[r, :, :] = K(A[r, :, :] / np.maximum(stabThr, K(U[r, :, :])))
+ b += weights[r] * np.log(np.maximum(stabThr, U[r, :, :] * KV[r, :, :]))
+ b = np.exp(b)
+ for r in range(A.shape[0]):
+ U[r, :, :] = b / np.maximum(stabThr, KV[r, :, :])
+
+ if cpt % 10 == 1:
+ err = np.sum(np.abs(bold - b))
+ # log and verbose print
+ if log:
+ log['err'].append(err)
+
+ if verbose:
+ if cpt % 200 == 0:
+ print('{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err))
+
+ if log:
+ log['niter'] = cpt
+ log['U'] = U
+ return b, log
+ else:
+ return b
+
+
def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000,
stopThr=1e-3, verbose=False, log=False):
"""
diff --git a/ot/da.py b/ot/da.py
index 48b418f..bc09e3c 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -15,7 +15,7 @@ import scipy.linalg as linalg
from .bregman import sinkhorn
from .lp import emd
from .utils import unif, dist, kernel, cost_normalization
-from .utils import check_params, deprecated, BaseEstimator
+from .utils import check_params, BaseEstimator
from .optim import cg
from .optim import gcg
@@ -740,288 +740,6 @@ def OT_mapping_linear(xs, xt, reg=1e-6, ws=None,
return A, b
-@deprecated("The class OTDA is deprecated in 0.3.1 and will be "
- "removed in 0.5"
- "\n\tfor standard transport use class EMDTransport instead.")
-class OTDA(object):
-
- """Class for domain adaptation with optimal transport as proposed in [5]
-
-
- References
- ----------
-
- .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
- "Optimal Transport for Domain Adaptation," in IEEE Transactions on
- Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
-
- """
-
- def __init__(self, metric='sqeuclidean', norm=None):
- """ Class initialization"""
- self.xs = 0
- self.xt = 0
- self.G = 0
- self.metric = metric
- self.norm = norm
- self.computed = False
-
- def fit(self, xs, xt, ws=None, wt=None, max_iter=100000):
- """Fit domain adaptation between samples is xs and xt
- (with optional weights)"""
- self.xs = xs
- self.xt = xt
-
- if wt is None:
- wt = unif(xt.shape[0])
- if ws is None:
- ws = unif(xs.shape[0])
-
- self.ws = ws
- self.wt = wt
-
- self.M = dist(xs, xt, metric=self.metric)
- self.M = cost_normalization(self.M, self.norm)
- self.G = emd(ws, wt, self.M, max_iter)
- self.computed = True
-
- def interp(self, direction=1):
- """Barycentric interpolation for the source (1) or target (-1) samples
-
- This Barycentric interpolation solves for each source (resp target)
- sample xs (resp xt) the following optimization problem:
-
- .. math::
- arg\min_x \sum_i \gamma_{k,i} c(x,x_i^t)
-
- where k is the index of the sample in xs
-
- For the moment only squared euclidean distance is provided but more
- metric could be used in the future.
-
- """
- if direction > 0: # >0 then source to target
- G = self.G
- w = self.ws.reshape((self.xs.shape[0], 1))
- x = self.xt
- else:
- G = self.G.T
- w = self.wt.reshape((self.xt.shape[0], 1))
- x = self.xs
-
- if self.computed:
- if self.metric == 'sqeuclidean':
- return np.dot(G / w, x) # weighted mean
- else:
- print(
- "Warning, metric not handled yet, using weighted average")
- return np.dot(G / w, x) # weighted mean
- return None
- else:
- print("Warning, model not fitted yet, returning None")
- return None
-
- def predict(self, x, direction=1):
- """ Out of sample mapping using the formulation from [6]
-
- For each sample x to map, it finds the nearest source sample xs and
- map the samle x to the position xst+(x-xs) wher xst is the barycentric
- interpolation of source sample xs.
-
- References
- ----------
-
- .. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014).
- Regularized discrete optimal transport. SIAM Journal on Imaging
- Sciences, 7(3), 1853-1882.
-
- """
- if direction > 0: # >0 then source to target
- xf = self.xt
- x0 = self.xs
- else:
- xf = self.xs
- x0 = self.xt
-
- D0 = dist(x, x0) # dist netween new samples an source
- idx = np.argmin(D0, 1) # closest one
- xf = self.interp(direction) # interp the source samples
- # aply the delta to the interpolation
- return xf[idx, :] + x - x0[idx, :]
-
-
-@deprecated("The class OTDA_sinkhorn is deprecated in 0.3.1 and will be"
- " removed in 0.5 \nUse class SinkhornTransport instead.")
-class OTDA_sinkhorn(OTDA):
-
- """Class for domain adaptation with optimal transport with entropic
- regularization
-
-
- """
-
- def fit(self, xs, xt, reg=1, ws=None, wt=None, **kwargs):
- """Fit regularized domain adaptation between samples is xs and xt
- (with optional weights)"""
- self.xs = xs
- self.xt = xt
-
- if wt is None:
- wt = unif(xt.shape[0])
- if ws is None:
- ws = unif(xs.shape[0])
-
- self.ws = ws
- self.wt = wt
-
- self.M = dist(xs, xt, metric=self.metric)
- self.M = cost_normalization(self.M, self.norm)
- self.G = sinkhorn(ws, wt, self.M, reg, **kwargs)
- self.computed = True
-
-
-@deprecated("The class OTDA_lpl1 is deprecated in 0.3.1 and will be"
- " removed in 0.5 \nUse class SinkhornLpl1Transport instead.")
-class OTDA_lpl1(OTDA):
-
- """Class for domain adaptation with optimal transport with entropic and
- group regularization"""
-
- def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, **kwargs):
- """Fit regularized domain adaptation between samples is xs and xt
- (with optional weights), See ot.da.sinkhorn_lpl1_mm for fit
- parameters"""
- self.xs = xs
- self.xt = xt
-
- if wt is None:
- wt = unif(xt.shape[0])
- if ws is None:
- ws = unif(xs.shape[0])
-
- self.ws = ws
- self.wt = wt
-
- self.M = dist(xs, xt, metric=self.metric)
- self.M = cost_normalization(self.M, self.norm)
- self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M, reg, eta, **kwargs)
- self.computed = True
-
-
-@deprecated("The class OTDA_l1L2 is deprecated in 0.3.1 and will be"
- " removed in 0.5 \nUse class SinkhornL1l2Transport instead.")
-class OTDA_l1l2(OTDA):
-
- """Class for domain adaptation with optimal transport with entropic
- and group lasso regularization"""
-
- def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, **kwargs):
- """Fit regularized domain adaptation between samples is xs and xt
- (with optional weights), See ot.da.sinkhorn_lpl1_gl for fit
- parameters"""
- self.xs = xs
- self.xt = xt
-
- if wt is None:
- wt = unif(xt.shape[0])
- if ws is None:
- ws = unif(xs.shape[0])
-
- self.ws = ws
- self.wt = wt
-
- self.M = dist(xs, xt, metric=self.metric)
- self.M = cost_normalization(self.M, self.norm)
- self.G = sinkhorn_l1l2_gl(ws, ys, wt, self.M, reg, eta, **kwargs)
- self.computed = True
-
-
-@deprecated("The class OTDA_mapping_linear is deprecated in 0.3.1 and will be"
- " removed in 0.5 \nUse class MappingTransport instead.")
-class OTDA_mapping_linear(OTDA):
-
- """Class for optimal transport with joint linear mapping estimation as in
- [8]
- """
-
- def __init__(self):
- """ Class initialization"""
-
- self.xs = 0
- self.xt = 0
- self.G = 0
- self.L = 0
- self.bias = False
- self.computed = False
- self.metric = 'sqeuclidean'
-
- def fit(self, xs, xt, mu=1, eta=1, bias=False, **kwargs):
- """ Fit domain adaptation between samples is xs and xt (with optional
- weights)"""
- self.xs = xs
- self.xt = xt
- self.bias = bias
-
- self.ws = unif(xs.shape[0])
- self.wt = unif(xt.shape[0])
-
- self.G, self.L = joint_OT_mapping_linear(
- xs, xt, mu=mu, eta=eta, bias=bias, **kwargs)
- self.computed = True
-
- def mapping(self):
- return lambda x: self.predict(x)
-
- def predict(self, x):
- """ Out of sample mapping estimated during the call to fit"""
- if self.computed:
- if self.bias:
- x = np.hstack((x, np.ones((x.shape[0], 1))))
- return x.dot(self.L) # aply the delta to the interpolation
- else:
- print("Warning, model not fitted yet, returning None")
- return None
-
-
-@deprecated("The class OTDA_mapping_kernel is deprecated in 0.3.1 and will be"
- " removed in 0.5 \nUse class MappingTransport instead.")
-class OTDA_mapping_kernel(OTDA_mapping_linear):
-
- """Class for optimal transport with joint nonlinear mapping
- estimation as in [8]"""
-
- def fit(self, xs, xt, mu=1, eta=1, bias=False, kerneltype='gaussian',
- sigma=1, **kwargs):
- """ Fit domain adaptation between samples is xs and xt """
- self.xs = xs
- self.xt = xt
- self.bias = bias
-
- self.ws = unif(xs.shape[0])
- self.wt = unif(xt.shape[0])
- self.kernel = kerneltype
- self.sigma = sigma
- self.kwargs = kwargs
-
- self.G, self.L = joint_OT_mapping_kernel(
- xs, xt, mu=mu, eta=eta, bias=bias, **kwargs)
- self.computed = True
-
- def predict(self, x):
- """ Out of sample mapping estimated during the call to fit"""
-
- if self.computed:
- K = kernel(
- x, self.xs, method=self.kernel, sigma=self.sigma,
- **self.kwargs)
- if self.bias:
- K = np.hstack((K, np.ones((x.shape[0], 1))))
- return K.dot(self.L)
- else:
- print("Warning, model not fitted yet, returning None")
- return None
-
-
def distribution_estimation_uniform(X):
"""estimates a uniform distribution from an array of samples X
diff --git a/ot/stochastic.py b/ot/stochastic.py
index 5e8206e..ec53015 100644
--- a/ot/stochastic.py
+++ b/ot/stochastic.py
@@ -435,8 +435,8 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
##############################################################################
-def batch_grad_dual_alpha(M, reg, alpha, beta, batch_size, batch_alpha,
- batch_beta):
+def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
+ batch_beta):
'''
Computes the partial gradient of F_\W_varepsilon
@@ -444,104 +444,31 @@ def batch_grad_dual_alpha(M, reg, alpha, beta, batch_size, batch_alpha,
..math:
\forall i in batch_alpha,
- grad_alpha_i = 1 * batch_size -
- sum_{j in batch_beta} exp((alpha_i + beta_j - M_{i,j})/reg)
+ grad_alpha_i = a_i * batch_size/len(beta) -
+ sum_{j in batch_beta} exp((alpha_i + beta_j - M_{i,j})/reg)
+ * a_i * b_j
- where :
- - M is the (ns,nt) metric cost matrix
- - alpha, beta are dual variables in R^ixR^J
- - reg is the regularization term
- - batch_alpha and batch_beta are list of index
-
- The algorithm used for solving the dual problem is the SGD algorithm
- as proposed in [19]_ [alg.1]
-
- Parameters
- ----------
-
- reg : float number,
- Regularization term > 0
- M : np.ndarray(ns, nt),
- cost matrix
- alpha : np.ndarray(ns,)
- dual variable
- beta : np.ndarray(nt,)
- dual variable
- batch_size : int number
- size of the batch
- batch_alpha : np.ndarray(bs,)
- batch of index of alpha
- batch_beta : np.ndarray(bs,)
- batch of index of beta
-
- Returns
- -------
-
- grad : np.ndarray(ns,)
- partial grad F in alpha
-
- Examples
- --------
-
- >>> n_source = 7
- >>> n_target = 4
- >>> reg = 1
- >>> numItermax = 20000
- >>> lr = 0.1
- >>> batch_size = 3
- >>> log = True
- >>> a = ot.utils.unif(n_source)
- >>> b = ot.utils.unif(n_target)
- >>> rng = np.random.RandomState(0)
- >>> X_source = rng.randn(n_source, 2)
- >>> Y_target = rng.randn(n_target, 2)
- >>> M = ot.dist(X_source, Y_target)
- >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
- batch_size,
- numItermax, lr, log)
- >>> print(log['alpha'], log['beta'])
- >>> print(sgd_dual_pi)
-
- References
- ----------
-
- [Seguy et al., 2018] :
- International Conference on Learning Representation (2018),
- arXiv preprint arxiv:1711.02283.
- '''
-
- grad_alpha = np.zeros(batch_size)
- grad_alpha[:] = batch_size
- for j in batch_beta:
- grad_alpha -= np.exp((alpha[batch_alpha] + beta[j] -
- M[batch_alpha, j]) / reg)
- return grad_alpha
-
-
-def batch_grad_dual_beta(M, reg, alpha, beta, batch_size, batch_alpha,
- batch_beta):
- '''
- Computes the partial gradient of F_\W_varepsilon
-
- Compute the partial gradient of the dual problem:
-
- ..math:
- \forall j in batch_beta,
- grad_beta_j = 1 * batch_size -
+ \forall j in batch_beta,
+ grad_beta_j = b_j * batch_size/len(alpha) -
sum_{i in batch_alpha} exp((alpha_i + beta_j - M_{i,j})/reg)
-
+ * a_i * b_j
where :
- M is the (ns,nt) metric cost matrix
- alpha, beta are dual variables in R^ixR^J
- reg is the regularization term
- - batch_alpha and batch_beta are list of index
+ - batch_alpha and batch_beta are lists of indices
+ - a and b are source and target weights (sum to 1)
+
The algorithm used for solving the dual problem is the SGD algorithm
as proposed in [19]_ [alg.1]
Parameters
----------
-
+ a : np.ndarray(ns,),
+ source measure
+ b : np.ndarray(nt,),
+ target measure
M : np.ndarray(ns, nt),
cost matrix
reg : float number,
@@ -561,7 +488,7 @@ def batch_grad_dual_beta(M, reg, alpha, beta, batch_size, batch_alpha,
-------
grad : np.ndarray(ns,)
- partial grad F in beta
+ partial gradients of F with respect to alpha and beta
Examples
--------
@@ -591,19 +518,22 @@ def batch_grad_dual_beta(M, reg, alpha, beta, batch_size, batch_alpha,
[Seguy et al., 2018] :
International Conference on Learning Representation (2018),
arXiv preprint arxiv:1711.02283.
-
'''
- grad_beta = np.zeros(batch_size)
- grad_beta[:] = batch_size
- for i in batch_alpha:
- grad_beta -= np.exp((alpha[i] +
- beta[batch_beta] - M[i, batch_beta]) / reg)
- return grad_beta
+ G = - (np.exp((alpha[batch_alpha, None] + beta[None, batch_beta] -
+ M[batch_alpha, :][:, batch_beta]) / reg) *
+ a[batch_alpha, None] * b[None, batch_beta])
+ grad_beta = np.zeros(np.shape(M)[1])
+ grad_alpha = np.zeros(np.shape(M)[0])
+ grad_beta[batch_beta] = (b[batch_beta] * len(batch_alpha) / np.shape(M)[0] +
+ G.sum(0))
+ grad_alpha[batch_alpha] = (a[batch_alpha] * len(batch_beta) /
+ np.shape(M)[1] + G.sum(1))
+
+ return grad_alpha, grad_beta
-def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr,
- alternate=True):
+def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
'''
Compute the sgd algorithm to solve the regularized discrete measures
optimal transport dual problem
@@ -623,7 +553,10 @@ def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr,
Parameters
----------
-
+ a : np.ndarray(ns,),
+ source measure
+ b : np.ndarray(nt,),
+ target measure
M : np.ndarray(ns, nt),
cost matrix
reg : float number,
@@ -634,8 +567,6 @@ def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr,
number of iteration
lr : float number
learning rate
- alternate : bool, optional
- alternating algorithm
Returns
-------
@@ -662,8 +593,8 @@ def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr,
>>> Y_target = rng.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
>>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
- batch_size,
- numItermax, lr, log)
+ batch_size,
+ numItermax, lr, log)
>>> print(log['alpha'], log['beta'])
>>> print(sgd_dual_pi)
@@ -677,35 +608,17 @@ def sgd_entropic_regularization(M, reg, batch_size, numItermax, lr,
n_source = np.shape(M)[0]
n_target = np.shape(M)[1]
- cur_alpha = np.random.randn(n_source)
- cur_beta = np.random.randn(n_target)
- if alternate:
- for cur_iter in range(numItermax):
- k = np.sqrt(cur_iter + 1)
- batch_alpha = np.random.choice(n_source, batch_size, replace=False)
- batch_beta = np.random.choice(n_target, batch_size, replace=False)
- grad_F_alpha = batch_grad_dual_alpha(M, reg, cur_alpha, cur_beta,
- batch_size, batch_alpha,
- batch_beta)
- cur_alpha[batch_alpha] += (lr / k) * grad_F_alpha
- grad_F_beta = batch_grad_dual_beta(M, reg, cur_alpha, cur_beta,
- batch_size, batch_alpha,
- batch_beta)
- cur_beta[batch_beta] += (lr / k) * grad_F_beta
-
- else:
- for cur_iter in range(numItermax):
- k = np.sqrt(cur_iter + 1)
- batch_alpha = np.random.choice(n_source, batch_size, replace=False)
- batch_beta = np.random.choice(n_target, batch_size, replace=False)
- grad_F_alpha = batch_grad_dual_alpha(M, reg, cur_alpha, cur_beta,
- batch_size, batch_alpha,
- batch_beta)
- grad_F_beta = batch_grad_dual_beta(M, reg, cur_alpha, cur_beta,
- batch_size, batch_alpha,
- batch_beta)
- cur_alpha[batch_alpha] += (lr / k) * grad_F_alpha
- cur_beta[batch_beta] += (lr / k) * grad_F_beta
+ cur_alpha = np.zeros(n_source)
+ cur_beta = np.zeros(n_target)
+ for cur_iter in range(numItermax):
+ k = np.sqrt(cur_iter + 1)
+ batch_alpha = np.random.choice(n_source, batch_size, replace=False)
+ batch_beta = np.random.choice(n_target, batch_size, replace=False)
+ update_alpha, update_beta = batch_grad_dual(a, b, M, reg, cur_alpha,
+ cur_beta, batch_size,
+ batch_alpha, batch_beta)
+ cur_alpha[batch_alpha] += (lr / k) * update_alpha[batch_alpha]
+ cur_beta[batch_beta] += (lr / k) * update_beta[batch_beta]
return cur_alpha, cur_beta
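One step of the loop above, written out against the merged batch_grad_dual (synthetic data, illustrative only, not part of the patch):

# A single averaged-SGD ascent step on the dual potentials.
import numpy as np
import ot
from ot import stochastic

rng = np.random.RandomState(0)
ns, nt, batch_size, reg, lr = 7, 4, 3, 1.0, 0.1
a, b = ot.utils.unif(ns), ot.utils.unif(nt)
M = ot.dist(rng.randn(ns, 2), rng.randn(nt, 2))

alpha, beta = np.zeros(ns), np.zeros(nt)
batch_alpha = rng.choice(ns, batch_size, replace=False)
batch_beta = rng.choice(nt, batch_size, replace=False)

grad_alpha, grad_beta = stochastic.batch_grad_dual(
    a, b, M, reg, alpha, beta, batch_size, batch_alpha, batch_beta)
alpha[batch_alpha] += lr * grad_alpha[batch_alpha]
beta[batch_beta] += lr * grad_beta[batch_beta]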
@@ -787,7 +700,7 @@ def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1,
arXiv preprint arxiv:1711.02283.
'''
- opt_alpha, opt_beta = sgd_entropic_regularization(M, reg, batch_size,
+ opt_alpha, opt_beta = sgd_entropic_regularization(a, b, M, reg, batch_size,
numItermax, lr)
pi = (np.exp((opt_alpha[:, None] + opt_beta[None, :] - M[:, :]) / reg) *
a[:, None] * b[None, :])
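An end-to-end sketch of the updated solver on synthetic data (not part of the patch); the returned plan should approximately satisfy the prescribed marginals a and b, up to SGD accuracy.

# Solve the regularized dual by SGD and check the marginals of the plan.
import numpy as np
import ot
from ot import stochastic

rng = np.random.RandomState(0)
n_source, n_target = 7, 4
a, b = ot.utils.unif(n_source), ot.utils.unif(n_target)
M = ot.dist(rng.randn(n_source, 2), rng.randn(n_target, 2))

pi = stochastic.solve_dual_entropic(a, b, M, reg=1, batch_size=3,
                                    numItermax=20000, lr=0.1)
print(np.abs(pi.sum(axis=1) - a).max())   # row marginal violation
print(np.abs(pi.sum(axis=0) - b).max())   # column marginal violation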