diff options
author | Nicolas Courty <ncourty@irisa.fr> | 2017-09-01 01:25:02 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-09-01 01:25:02 +0200 |
commit | 986f46ddde3ce2f550cb56f66620df377326423d (patch) | |
tree | 7fcff22d8a53d16b5b36fe395172fdc9a2c7e44d /ot | |
parent | bc68cc3e8b23ad7d542518ba8ffa665094d57663 (diff) | |
parent | 16697047eff9326a0ecb483317c13a854a3d3a71 (diff) |
Merge branch 'master' into gromov
Diffstat (limited to 'ot')
-rw-r--r-- | ot/da.py | 1030 | ||||
-rw-r--r-- | ot/lp/EMD.h | 7 | ||||
-rw-r--r-- | ot/lp/EMD_wrapper.cpp | 7 | ||||
-rw-r--r-- | ot/lp/__init__.py | 42 | ||||
-rw-r--r-- | ot/lp/emd_wrap.pyx | 89 | ||||
-rw-r--r-- | ot/utils.py | 273 |
6 files changed, 1318 insertions, 130 deletions
@@ -10,21 +10,27 @@ Domain adaptation with optimal transport # License: MIT License import numpy as np + from .bregman import sinkhorn from .lp import emd -from .utils import unif, dist, kernel +from .utils import unif, dist, kernel, cost_normalization +from .utils import check_params, deprecated, BaseEstimator from .optim import cg from .optim import gcg -def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, numInnerItermax=200, stopInnerThr=1e-9, verbose=False, log=False): +def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, + numInnerItermax=200, stopInnerThr=1e-9, verbose=False, + log=False): """ - Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization + Solve the entropic regularization optimal transport problem with nonconvex + group lasso regularization The function solves the following optimization problem: .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)+ \eta \Omega_g(\gamma) + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma) + + \eta \Omega_g(\gamma) s.t. \gamma 1 = a @@ -34,11 +40,16 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, numInnerIte where : - M is the (ns,nt) metric cost matrix - - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - :math:`\Omega_g` is the group lasso regulaization term :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` where :math:`\mathcal{I}_c` are the index of samples from class c in the source domain. + - :math:`\Omega_e` is the entropic regularization term + :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`\Omega_g` is the group lasso regulaization term + :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` + where :math:`\mathcal{I}_c` are the index of samples from class c + in the source domain. - a and b are source and target weights (sum to 1) - The algorithm used for solving the problem is the generalised conditional gradient as proposed in [5]_ [7]_ + The algorithm used for solving the problem is the generalised conditional + gradient as proposed in [5]_ [7]_ Parameters @@ -78,8 +89,13 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, numInnerIte References ---------- - .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 - .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized conditional gradient: analysis of convergence and applications. arXiv preprint arXiv:1510.06567. + .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE + Transactions on Pattern Analysis and Machine Intelligence , + vol.PP, no.99, pp.1-1 + .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). + Generalized conditional gradient: analysis of convergence + and applications. arXiv preprint arXiv:1510.06567. See Also -------- @@ -114,14 +130,18 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, numInnerIte return transp -def sinkhorn_l1l2_gl(a, labels_a, b, M, reg, eta=0.1, numItermax=10, numInnerItermax=200, stopInnerThr=1e-9, verbose=False, log=False): +def sinkhorn_l1l2_gl(a, labels_a, b, M, reg, eta=0.1, numItermax=10, + numInnerItermax=200, stopInnerThr=1e-9, verbose=False, + log=False): """ - Solve the entropic regularization optimal transport problem with group lasso regularization + Solve the entropic regularization optimal transport problem with group + lasso regularization The function solves the following optimization problem: .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)+ \eta \Omega_g(\gamma) + \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)+ + \eta \Omega_g(\gamma) s.t. \gamma 1 = a @@ -131,11 +151,16 @@ def sinkhorn_l1l2_gl(a, labels_a, b, M, reg, eta=0.1, numItermax=10, numInnerIte where : - M is the (ns,nt) metric cost matrix - - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - :math:`\Omega_g` is the group lasso regulaization term :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^2` where :math:`\mathcal{I}_c` are the index of samples from class c in the source domain. + - :math:`\Omega_e` is the entropic regularization term + :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`\Omega_g` is the group lasso regulaization term + :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^2` + where :math:`\mathcal{I}_c` are the index of samples from class + c in the source domain. - a and b are source and target weights (sum to 1) - The algorithm used for solving the problem is the generalised conditional gradient as proposed in [5]_ [7]_ + The algorithm used for solving the problem is the generalised conditional + gradient as proposed in [5]_ [7]_ Parameters @@ -175,8 +200,12 @@ def sinkhorn_l1l2_gl(a, labels_a, b, M, reg, eta=0.1, numItermax=10, numInnerIte References ---------- - .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 - .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized conditional gradient: analysis of convergence and applications. arXiv preprint arXiv:1510.06567. + .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE Transactions + on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 + .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). + Generalized conditional gradient: analysis of convergence and + applications. arXiv preprint arXiv:1510.06567. See Also -------- @@ -203,16 +232,22 @@ def sinkhorn_l1l2_gl(a, labels_a, b, M, reg, eta=0.1, numItermax=10, numInnerIte W[labels_a == lab, i] = temp / n return W - return gcg(a, b, M, reg, eta, f, df, G0=None, numItermax=numItermax, numInnerItermax=numInnerItermax, stopThr=stopInnerThr, verbose=verbose, log=log) + return gcg(a, b, M, reg, eta, f, df, G0=None, numItermax=numItermax, + numInnerItermax=numInnerItermax, stopThr=stopInnerThr, + verbose=verbose, log=log) -def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False, verbose2=False, numItermax=100, numInnerItermax=10, stopInnerThr=1e-6, stopThr=1e-5, log=False, **kwargs): +def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False, + verbose2=False, numItermax=100, numInnerItermax=10, + stopInnerThr=1e-6, stopThr=1e-5, log=False, + **kwargs): """Joint OT and linear mapping estimation as proposed in [8] The function solves the following optimization problem: .. math:: - \min_{\gamma,L}\quad \|L(X_s) -n_s\gamma X_t\|^2_F + \mu<\gamma,M>_F + \eta \|L -I\|^2_F + \min_{\gamma,L}\quad \|L(X_s) -n_s\gamma X_t\|^2_F + + \mu<\gamma,M>_F + \eta \|L -I\|^2_F s.t. \gamma 1 = a @@ -221,8 +256,10 @@ def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False, \gamma\geq 0 where : - - M is the (ns,nt) squared euclidean cost matrix between samples in Xs and Xt (scaled by ns) - - :math:`L` is a dxd linear operator that approximates the barycentric mapping + - M is the (ns,nt) squared euclidean cost matrix between samples in + Xs and Xt (scaled by ns) + - :math:`L` is a dxd linear operator that approximates the barycentric + mapping - :math:`I` is the identity matrix (neutral linear mapping) - a and b are uniform source and target weights @@ -277,7 +314,9 @@ def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False, References ---------- - .. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard, "Mapping estimation for discrete optimal transport", Neural Information Processing Systems (NIPS), 2016. + .. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard, + "Mapping estimation for discrete optimal transport", + Neural Information Processing Systems (NIPS), 2016. See Also -------- @@ -384,13 +423,18 @@ def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False, return G, L -def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian', sigma=1, bias=False, verbose=False, verbose2=False, numItermax=100, numInnerItermax=10, stopInnerThr=1e-6, stopThr=1e-5, log=False, **kwargs): +def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian', + sigma=1, bias=False, verbose=False, verbose2=False, + numItermax=100, numInnerItermax=10, + stopInnerThr=1e-6, stopThr=1e-5, log=False, + **kwargs): """Joint OT and nonlinear mapping estimation with kernels as proposed in [8] The function solves the following optimization problem: .. math:: - \min_{\gamma,L\in\mathcal{H}}\quad \|L(X_s) -n_s\gamma X_t\|^2_F + \mu<\gamma,M>_F + \eta \|L\|^2_\mathcal{H} + \min_{\gamma,L\in\mathcal{H}}\quad \|L(X_s) - + n_s\gamma X_t\|^2_F + \mu<\gamma,M>_F + \eta \|L\|^2_\mathcal{H} s.t. \gamma 1 = a @@ -399,8 +443,10 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian', sigm \gamma\geq 0 where : - - M is the (ns,nt) squared euclidean cost matrix between samples in Xs and Xt (scaled by ns) - - :math:`L` is a ns x d linear operator on a kernel matrix that approximates the barycentric mapping + - M is the (ns,nt) squared euclidean cost matrix between samples in + Xs and Xt (scaled by ns) + - :math:`L` is a ns x d linear operator on a kernel matrix that + approximates the barycentric mapping - a and b are uniform source and target weights The problem consist in solving jointly an optimal transport matrix @@ -458,7 +504,9 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian', sigm References ---------- - .. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard, "Mapping estimation for discrete optimal transport", Neural Information Processing Systems (NIPS), 2016. + .. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard, + "Mapping estimation for discrete optimal transport", + Neural Information Processing Systems (NIPS), 2016. See Also -------- @@ -585,6 +633,9 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian', sigm return G, L +@deprecated("The class OTDA is deprecated in 0.3.1 and will be " + "removed in 0.5" + "\n\tfor standard transport use class EMDTransport instead.") class OTDA(object): """Class for domain adaptation with optimal transport as proposed in [5] @@ -593,20 +644,24 @@ class OTDA(object): References ---------- - .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 + .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE Transactions on + Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 """ - def __init__(self, metric='sqeuclidean'): + def __init__(self, metric='sqeuclidean', norm=None): """ Class initialization""" self.xs = 0 self.xt = 0 self.G = 0 self.metric = metric + self.norm = norm self.computed = False - def fit(self, xs, xt, ws=None, wt=None, norm=None): - """ Fit domain adaptation between samples is xs and xt (with optional weights)""" + def fit(self, xs, xt, ws=None, wt=None, max_iter=100000): + """Fit domain adaptation between samples is xs and xt + (with optional weights)""" self.xs = xs self.xt = xt @@ -619,8 +674,8 @@ class OTDA(object): self.wt = wt self.M = dist(xs, xt, metric=self.metric) - self.normalizeM(norm) - self.G = emd(ws, wt, self.M) + self.M = cost_normalization(self.M, self.norm) + self.G = emd(ws, wt, self.M, max_iter) self.computed = True def interp(self, direction=1): @@ -669,7 +724,9 @@ class OTDA(object): References ---------- - .. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014). Regularized discrete optimal transport. SIAM Journal on Imaging Sciences, 7(3), 1853-1882. + .. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014). + Regularized discrete optimal transport. SIAM Journal on Imaging + Sciences, 7(3), 1853-1882. """ if direction > 0: # >0 then source to target @@ -685,33 +742,20 @@ class OTDA(object): # aply the delta to the interpolation return xf[idx, :] + x - x0[idx, :] - def normalizeM(self, norm): - """ Apply normalization to the loss matrix - - - Parameters - ---------- - norm : str - type of normalization from 'median','max','log','loglog' - - """ - if norm == "median": - self.M /= float(np.median(self.M)) - elif norm == "max": - self.M /= float(np.max(self.M)) - elif norm == "log": - self.M = np.log(1 + self.M) - elif norm == "loglog": - self.M = np.log(1 + np.log(1 + self.M)) +@deprecated("The class OTDA_sinkhorn is deprecated in 0.3.1 and will be" + " removed in 0.5 \nUse class SinkhornTransport instead.") +class OTDA_sinkhorn(OTDA): + """Class for domain adaptation with optimal transport with entropic + regularization -class OTDA_sinkhorn(OTDA): - """Class for domain adaptation with optimal transport with entropic regularization""" + """ - def fit(self, xs, xt, reg=1, ws=None, wt=None, norm=None, **kwargs): - """ Fit regularized domain adaptation between samples is xs and xt (with optional weights)""" + def fit(self, xs, xt, reg=1, ws=None, wt=None, **kwargs): + """Fit regularized domain adaptation between samples is xs and xt + (with optional weights)""" self.xs = xs self.xt = xt @@ -724,17 +768,22 @@ class OTDA_sinkhorn(OTDA): self.wt = wt self.M = dist(xs, xt, metric=self.metric) - self.normalizeM(norm) + self.M = cost_normalization(self.M, self.norm) self.G = sinkhorn(ws, wt, self.M, reg, **kwargs) self.computed = True +@deprecated("The class OTDA_lpl1 is deprecated in 0.3.1 and will be" + " removed in 0.5 \nUse class SinkhornLpl1Transport instead.") class OTDA_lpl1(OTDA): - """Class for domain adaptation with optimal transport with entropic and group regularization""" + """Class for domain adaptation with optimal transport with entropic and + group regularization""" - def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None, **kwargs): - """ Fit regularized domain adaptation between samples is xs and xt (with optional weights), See ot.da.sinkhorn_lpl1_mm for fit parameters""" + def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, **kwargs): + """Fit regularized domain adaptation between samples is xs and xt + (with optional weights), See ot.da.sinkhorn_lpl1_mm for fit + parameters""" self.xs = xs self.xt = xt @@ -747,17 +796,22 @@ class OTDA_lpl1(OTDA): self.wt = wt self.M = dist(xs, xt, metric=self.metric) - self.normalizeM(norm) + self.M = cost_normalization(self.M, self.norm) self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M, reg, eta, **kwargs) self.computed = True +@deprecated("The class OTDA_l1L2 is deprecated in 0.3.1 and will be" + " removed in 0.5 \nUse class SinkhornL1l2Transport instead.") class OTDA_l1l2(OTDA): - """Class for domain adaptation with optimal transport with entropic and group lasso regularization""" + """Class for domain adaptation with optimal transport with entropic + and group lasso regularization""" - def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, norm=None, **kwargs): - """ Fit regularized domain adaptation between samples is xs and xt (with optional weights), See ot.da.sinkhorn_lpl1_gl for fit parameters""" + def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, **kwargs): + """Fit regularized domain adaptation between samples is xs and xt + (with optional weights), See ot.da.sinkhorn_lpl1_gl for fit + parameters""" self.xs = xs self.xt = xt @@ -770,14 +824,18 @@ class OTDA_l1l2(OTDA): self.wt = wt self.M = dist(xs, xt, metric=self.metric) - self.normalizeM(norm) + self.M = cost_normalization(self.M, self.norm) self.G = sinkhorn_l1l2_gl(ws, ys, wt, self.M, reg, eta, **kwargs) self.computed = True +@deprecated("The class OTDA_mapping_linear is deprecated in 0.3.1 and will be" + " removed in 0.5 \nUse class MappingTransport instead.") class OTDA_mapping_linear(OTDA): - """Class for optimal transport with joint linear mapping estimation as in [8]""" + """Class for optimal transport with joint linear mapping estimation as in + [8] + """ def __init__(self): """ Class initialization""" @@ -818,11 +876,15 @@ class OTDA_mapping_linear(OTDA): return None +@deprecated("The class OTDA_mapping_kernel is deprecated in 0.3.1 and will be" + " removed in 0.5 \nUse class MappingTransport instead.") class OTDA_mapping_kernel(OTDA_mapping_linear): - """Class for optimal transport with joint nonlinear mapping estimation as in [8]""" + """Class for optimal transport with joint nonlinear mapping + estimation as in [8]""" - def fit(self, xs, xt, mu=1, eta=1, bias=False, kerneltype='gaussian', sigma=1, **kwargs): + def fit(self, xs, xt, mu=1, eta=1, bias=False, kerneltype='gaussian', + sigma=1, **kwargs): """ Fit domain adaptation between samples is xs and xt """ self.xs = xs self.xt = xt @@ -843,10 +905,838 @@ class OTDA_mapping_kernel(OTDA_mapping_linear): if self.computed: K = kernel( - x, self.xs, method=self.kernel, sigma=self.sigma, **self.kwargs) + x, self.xs, method=self.kernel, sigma=self.sigma, + **self.kwargs) if self.bias: K = np.hstack((K, np.ones((x.shape[0], 1)))) return K.dot(self.L) else: print("Warning, model not fitted yet, returning None") return None + + +def distribution_estimation_uniform(X): + """estimates a uniform distribution from an array of samples X + + Parameters + ---------- + X : array-like, shape (n_samples, n_features) + The array of samples + + Returns + ------- + mu : array-like, shape (n_samples,) + The uniform distribution estimated from X + """ + + return unif(X.shape[0]) + + +class BaseTransport(BaseEstimator): + """Base class for OTDA objects + + Notes + ----- + All estimators should specify all the parameters that can be set + at the class level in their ``__init__`` as explicit keyword + arguments (no ``*args`` or ``**kwargs``). + + fit method should: + - estimate a cost matrix and store it in a `cost_` attribute + - estimate a coupling matrix and store it in a `coupling_` + attribute + - estimate distributions from source and target data and store them in + mu_s and mu_t attributes + - store Xs and Xt in attributes to be used later on in transform and + inverse_transform methods + + transform method should always get as input a Xs parameter + inverse_transform method should always get as input a Xt parameter + """ + + def fit(self, Xs=None, ys=None, Xt=None, yt=None): + """Build a coupling matrix from source and target sets of samples + (Xs, ys) and (Xt, yt) + + Parameters + ---------- + Xs : array-like, shape (n_source_samples, n_features) + The training input samples. + ys : array-like, shape (n_source_samples,) + The class labels + Xt : array-like, shape (n_target_samples, n_features) + The training input samples. + yt : array-like, shape (n_labeled_target_samples,) + The class labels + + Returns + ------- + self : object + Returns self. + """ + + # check the necessary inputs parameters are here + if check_params(Xs=Xs, Xt=Xt): + + # pairwise distance + self.cost_ = dist(Xs, Xt, metric=self.metric) + self.cost_ = cost_normalization(self.cost_, self.norm) + + if (ys is not None) and (yt is not None): + + if self.limit_max != np.infty: + self.limit_max = self.limit_max * np.max(self.cost_) + + # assumes labeled source samples occupy the first rows + # and labeled target samples occupy the first columns + classes = np.unique(ys) + for c in classes: + idx_s = np.where((ys != c) & (ys != -1)) + idx_t = np.where(yt == c) + + # all the coefficients corresponding to a source sample + # and a target sample : + # with different labels get a infinite + for j in idx_t[0]: + self.cost_[idx_s[0], j] = self.limit_max + + # distribution estimation + self.mu_s = self.distribution_estimation(Xs) + self.mu_t = self.distribution_estimation(Xt) + + # store arrays of samples + self.xs_ = Xs + self.xt_ = Xt + + return self + + def fit_transform(self, Xs=None, ys=None, Xt=None, yt=None): + """Build a coupling matrix from source and target sets of samples + (Xs, ys) and (Xt, yt) and transports source samples Xs onto target + ones Xt + + Parameters + ---------- + Xs : array-like, shape (n_source_samples, n_features) + The training input samples. + ys : array-like, shape (n_source_samples,) + The class labels + Xt : array-like, shape (n_target_samples, n_features) + The training input samples. + yt : array-like, shape (n_labeled_target_samples,) + The class labels + + Returns + ------- + transp_Xs : array-like, shape (n_source_samples, n_features) + The source samples samples. + """ + + return self.fit(Xs, ys, Xt, yt).transform(Xs, ys, Xt, yt) + + def transform(self, Xs=None, ys=None, Xt=None, yt=None, batch_size=128): + """Transports source samples Xs onto target ones Xt + + Parameters + ---------- + Xs : array-like, shape (n_source_samples, n_features) + The training input samples. + ys : array-like, shape (n_source_samples,) + The class labels + Xt : array-like, shape (n_target_samples, n_features) + The training input samples. + yt : array-like, shape (n_labeled_target_samples,) + The class labels + batch_size : int, optional (default=128) + The batch size for out of sample inverse transform + + Returns + ------- + transp_Xs : array-like, shape (n_source_samples, n_features) + The transport source samples. + """ + + # check the necessary inputs parameters are here + if check_params(Xs=Xs): + + if np.array_equal(self.xs_, Xs): + + # perform standard barycentric mapping + transp = self.coupling_ / np.sum(self.coupling_, 1)[:, None] + + # set nans to 0 + transp[~ np.isfinite(transp)] = 0 + + # compute transported samples + transp_Xs = np.dot(transp, self.xt_) + else: + # perform out of sample mapping + indices = np.arange(Xs.shape[0]) + batch_ind = [ + indices[i:i + batch_size] + for i in range(0, len(indices), batch_size)] + + transp_Xs = [] + for bi in batch_ind: + + # get the nearest neighbor in the source domain + D0 = dist(Xs[bi], self.xs_) + idx = np.argmin(D0, axis=1) + + # transport the source samples + transp = self.coupling_ / np.sum( + self.coupling_, 1)[:, None] + transp[~ np.isfinite(transp)] = 0 + transp_Xs_ = np.dot(transp, self.xt_) + + # define the transported points + transp_Xs_ = transp_Xs_[idx, :] + Xs[bi] - self.xs_[idx, :] + + transp_Xs.append(transp_Xs_) + + transp_Xs = np.concatenate(transp_Xs, axis=0) + + return transp_Xs + + def inverse_transform(self, Xs=None, ys=None, Xt=None, yt=None, + batch_size=128): + """Transports target samples Xt onto target samples Xs + + Parameters + ---------- + Xs : array-like, shape (n_source_samples, n_features) + The training input samples. + ys : array-like, shape (n_source_samples,) + The class labels + Xt : array-like, shape (n_target_samples, n_features) + The training input samples. + yt : array-like, shape (n_labeled_target_samples,) + The class labels + batch_size : int, optional (default=128) + The batch size for out of sample inverse transform + + Returns + ------- + transp_Xt : array-like, shape (n_source_samples, n_features) + The transported target samples. + """ + + # check the necessary inputs parameters are here + if check_params(Xt=Xt): + + if np.array_equal(self.xt_, Xt): + + # perform standard barycentric mapping + transp_ = self.coupling_.T / np.sum(self.coupling_, 0)[:, None] + + # set nans to 0 + transp_[~ np.isfinite(transp_)] = 0 + + # compute transported samples + transp_Xt = np.dot(transp_, self.xs_) + else: + # perform out of sample mapping + indices = np.arange(Xt.shape[0]) + batch_ind = [ + indices[i:i + batch_size] + for i in range(0, len(indices), batch_size)] + + transp_Xt = [] + for bi in batch_ind: + + D0 = dist(Xt[bi], self.xt_) + idx = np.argmin(D0, axis=1) + + # transport the target samples + transp_ = self.coupling_.T / np.sum( + self.coupling_, 0)[:, None] + transp_[~ np.isfinite(transp_)] = 0 + transp_Xt_ = np.dot(transp_, self.xs_) + + # define the transported points + transp_Xt_ = transp_Xt_[idx, :] + Xt[bi] - self.xt_[idx, :] + + transp_Xt.append(transp_Xt_) + + transp_Xt = np.concatenate(transp_Xt, axis=0) + + return transp_Xt + + +class SinkhornTransport(BaseTransport): + """Domain Adapatation OT method based on Sinkhorn Algorithm + + Parameters + ---------- + reg_e : float, optional (default=1) + Entropic regularization parameter + max_iter : int, float, optional (default=1000) + The minimum number of iteration before stopping the optimization + algorithm if no it has not converged + tol : float, optional (default=10e-9) + The precision required to stop the optimization algorithm. + mapping : string, optional (default="barycentric") + The kind of mapping to apply to transport samples from a domain into + another one. + if "barycentric" only the samples used to estimate the coupling can + be transported from a domain to another one. + metric : string, optional (default="sqeuclidean") + The ground metric for the Wasserstein problem + norm : string, optional (default=None) + If given, normalize the ground metric to avoid numerical errors that + can occur with large metric values. + distribution : string, optional (default="uniform") + The kind of distribution estimation to employ + verbose : int, optional (default=0) + Controls the verbosity of the optimization algorithm + log : int, optional (default=0) + Controls the logs of the optimization algorithm + limit_max: float, optional (defaul=np.infty) + Controls the semi supervised mode. Transport between labeled source + and target samples of different classes will exhibit an infinite cost + + Attributes + ---------- + coupling_ : array-like, shape (n_source_samples, n_target_samples) + The optimal coupling + log_ : dictionary + The dictionary of log, empty dic if parameter log is not True + + References + ---------- + .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE Transactions + on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 + .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal + Transport, Advances in Neural Information Processing Systems (NIPS) + 26, 2013 + """ + + def __init__(self, reg_e=1., max_iter=1000, + tol=10e-9, verbose=False, log=False, + metric="sqeuclidean", norm=None, + distribution_estimation=distribution_estimation_uniform, + out_of_sample_map='ferradans', limit_max=np.infty): + + self.reg_e = reg_e + self.max_iter = max_iter + self.tol = tol + self.verbose = verbose + self.log = log + self.metric = metric + self.norm = norm + self.limit_max = limit_max + self.distribution_estimation = distribution_estimation + self.out_of_sample_map = out_of_sample_map + + def fit(self, Xs=None, ys=None, Xt=None, yt=None): + """Build a coupling matrix from source and target sets of samples + (Xs, ys) and (Xt, yt) + + Parameters + ---------- + Xs : array-like, shape (n_source_samples, n_features) + The training input samples. + ys : array-like, shape (n_source_samples,) + The class labels + Xt : array-like, shape (n_target_samples, n_features) + The training input samples. + yt : array-like, shape (n_labeled_target_samples,) + The class labels + + Returns + ------- + self : object + Returns self. + """ + + super(SinkhornTransport, self).fit(Xs, ys, Xt, yt) + + # coupling estimation + returned_ = sinkhorn( + a=self.mu_s, b=self.mu_t, M=self.cost_, reg=self.reg_e, + numItermax=self.max_iter, stopThr=self.tol, + verbose=self.verbose, log=self.log) + + # deal with the value of log + if self.log: + self.coupling_, self.log_ = returned_ + else: + self.coupling_ = returned_ + self.log_ = dict() + + return self + + +class EMDTransport(BaseTransport): + """Domain Adapatation OT method based on Earth Mover's Distance + + Parameters + ---------- + mapping : string, optional (default="barycentric") + The kind of mapping to apply to transport samples from a domain into + another one. + if "barycentric" only the samples used to estimate the coupling can + be transported from a domain to another one. + metric : string, optional (default="sqeuclidean") + The ground metric for the Wasserstein problem + norm : string, optional (default=None) + If given, normalize the ground metric to avoid numerical errors that + can occur with large metric values. + distribution : string, optional (default="uniform") + The kind of distribution estimation to employ + verbose : int, optional (default=0) + Controls the verbosity of the optimization algorithm + log : int, optional (default=0) + Controls the logs of the optimization algorithm + limit_max: float, optional (default=10) + Controls the semi supervised mode. Transport between labeled source + and target samples of different classes will exhibit an infinite cost + (10 times the maximum value of the cost matrix) + max_iter : int, optional (default=100000) + The maximum number of iterations before stopping the optimization + algorithm if it has not converged. + + Attributes + ---------- + coupling_ : array-like, shape (n_source_samples, n_target_samples) + The optimal coupling + + References + ---------- + .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE Transactions + on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 + """ + + def __init__(self, metric="sqeuclidean", norm=None, + distribution_estimation=distribution_estimation_uniform, + out_of_sample_map='ferradans', limit_max=10, + max_iter=100000): + + self.metric = metric + self.norm = norm + self.limit_max = limit_max + self.distribution_estimation = distribution_estimation + self.out_of_sample_map = out_of_sample_map + self.max_iter = max_iter + + def fit(self, Xs, ys=None, Xt=None, yt=None): + """Build a coupling matrix from source and target sets of samples + (Xs, ys) and (Xt, yt) + + Parameters + ---------- + Xs : array-like, shape (n_source_samples, n_features) + The training input samples. + ys : array-like, shape (n_source_samples,) + The class labels + Xt : array-like, shape (n_target_samples, n_features) + The training input samples. + yt : array-like, shape (n_labeled_target_samples,) + The class labels + + Returns + ------- + self : object + Returns self. + """ + + super(EMDTransport, self).fit(Xs, ys, Xt, yt) + + # coupling estimation + self.coupling_ = emd( + a=self.mu_s, b=self.mu_t, M=self.cost_, numItermax=self.max_iter + ) + + return self + + +class SinkhornLpl1Transport(BaseTransport): + """Domain Adapatation OT method based on sinkhorn algorithm + + LpL1 class regularization. + + Parameters + ---------- + reg_e : float, optional (default=1) + Entropic regularization parameter + reg_cl : float, optional (default=0.1) + Class regularization parameter + mapping : string, optional (default="barycentric") + The kind of mapping to apply to transport samples from a domain into + another one. + if "barycentric" only the samples used to estimate the coupling can + be transported from a domain to another one. + metric : string, optional (default="sqeuclidean") + The ground metric for the Wasserstein problem + norm : string, optional (default=None) + If given, normalize the ground metric to avoid numerical errors that + can occur with large metric values. + distribution : string, optional (default="uniform") + The kind of distribution estimation to employ + max_iter : int, float, optional (default=10) + The minimum number of iteration before stopping the optimization + algorithm if no it has not converged + max_inner_iter : int, float, optional (default=200) + The number of iteration in the inner loop + verbose : int, optional (default=0) + Controls the verbosity of the optimization algorithm + limit_max: float, optional (defaul=np.infty) + Controls the semi supervised mode. Transport between labeled source + and target samples of different classes will exhibit an infinite cost + + Attributes + ---------- + coupling_ : array-like, shape (n_source_samples, n_target_samples) + The optimal coupling + + References + ---------- + + .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE + Transactions on Pattern Analysis and Machine Intelligence , + vol.PP, no.99, pp.1-1 + .. [2] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). + Generalized conditional gradient: analysis of convergence + and applications. arXiv preprint arXiv:1510.06567. + + """ + + def __init__(self, reg_e=1., reg_cl=0.1, + max_iter=10, max_inner_iter=200, + tol=10e-9, verbose=False, + metric="sqeuclidean", norm=None, + distribution_estimation=distribution_estimation_uniform, + out_of_sample_map='ferradans', limit_max=np.infty): + + self.reg_e = reg_e + self.reg_cl = reg_cl + self.max_iter = max_iter + self.max_inner_iter = max_inner_iter + self.tol = tol + self.verbose = verbose + self.metric = metric + self.norm = norm + self.distribution_estimation = distribution_estimation + self.out_of_sample_map = out_of_sample_map + self.limit_max = limit_max + + def fit(self, Xs, ys=None, Xt=None, yt=None): + """Build a coupling matrix from source and target sets of samples + (Xs, ys) and (Xt, yt) + + Parameters + ---------- + Xs : array-like, shape (n_source_samples, n_features) + The training input samples. + ys : array-like, shape (n_source_samples,) + The class labels + Xt : array-like, shape (n_target_samples, n_features) + The training input samples. + yt : array-like, shape (n_labeled_target_samples,) + The class labels + + Returns + ------- + self : object + Returns self. + """ + + # check the necessary inputs parameters are here + if check_params(Xs=Xs, Xt=Xt, ys=ys): + + super(SinkhornLpl1Transport, self).fit(Xs, ys, Xt, yt) + + self.coupling_ = sinkhorn_lpl1_mm( + a=self.mu_s, labels_a=ys, b=self.mu_t, M=self.cost_, + reg=self.reg_e, eta=self.reg_cl, numItermax=self.max_iter, + numInnerItermax=self.max_inner_iter, stopInnerThr=self.tol, + verbose=self.verbose) + + return self + + +class SinkhornL1l2Transport(BaseTransport): + """Domain Adapatation OT method based on sinkhorn algorithm + + l1l2 class regularization. + + Parameters + ---------- + reg_e : float, optional (default=1) + Entropic regularization parameter + reg_cl : float, optional (default=0.1) + Class regularization parameter + mapping : string, optional (default="barycentric") + The kind of mapping to apply to transport samples from a domain into + another one. + if "barycentric" only the samples used to estimate the coupling can + be transported from a domain to another one. + metric : string, optional (default="sqeuclidean") + The ground metric for the Wasserstein problem + norm : string, optional (default=None) + If given, normalize the ground metric to avoid numerical errors that + can occur with large metric values. + distribution : string, optional (default="uniform") + The kind of distribution estimation to employ + max_iter : int, float, optional (default=10) + The minimum number of iteration before stopping the optimization + algorithm if no it has not converged + max_inner_iter : int, float, optional (default=200) + The number of iteration in the inner loop + verbose : int, optional (default=0) + Controls the verbosity of the optimization algorithm + log : int, optional (default=0) + Controls the logs of the optimization algorithm + limit_max: float, optional (default=10) + Controls the semi supervised mode. Transport between labeled source + and target samples of different classes will exhibit an infinite cost + (10 times the maximum value of the cost matrix) + + Attributes + ---------- + coupling_ : array-like, shape (n_source_samples, n_target_samples) + The optimal coupling + log_ : dictionary + The dictionary of log, empty dic if parameter log is not True + + References + ---------- + + .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, + "Optimal Transport for Domain Adaptation," in IEEE + Transactions on Pattern Analysis and Machine Intelligence , + vol.PP, no.99, pp.1-1 + .. [2] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). + Generalized conditional gradient: analysis of convergence + and applications. arXiv preprint arXiv:1510.06567. + + """ + + def __init__(self, reg_e=1., reg_cl=0.1, + max_iter=10, max_inner_iter=200, + tol=10e-9, verbose=False, log=False, + metric="sqeuclidean", norm=None, + distribution_estimation=distribution_estimation_uniform, + out_of_sample_map='ferradans', limit_max=10): + + self.reg_e = reg_e + self.reg_cl = reg_cl + self.max_iter = max_iter + self.max_inner_iter = max_inner_iter + self.tol = tol + self.verbose = verbose + self.log = log + self.metric = metric + self.norm = norm + self.distribution_estimation = distribution_estimation + self.out_of_sample_map = out_of_sample_map + self.limit_max = limit_max + + def fit(self, Xs, ys=None, Xt=None, yt=None): + """Build a coupling matrix from source and target sets of samples + (Xs, ys) and (Xt, yt) + + Parameters + ---------- + Xs : array-like, shape (n_source_samples, n_features) + The training input samples. + ys : array-like, shape (n_source_samples,) + The class labels + Xt : array-like, shape (n_target_samples, n_features) + The training input samples. + yt : array-like, shape (n_labeled_target_samples,) + The class labels + + Returns + ------- + self : object + Returns self. + """ + + # check the necessary inputs parameters are here + if check_params(Xs=Xs, Xt=Xt, ys=ys): + + super(SinkhornL1l2Transport, self).fit(Xs, ys, Xt, yt) + + returned_ = sinkhorn_l1l2_gl( + a=self.mu_s, labels_a=ys, b=self.mu_t, M=self.cost_, + reg=self.reg_e, eta=self.reg_cl, numItermax=self.max_iter, + numInnerItermax=self.max_inner_iter, stopInnerThr=self.tol, + verbose=self.verbose, log=self.log) + + # deal with the value of log + if self.log: + self.coupling_, self.log_ = returned_ + else: + self.coupling_ = returned_ + self.log_ = dict() + + return self + + +class MappingTransport(BaseEstimator): + """MappingTransport: DA methods that aims at jointly estimating a optimal + transport coupling and the associated mapping + + Parameters + ---------- + mu : float, optional (default=1) + Weight for the linear OT loss (>0) + eta : float, optional (default=0.001) + Regularization term for the linear mapping L (>0) + bias : bool, optional (default=False) + Estimate linear mapping with constant bias + metric : string, optional (default="sqeuclidean") + The ground metric for the Wasserstein problem + norm : string, optional (default=None) + If given, normalize the ground metric to avoid numerical errors that + can occur with large metric values. + kernel : string, optional (default="linear") + The kernel to use either linear or gaussian + sigma : float, optional (default=1) + The gaussian kernel parameter + max_iter : int, optional (default=100) + Max number of BCD iterations + tol : float, optional (default=1e-5) + Stop threshold on relative loss decrease (>0) + max_inner_iter : int, optional (default=10) + Max number of iterations (inner CG solver) + inner_tol : float, optional (default=1e-6) + Stop threshold on error (inner CG solver) (>0) + verbose : bool, optional (default=False) + Print information along iterations + log : bool, optional (default=False) + record log if True + + Attributes + ---------- + coupling_ : array-like, shape (n_source_samples, n_target_samples) + The optimal coupling + mapping_ : array-like, shape (n_features (+ 1), n_features) + (if bias) for kernel == linear + The associated mapping + array-like, shape (n_source_samples (+ 1), n_features) + (if bias) for kernel == gaussian + log_ : dictionary + The dictionary of log, empty dic if parameter log is not True + + References + ---------- + + .. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard, + "Mapping estimation for discrete optimal transport", + Neural Information Processing Systems (NIPS), 2016. + + """ + + def __init__(self, mu=1, eta=0.001, bias=False, metric="sqeuclidean", + norm=None, kernel="linear", sigma=1, max_iter=100, tol=1e-5, + max_inner_iter=10, inner_tol=1e-6, log=False, verbose=False, + verbose2=False): + + self.metric = metric + self.norm = norm + self.mu = mu + self.eta = eta + self.bias = bias + self.kernel = kernel + self.sigma = sigma + self.max_iter = max_iter + self.tol = tol + self.max_inner_iter = max_inner_iter + self.inner_tol = inner_tol + self.log = log + self.verbose = verbose + self.verbose2 = verbose2 + + def fit(self, Xs=None, ys=None, Xt=None, yt=None): + """Builds an optimal coupling and estimates the associated mapping + from source and target sets of samples (Xs, ys) and (Xt, yt) + + Parameters + ---------- + Xs : array-like, shape (n_source_samples, n_features) + The training input samples. + ys : array-like, shape (n_source_samples,) + The class labels + Xt : array-like, shape (n_target_samples, n_features) + The training input samples. + yt : array-like, shape (n_labeled_target_samples,) + The class labels + + Returns + ------- + self : object + Returns self + """ + + # check the necessary inputs parameters are here + if check_params(Xs=Xs, Xt=Xt): + + self.xs_ = Xs + self.xt_ = Xt + + if self.kernel == "linear": + returned_ = joint_OT_mapping_linear( + Xs, Xt, mu=self.mu, eta=self.eta, bias=self.bias, + verbose=self.verbose, verbose2=self.verbose2, + numItermax=self.max_iter, + numInnerItermax=self.max_inner_iter, stopThr=self.tol, + stopInnerThr=self.inner_tol, log=self.log) + + elif self.kernel == "gaussian": + returned_ = joint_OT_mapping_kernel( + Xs, Xt, mu=self.mu, eta=self.eta, bias=self.bias, + sigma=self.sigma, verbose=self.verbose, + verbose2=self.verbose, numItermax=self.max_iter, + numInnerItermax=self.max_inner_iter, + stopInnerThr=self.inner_tol, stopThr=self.tol, + log=self.log) + + # deal with the value of log + if self.log: + self.coupling_, self.mapping_, self.log_ = returned_ + else: + self.coupling_, self.mapping_ = returned_ + self.log_ = dict() + + return self + + def transform(self, Xs): + """Transports source samples Xs onto target ones Xt + + Parameters + ---------- + Xs : array-like, shape (n_source_samples, n_features) + The training input samples. + + Returns + ------- + transp_Xs : array-like, shape (n_source_samples, n_features) + The transport source samples. + """ + + # check the necessary inputs parameters are here + if check_params(Xs=Xs): + + if np.array_equal(self.xs_, Xs): + # perform standard barycentric mapping + transp = self.coupling_ / np.sum(self.coupling_, 1)[:, None] + + # set nans to 0 + transp[~ np.isfinite(transp)] = 0 + + # compute transported samples + transp_Xs = np.dot(transp, self.xt_) + else: + if self.kernel == "gaussian": + K = kernel(Xs, self.xs_, method=self.kernel, + sigma=self.sigma) + elif self.kernel == "linear": + K = Xs + if self.bias: + K = np.hstack((K, np.ones((Xs.shape[0], 1)))) + transp_Xs = K.dot(self.mapping_) + + return transp_Xs diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h index 40d7192..aa92441 100644 --- a/ot/lp/EMD.h +++ b/ot/lp/EMD.h @@ -23,7 +23,12 @@ using namespace lemon; typedef unsigned int node_id_type; +enum ProblemType { + INFEASIBLE, + OPTIMAL, + UNBOUNDED +}; -void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost); +int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int max_iter); #endif diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp index cad4750..c8c2eb3 100644 --- a/ot/lp/EMD_wrapper.cpp +++ b/ot/lp/EMD_wrapper.cpp @@ -15,11 +15,10 @@ #include "EMD.h" -void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost) { +int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int max_iter) { // beware M and C anre strored in row major C style!!! int n, m, i,cur; double max; - int max_iter=10000; typedef FullBipartiteDigraph Digraph; DIGRAPH_TYPEDEFS(FullBipartiteDigraph); @@ -46,7 +45,7 @@ void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double * std::vector<int> indI(n), indJ(m); std::vector<double> weights1(n), weights2(m); Digraph di(n, m); - NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m,max_iter); + NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m, max_iter); // Set supply and demand, don't account for 0 values (faster) @@ -116,5 +115,5 @@ void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double * }; - + return ret; } diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 6e0bdb8..de91e74 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -14,8 +14,7 @@ from ..utils import parmap import multiprocessing - -def emd(a, b, M): +def emd(a, b, M, numItermax=100000): """Solves the Earth Movers distance problem and returns the OT matrix @@ -40,6 +39,9 @@ def emd(a, b, M): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix + numItermax : int, optional (default=100000) + The maximum number of iterations before stopping the optimization + algorithm if it has not converged. Returns ------- @@ -52,7 +54,7 @@ def emd(a, b, M): Simple example with obvious solution. The function emd accepts lists and perform automatic conversion to numpy arrays - + >>> import ot >>> a=[.5,.5] >>> b=[.5,.5] @@ -84,10 +86,11 @@ def emd(a, b, M): if len(b) == 0: b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] - return emd_c(a, b, M) + return emd_c(a, b, M, numItermax) + -def emd2(a, b, M,processes=multiprocessing.cpu_count()): - """Solves the Earth Movers distance problem and returns the loss +def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000): + """Solves the Earth Movers distance problem and returns the loss .. math:: \gamma = arg\min_\gamma <\gamma,M>_F @@ -110,6 +113,9 @@ def emd2(a, b, M,processes=multiprocessing.cpu_count()): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix + numItermax : int, optional (default=100000) + The maximum number of iterations before stopping the optimization + algorithm if it has not converged. Returns ------- @@ -122,15 +128,15 @@ def emd2(a, b, M,processes=multiprocessing.cpu_count()): Simple example with obvious solution. The function emd accepts lists and perform automatic conversion to numpy arrays - - + + >>> import ot >>> a=[.5,.5] >>> b=[.5,.5] >>> M=[[0.,1.],[1.,0.]] >>> ot.emd2(a,b,M) 0.0 - + References ---------- @@ -153,16 +159,14 @@ def emd2(a, b, M,processes=multiprocessing.cpu_count()): a = np.ones((M.shape[0], ), dtype=np.float64)/M.shape[0] if len(b) == 0: b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] - - if len(b.shape)==1: - return emd2_c(a, b, M) + + if len(b.shape) == 1: + return emd2_c(a, b, M, numItermax) else: - nb=b.shape[1] - #res=[emd2_c(a,b[:,i].copy(),M) for i in range(nb)] + nb = b.shape[1] + # res = [emd2_c(a, b[:, i].copy(), M, numItermax) for i in range(nb)] + def f(b): - return emd2_c(a,b,M) - res= parmap(f, [b[:,i] for i in range(nb)],processes) + return emd2_c(a, b, M, numItermax) + res = parmap(f, [b[:, i] for i in range(nb)], processes) return np.array(res) - - -
\ No newline at end of file diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 46c96c1..26d3330 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -15,53 +15,57 @@ cimport cython cdef extern from "EMD.h": - void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost) + int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int max_iter) + cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED @cython.boundscheck(False) @cython.wraparound(False) -def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M): +def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int max_iter): """ Solves the Earth Movers distance problem and returns the optimal transport matrix - + gamm=emd(a,b,M) - + .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F - + \gamma = arg\min_\gamma <\gamma,M>_F + s.t. \gamma 1 = a - - \gamma^T 1= b - + + \gamma^T 1= b + \gamma\geq 0 where : - + - M is the metric cost matrix - a and b are the sample weights - + Parameters ---------- a : (ns,) ndarray, float64 - source histogram + source histogram b : (nt,) ndarray, float64 target histogram M : (ns,nt) ndarray, float64 - loss matrix - - + loss matrix + max_iter : int + The maximum number of iterations before stopping the optimization + algorithm if it has not converged. + + Returns ------- gamma: (ns x nt) ndarray Optimal transportation matrix for the given parameters - + """ cdef int n1= M.shape[0] cdef int n2= M.shape[1] cdef float cost=0 cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([n1, n2]) - + if not len(a): a=np.ones((n1,))/n1 @@ -69,53 +73,61 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod b=np.ones((n2,))/n2 # calling the function - EMD_wrap(n1,n2,<double*> a.data,<double*> b.data,<double*> M.data,<double*> G.data,<double*> &cost) + cdef int resultSolver = EMD_wrap(n1,n2,<double*> a.data,<double*> b.data,<double*> M.data,<double*> G.data,<double*> &cost, max_iter) + if resultSolver != OPTIMAL: + if resultSolver == INFEASIBLE: + print("Problem infeasible. Try to increase numItermax.") + elif resultSolver == UNBOUNDED: + print("Problem unbounded") return G @cython.boundscheck(False) @cython.wraparound(False) -def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M): +def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int max_iter): """ Solves the Earth Movers distance problem and returns the optimal transport loss - + gamm=emd(a,b,M) - + .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F - + \gamma = arg\min_\gamma <\gamma,M>_F + s.t. \gamma 1 = a - - \gamma^T 1= b - + + \gamma^T 1= b + \gamma\geq 0 where : - + - M is the metric cost matrix - a and b are the sample weights - + Parameters ---------- a : (ns,) ndarray, float64 - source histogram + source histogram b : (nt,) ndarray, float64 target histogram M : (ns,nt) ndarray, float64 - loss matrix - - + loss matrix + max_iter : int + The maximum number of iterations before stopping the optimization + algorithm if it has not converged. + + Returns ------- gamma: (ns x nt) ndarray Optimal transportation matrix for the given parameters - + """ cdef int n1= M.shape[0] cdef int n2= M.shape[1] cdef float cost=0 cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([n1, n2]) - + if not len(a): a=np.ones((n1,))/n1 @@ -123,8 +135,13 @@ def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mo b=np.ones((n2,))/n2 # calling the function - EMD_wrap(n1,n2,<double*> a.data,<double*> b.data,<double*> M.data,<double*> G.data,<double*> &cost) - + cdef int resultSolver = EMD_wrap(n1,n2,<double*> a.data,<double*> b.data,<double*> M.data,<double*> G.data,<double*> &cost, max_iter) + if resultSolver != OPTIMAL: + if resultSolver == INFEASIBLE: + print("Problem infeasible. Try to inscrease numItermax.") + elif resultSolver == UNBOUNDED: + print("Problem unbounded") + cost=0 for i in range(n1): for j in range(n2): diff --git a/ot/utils.py b/ot/utils.py index 2b2f8b3..31a002b 100644 --- a/ot/utils.py +++ b/ot/utils.py @@ -13,6 +13,9 @@ import time import numpy as np from scipy.spatial.distance import cdist +import sys +import warnings + __time_tic_toc = time.time() @@ -131,6 +134,39 @@ def dist0(n, method='lin_square'): return res +def cost_normalization(C, norm=None): + """ Apply normalization to the loss matrix + + + Parameters + ---------- + C : np.array (n1, n2) + The cost matrix to normalize. + norm : str + type of normalization from 'median','max','log','loglog'. Any other + value do not normalize. + + + Returns + ------- + + C : np.array (n1, n2) + The input cost matrix normalized according to given norm. + + """ + + if norm == "median": + C /= float(np.median(C)) + elif norm == "max": + C /= float(np.max(C)) + elif norm == "log": + C = np.log(1 + C) + elif norm == "loglog": + C = np.log(1 + np.log(1 + C)) + + return C + + def dots(*args): """ dots function for multiple matrix multiply """ return reduce(np.dot, args) @@ -163,3 +199,240 @@ def parmap(f, X, nprocs=multiprocessing.cpu_count()): [p.join() for p in proc] return [x for i, x in sorted(res)] + + +def check_params(**kwargs): + """check_params: check whether some parameters are missing + """ + + missing_params = [] + check = True + + for param in kwargs: + if kwargs[param] is None: + missing_params.append(param) + + if len(missing_params) > 0: + print("POT - Warning: following necessary parameters are missing") + for p in missing_params: + print("\n", p) + + check = False + + return check + + +class deprecated(object): + """Decorator to mark a function or class as deprecated. + + deprecated class from scikit-learn package + https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/deprecation.py + Issue a warning when the function is called/the class is instantiated and + adds a warning to the docstring. + The optional extra argument will be appended to the deprecation message + and the docstring. Note: to use this with the default value for extra, put + in an empty of parentheses: + >>> from ot.deprecation import deprecated + >>> @deprecated() + ... def some_function(): pass + + Parameters + ---------- + extra : string + to be added to the deprecation messages + """ + + # Adapted from http://wiki.python.org/moin/PythonDecoratorLibrary, + # but with many changes. + + def __init__(self, extra=''): + self.extra = extra + + def __call__(self, obj): + """Call method + Parameters + ---------- + obj : object + """ + if isinstance(obj, type): + return self._decorate_class(obj) + else: + return self._decorate_fun(obj) + + def _decorate_class(self, cls): + msg = "Class %s is deprecated" % cls.__name__ + if self.extra: + msg += "; %s" % self.extra + + # FIXME: we should probably reset __new__ for full generality + init = cls.__init__ + + def wrapped(*args, **kwargs): + warnings.warn(msg, category=DeprecationWarning) + return init(*args, **kwargs) + + cls.__init__ = wrapped + + wrapped.__name__ = '__init__' + wrapped.__doc__ = self._update_doc(init.__doc__) + wrapped.deprecated_original = init + + return cls + + def _decorate_fun(self, fun): + """Decorate function fun""" + + msg = "Function %s is deprecated" % fun.__name__ + if self.extra: + msg += "; %s" % self.extra + + def wrapped(*args, **kwargs): + warnings.warn(msg, category=DeprecationWarning) + return fun(*args, **kwargs) + + wrapped.__name__ = fun.__name__ + wrapped.__dict__ = fun.__dict__ + wrapped.__doc__ = self._update_doc(fun.__doc__) + + return wrapped + + def _update_doc(self, olddoc): + newdoc = "DEPRECATED" + if self.extra: + newdoc = "%s: %s" % (newdoc, self.extra) + if olddoc: + newdoc = "%s\n\n%s" % (newdoc, olddoc) + return newdoc + + +def _is_deprecated(func): + """Helper to check if func is wraped by our deprecated decorator""" + if sys.version_info < (3, 5): + raise NotImplementedError("This is only available for python3.5 " + "or above") + closures = getattr(func, '__closure__', []) + if closures is None: + closures = [] + is_deprecated = ('deprecated' in ''.join([c.cell_contents + for c in closures + if isinstance(c.cell_contents, str)])) + return is_deprecated + + +class BaseEstimator(object): + """Base class for most objects in POT + adapted from sklearn BaseEstimator class + + Notes + ----- + All estimators should specify all the parameters that can be set + at the class level in their ``__init__`` as explicit keyword + arguments (no ``*args`` or ``**kwargs``). + """ + + @classmethod + def _get_param_names(cls): + """Get parameter names for the estimator""" + try: + from inspect import signature + except ImportError: + from .externals.funcsigs import signature + # fetch the constructor or the original constructor before + # deprecation wrapping if any + init = getattr(cls.__init__, 'deprecated_original', cls.__init__) + if init is object.__init__: + # No explicit constructor to introspect + return [] + + # introspect the constructor arguments to find the model parameters + # to represent + init_signature = signature(init) + # Consider the constructor parameters excluding 'self' + parameters = [p for p in init_signature.parameters.values() + if p.name != 'self' and p.kind != p.VAR_KEYWORD] + for p in parameters: + if p.kind == p.VAR_POSITIONAL: + raise RuntimeError("POT estimators should always " + "specify their parameters in the signature" + " of their __init__ (no varargs)." + " %s with constructor %s doesn't " + " follow this convention." + % (cls, init_signature)) + # Extract and sort argument names excluding 'self' + return sorted([p.name for p in parameters]) + + def get_params(self, deep=True): + """Get parameters for this estimator. + + Parameters + ---------- + deep : boolean, optional + If True, will return the parameters for this estimator and + contained subobjects that are estimators. + + Returns + ------- + params : mapping of string to any + Parameter names mapped to their values. + """ + out = dict() + for key in self._get_param_names(): + # We need deprecation warnings to always be on in order to + # catch deprecated param values. + # This is set in utils/__init__.py but it gets overwritten + # when running under python3 somehow. + warnings.simplefilter("always", DeprecationWarning) + try: + with warnings.catch_warnings(record=True) as w: + value = getattr(self, key, None) + if len(w) and w[0].category == DeprecationWarning: + # if the parameter is deprecated, don't show it + continue + finally: + warnings.filters.pop(0) + + # XXX: should we rather test if instance of estimator? + if deep and hasattr(value, 'get_params'): + deep_items = value.get_params().items() + out.update((key + '__' + k, val) for k, val in deep_items) + out[key] = value + return out + + def set_params(self, **params): + """Set the parameters of this estimator. + + The method works on simple estimators as well as on nested objects + (such as pipelines). The latter have parameters of the form + ``<component>__<parameter>`` so that it's possible to update each + component of a nested object. + + Returns + ------- + self + """ + if not params: + # Simple optimisation to gain speed (inspect is slow) + return self + valid_params = self.get_params(deep=True) + # for key, value in iteritems(params): + for key, value in params.items(): + split = key.split('__', 1) + if len(split) > 1: + # nested objects case + name, sub_name = split + if name not in valid_params: + raise ValueError('Invalid parameter %s for estimator %s. ' + 'Check the list of available parameters ' + 'with `estimator.get_params().keys()`.' % + (name, self)) + sub_object = valid_params[name] + sub_object.set_params(**{sub_name: value}) + else: + # simple objects case + if key not in valid_params: + raise ValueError('Invalid parameter %s for estimator %s. ' + 'Check the list of available parameters ' + 'with `estimator.get_params().keys()`.' % + (key, self.__class__.__name__)) + setattr(self, key, value) + return self |