diff options
author | ievred <ievgen.redko@univ-st-etienne.fr> | 2020-04-08 10:08:47 +0200 |
---|---|---|
committer | ievred <ievgen.redko@univ-st-etienne.fr> | 2020-04-08 10:08:47 +0200 |
commit | c68b52d1623683e86555484bf9a4875a66957bb6 (patch) | |
tree | e7727a19ed7ba3a47a1df1ec893d0bc27c2eec57 /ot/da.py | |
parent | 2c9f992157844d6253a302905417e86580ac6b12 (diff) |
remove laplace from jcpot
Diffstat (limited to 'ot/da.py')
-rw-r--r-- | ot/da.py | 216 |
1 files changed, 0 insertions, 216 deletions
@@ -748,115 +748,6 @@ def OT_mapping_linear(xs, xt, reg=1e-6, ws=None, return A, b -def emd_laplace(a, b, xs, xt, M, sim, eta, alpha, - numItermax, stopThr, numInnerItermax, - stopInnerThr, log=False, verbose=False, **kwargs): - r"""Solve the optimal transport problem (OT) with Laplacian regularization - - .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F + eta\Omega_\alpha(\gamma) - - s.t.\ \gamma 1 = a - - \gamma^T 1= b - - \gamma\geq 0 - - where: - - - a and b are source and target weights (sum to 1) - - xs and xt are source and target samples - - M is the (ns,nt) metric cost matrix - - :math:`\Omega_\alpha` is the Laplacian regularization term - :math:`\Omega_\alpha = (1-\alpha)/n_s^2\sum_{i,j}S^s_{i,j}\|T(\mathbf{x}^s_i)-T(\mathbf{x}^s_j)\|^2+\alpha/n_t^2\sum_{i,j}S^t_{i,j}^'\|T(\mathbf{x}^t_i)-T(\mathbf{x}^t_j)\|^2` - with :math:`S^s_{i,j}, S^t_{i,j}` denoting source and target similarity matrices and :math:`T(\cdot)` being a barycentric mapping - - The algorithm used for solving the problem is the conditional gradient algorithm as proposed in [5]. - - Parameters - ---------- - a : np.ndarray (ns,) - samples weights in the source domain - b : np.ndarray (nt,) - samples weights in the target domain - xs : np.ndarray (ns,d) - samples in the source domain - xt : np.ndarray (nt,d) - samples in the target domain - M : np.ndarray (ns,nt) - loss matrix - eta : float - Regularization term for Laplacian regularization - alpha : float - Regularization term for source domain's importance in regularization - numItermax : int, optional - Max number of iterations - stopThr : float, optional - Stop threshold on error (inner emd solver) (>0) - numInnerItermax : int, optional - Max number of iterations (inner CG solver) - stopInnerThr : float, optional - Stop threshold on error (inner CG solver) (>0) - verbose : bool, optional - Print information along iterations - log : bool, optional - record log if True - - - Returns - ------- - gamma : (ns x nt) ndarray - Optimal transportation matrix for the given parameters - log : dict - log dictionary return only if log==True in parameters - - - References - ---------- - - .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, - "Optimal Transport for Domain Adaptation," in IEEE - Transactions on Pattern Analysis and Machine Intelligence , - vol.PP, no.99, pp.1-1 - - See Also - -------- - ot.lp.emd : Unregularized OT - ot.optim.cg : General regularized OT - - """ - if sim == 'gauss': - if 'rbfparam' not in kwargs: - kwargs['rbfparam'] = 1 / (2 * (np.mean(dist(xs, xs, 'sqeuclidean')) ** 2)) - sS = kernel(xs, xs, method=kwargs['sim'], sigma=kwargs['rbfparam']) - sT = kernel(xt, xt, method=kwargs['sim'], sigma=kwargs['rbfparam']) - - elif sim == 'knn': - if 'nn' not in kwargs: - kwargs['nn'] = 5 - - from sklearn.neighbors import kneighbors_graph - - sS = kneighbors_graph(xs, kwargs['nn']).toarray() - sS = (sS + sS.T) / 2 - sT = kneighbors_graph(xt, kwargs['nn']).toarray() - sT = (sT + sT.T) / 2 - - lS = laplacian(sS) - lT = laplacian(sT) - - def f(G): - return alpha * np.trace(np.dot(xt.T, np.dot(G.T, np.dot(lS, np.dot(G, xt))))) \ - + (1 - alpha) * np.trace(np.dot(xs.T, np.dot(G, np.dot(lT, np.dot(G.T, xs))))) - - def df(G): - return alpha * np.dot(lS + lS.T, np.dot(G, np.dot(xt, xt.T)))\ - + (1 - alpha) * np.dot(xs, np.dot(xs.T, np.dot(G, lT + lT.T))) - - return cg(a, b, M, reg=eta, f=f, df=df, G0=None, numItermax=numItermax, numItermaxEmd=numInnerItermax, - stopThr=stopThr, stopThr2=stopInnerThr, verbose=verbose, log=log) - - def distribution_estimation_uniform(X): """estimates a uniform distribution from an array of samples X @@ -1603,113 +1494,6 @@ class SinkhornLpl1Transport(BaseTransport): return self -class EMDLaplaceTransport(BaseTransport): - - """Domain Adapatation OT method based on Earth Mover's Distance with Laplacian regularization - - Parameters - ---------- - reg_lap : float, optional (default=1) - Laplacian regularization parameter - reg_src : float, optional (default=0.5) - Source relative importance in regularization - metric : string, optional (default="sqeuclidean") - The ground metric for the Wasserstein problem - norm : string, optional (default=None) - If given, normalize the ground metric to avoid numerical errors that - can occur with large metric values. - similarity : string, optional (default="knn") - The similarity to use either knn or gaussian - max_iter : int, optional (default=100) - Max number of BCD iterations - tol : float, optional (default=1e-5) - Stop threshold on relative loss decrease (>0) - max_inner_iter : int, optional (default=10) - Max number of iterations (inner CG solver) - inner_tol : float, optional (default=1e-6) - Stop threshold on error (inner CG solver) (>0) - log : int, optional (default=False) - Controls the logs of the optimization algorithm - distribution_estimation : callable, optional (defaults to the uniform) - The kind of distribution estimation to employ - out_of_sample_map : string, optional (default="ferradans") - The kind of out of sample mapping to apply to transport samples - from a domain into another one. Currently the only possible option is - "ferradans" which uses the method proposed in [6]. - - Attributes - ---------- - coupling_ : array-like, shape (n_source_samples, n_target_samples) - The optimal coupling - - References - ---------- - .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, - "Optimal Transport for Domain Adaptation," in IEEE Transactions - on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 - """ - - def __init__(self, reg_lap=1., reg_src=1., alpha=0.5, - metric="sqeuclidean", norm=None, similarity="knn", max_iter=100, tol=1e-9, - max_inner_iter=100000, inner_tol=1e-9, log=False, verbose=False, - distribution_estimation=distribution_estimation_uniform, - out_of_sample_map='ferradans'): - self.reg_lap = reg_lap - self.reg_src = reg_src - self.alpha = alpha - self.metric = metric - self.norm = norm - self.similarity = similarity - self.max_iter = max_iter - self.tol = tol - self.max_inner_iter = max_inner_iter - self.inner_tol = inner_tol - self.log = log - self.verbose = verbose - self.distribution_estimation = distribution_estimation - self.out_of_sample_map = out_of_sample_map - - def fit(self, Xs, ys=None, Xt=None, yt=None): - """Build a coupling matrix from source and target sets of samples - (Xs, ys) and (Xt, yt) - - Parameters - ---------- - Xs : array-like, shape (n_source_samples, n_features) - The training input samples. - ys : array-like, shape (n_source_samples,) - The class labels - Xt : array-like, shape (n_target_samples, n_features) - The training input samples. - yt : array-like, shape (n_target_samples,) - The class labels. If some target samples are unlabeled, fill the - yt's elements with -1. - - Warning: Note that, due to this convention -1 cannot be used as a - class label - - Returns - ------- - self : object - Returns self. - """ - - super(EMDLaplaceTransport, self).fit(Xs, ys, Xt, yt) - - returned_ = emd_laplace(a=self.mu_s, b=self.mu_t, xs=self.xs_, - xt=self.xt_, M=self.cost_, sim=self.similarity, eta=self.reg_lap, alpha=self.reg_src, - numItermax=self.max_iter, stopThr=self.tol, numInnerItermax=self.max_inner_iter, - stopInnerThr=self.inner_tol, log=self.log, verbose=self.verbose) - - # coupling estimation - if self.log: - self.coupling_, self.log_ = returned_ - else: - self.coupling_ = returned_ - self.log_ = dict() - return self - - class SinkhornL1l2Transport(BaseTransport): """Domain Adapatation OT method based on sinkhorn algorithm + |