remove laplace from jcpot

author: ievred <ievgen.redko@univ-st-etienne.fr> 2020-04-08 10:08:47 +0200
committer: ievred <ievgen.redko@univ-st-etienne.fr> 2020-04-08 10:08:47 +0200
commit: c68b52d1623683e86555484bf9a4875a66957bb6 (patch)
tree: e7727a19ed7ba3a47a1df1ec893d0bc27c2eec57
parent: 2c9f992157844d6253a302905417e86580ac6b12 (diff)
5 files changed, 5 insertions, 403 deletions
diff --git a/examples/plot_otda_jcpot.py b/examples/plot_otda_jcpot.py
index 316fa8b..c495690 100644
--- a/examples/plot_otda_jcpot.py
+++ b/examples/plot_otda_jcpot.py
@@ -115,7 +115,7 @@ pl.axis('off')
 ##############################################################################
 # Instantiate JCPOT adaptation algorithm and fit it
 # ----------------------------------------------------------------------------
-otda = ot.da.JCPOTTransport(reg_e=1e-2, max_iter=1000, metric='sqeuclidean', tol=1e-9, verbose=True, log=True)
+otda = ot.da.JCPOTTransport(reg_e=1, max_iter=1000, metric='sqeuclidean', tol=1e-9, verbose=True, log=True)
 otda.fit(all_Xr, all_Yr, xt)
 
 ws1 = otda.proportions_.dot(otda.log_['D2'][0])
@@ -126,8 +126,8 @@ pl.clf()
 plot_ax(dec1, 'Source 1')
 plot_ax(dec2, 'Source 2')
 plot_ax(dect, 'Target')
-print_G(ot.bregman.sinkhorn(ws1, [], otda.log_['M'][0], reg=1e-2), xs1, ys1, xt)
-print_G(ot.bregman.sinkhorn(ws2, [], otda.log_['M'][1], reg=1e-2), xs2, ys2, xt)
+print_G(ot.bregman.sinkhorn(ws1, [], otda.log_['M'][0], reg=1e-1), xs1, ys1, xt)
+print_G(ot.bregman.sinkhorn(ws2, [], otda.log_['M'][1], reg=1e-1), xs2, ys2, xt)
 pl.scatter(xs1[:, 0], xs1[:, 1], c=ys1, s=35, marker='x', cmap='Set1', vmax=9)
 pl.scatter(xs2[:, 0], xs2[:, 1], c=ys2, s=35, marker='+', cmap='Set1', vmax=9)
 pl.scatter(xt[:, 0], xt[:, 1], c=yt, s=35, marker='o', cmap='Set1', vmax=9)
@@ -154,8 +154,8 @@ pl.clf()
 plot_ax(dec1, 'Source 1')
 plot_ax(dec2, 'Source 2')
 plot_ax(dect, 'Target')
-print_G(ot.bregman.sinkhorn(ws1, [], otda.log_['M'][0], reg=1e-2), xs1, ys1, xt)
-print_G(ot.bregman.sinkhorn(ws2, [], otda.log_['M'][1], reg=1e-2), xs2, ys2, xt)
+print_G(ot.bregman.sinkhorn(ws1, [], otda.log_['M'][0], reg=1e-1), xs1, ys1, xt)
+print_G(ot.bregman.sinkhorn(ws2, [], otda.log_['M'][1], reg=1e-1), xs2, ys2, xt)
 pl.scatter(xs1[:, 0], xs1[:, 1], c=ys1, s=35, marker='x', cmap='Set1', vmax=9)
 pl.scatter(xs2[:, 0], xs2[:, 1], c=ys2, s=35, marker='+', cmap='Set1', vmax=9)
 pl.scatter(xt[:, 0], xt[:, 1], c=yt, s=35, marker='o', cmap='Set1', vmax=9)
diff --git a/examples/plot_otda_laplacian.py b/examples/plot_otda_laplacian.py
deleted file mode 100644
index 965380c..0000000
--- a/examples/plot_otda_laplacian.py
+++ /dev/null
@@ -1,127 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-========================
-OT for domain adaptation
-========================
-
-This example introduces a domain adaptation in a 2D setting and OTDA
-approache with Laplacian regularization.
-
-"""
-
-# Authors: Ievgen Redko <ievgen.redko@univ-st-etienne.fr>
-
-# License: MIT License
-
-import matplotlib.pylab as pl
-import ot
-
-##############################################################################
-# Generate data
-# -------------
-
-n_source_samples = 150
-n_target_samples = 150
-
-Xs, ys = ot.datasets.make_data_classif('3gauss', n_source_samples)
-Xt, yt = ot.datasets.make_data_classif('3gauss2', n_target_samples)
-
-
-##############################################################################
-# Instantiate the different transport algorithms and fit them
-# -----------------------------------------------------------
-
-# EMD Transport
-ot_emd = ot.da.EMDTransport()
-ot_emd.fit(Xs=Xs, Xt=Xt)
-
-# Sinkhorn Transport
-ot_sinkhorn = ot.da.SinkhornTransport(reg_e=.01)
-ot_sinkhorn.fit(Xs=Xs, Xt=Xt)
-
-# EMD Transport with Laplacian regularization
-ot_emd_laplace = ot.da.EMDLaplaceTransport(reg_lap=100, reg_src=1)
-ot_emd_laplace.fit(Xs=Xs, Xt=Xt)
-
-# transport source samples onto target samples
-transp_Xs_emd = ot_emd.transform(Xs=Xs)
-transp_Xs_sinkhorn = ot_sinkhorn.transform(Xs=Xs)
-transp_Xs_emd_laplace = ot_emd_laplace.transform(Xs=Xs)
-
-##############################################################################
-# Fig 1 : plots source and target samples
-# ---------------------------------------
-
-pl.figure(1, figsize=(10, 5))
-pl.subplot(1, 2, 1)
-pl.scatter(Xs[:, 0], Xs[:, 1], c=ys, marker='+', label='Source samples')
-pl.xticks([])
-pl.yticks([])
-pl.legend(loc=0)
-pl.title('Source  samples')
-
-pl.subplot(1, 2, 2)
-pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o', label='Target samples')
-pl.xticks([])
-pl.yticks([])
-pl.legend(loc=0)
-pl.title('Target samples')
-pl.tight_layout()
-
-
-##############################################################################
-# Fig 2 : plot optimal couplings and transported samples
-# ------------------------------------------------------
-
-param_img = {'interpolation': 'nearest'}
-
-pl.figure(2, figsize=(15, 8))
-pl.subplot(2, 3, 1)
-pl.imshow(ot_emd.coupling_, **param_img)
-pl.xticks([])
-pl.yticks([])
-pl.title('Optimal coupling\nEMDTransport')
-
-pl.figure(2, figsize=(15, 8))
-pl.subplot(2, 3, 2)
-pl.imshow(ot_sinkhorn.coupling_, **param_img)
-pl.xticks([])
-pl.yticks([])
-pl.title('Optimal coupling\nSinkhornTransport')
-
-pl.subplot(2, 3, 3)
-pl.imshow(ot_emd_laplace.coupling_, **param_img)
-pl.xticks([])
-pl.yticks([])
-pl.title('Optimal coupling\nEMDLaplaceTransport')
-
-pl.subplot(2, 3, 4)
-pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o',
-           label='Target samples', alpha=0.3)
-pl.scatter(transp_Xs_emd[:, 0], transp_Xs_emd[:, 1], c=ys,
-           marker='+', label='Transp samples', s=30)
-pl.xticks([])
-pl.yticks([])
-pl.title('Transported samples\nEmdTransport')
-pl.legend(loc="lower left")
-
-pl.subplot(2, 3, 5)
-pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o',
-           label='Target samples', alpha=0.3)
-pl.scatter(transp_Xs_sinkhorn[:, 0], transp_Xs_sinkhorn[:, 1], c=ys,
-           marker='+', label='Transp samples', s=30)
-pl.xticks([])
-pl.yticks([])
-pl.title('Transported samples\nSinkhornTransport')
-
-pl.subplot(2, 3, 6)
-pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o',
-           label='Target samples', alpha=0.3)
-pl.scatter(transp_Xs_emd_laplace[:, 0], transp_Xs_emd_laplace[:, 1], c=ys,
-           marker='+', label='Transp samples', s=30)
-pl.xticks([])
-pl.yticks([])
-pl.title('Transported samples\nEMDLaplaceTransport')
-pl.tight_layout()
-
-pl.show()
diff --git a/ot/bregman.py b/ot/bregman.py
index 61dfa52..410ae85 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -1607,7 +1607,6 @@ def jcpot_barycenter(Xs, Ys, Xt, reg, metric='sqeuclidean', numItermax=100,
 
         # build the cost matrix and the Gibbs kernel
         Mtmp = dist(Xs[d], Xt, metric=metric)
-        Mtmp = Mtmp / np.median(Mtmp)
         M.append(Mtmp)
 
         Ktmp = np.empty(Mtmp.shape, dtype=Mtmp.dtype)
diff --git a/ot/da.py b/ot/da.py
index 0fdd3be..90e9e92 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -748,115 +748,6 @@ def OT_mapping_linear(xs, xt, reg=1e-6, ws=None,
         return A, b
 
 
-def emd_laplace(a, b, xs, xt, M, sim, eta, alpha,
-                numItermax, stopThr, numInnerItermax,
-                stopInnerThr, log=False, verbose=False, **kwargs):
-    r"""Solve the optimal transport problem (OT) with Laplacian regularization
-
-    .. math::
-        \gamma = arg\min_\gamma <\gamma,M>_F + eta\Omega_\alpha(\gamma)
-
-        s.t.\ \gamma 1 = a
-
-             \gamma^T 1= b
-
-             \gamma\geq 0
-
-    where:
-
-    - a and b are source and target weights (sum to 1)
-    - xs and xt are source and target samples
-    - M is the (ns,nt) metric cost matrix
-    - :math:`\Omega_\alpha` is the Laplacian regularization term
-      :math:`\Omega_\alpha = (1-\alpha)/n_s^2\sum_{i,j}S^s_{i,j}\|T(\mathbf{x}^s_i)-T(\mathbf{x}^s_j)\|^2+\alpha/n_t^2\sum_{i,j}S^t_{i,j}^'\|T(\mathbf{x}^t_i)-T(\mathbf{x}^t_j)\|^2`
-      with :math:`S^s_{i,j}, S^t_{i,j}` denoting source and target similarity matrices and :math:`T(\cdot)` being a barycentric mapping
-
-    The algorithm used for solving the problem is the conditional gradient algorithm as proposed in [5].
-
-    Parameters
-    ----------
-    a : np.ndarray (ns,)
-        samples weights in the source domain
-    b : np.ndarray (nt,)
-        samples weights in the target domain
-    xs : np.ndarray (ns,d)
-        samples in the source domain
-    xt : np.ndarray (nt,d)
-        samples in the target domain
-    M : np.ndarray (ns,nt)
-        loss matrix
-    eta : float
-        Regularization term for Laplacian regularization
-    alpha : float
-        Regularization term  for source domain's importance in regularization
-    numItermax : int, optional
-        Max number of iterations
-    stopThr : float, optional
-        Stop threshold on error (inner emd solver) (>0)
-    numInnerItermax : int, optional
-        Max number of iterations (inner CG solver)
-    stopInnerThr : float, optional
-        Stop threshold on error (inner CG solver) (>0)
-    verbose : bool, optional
-        Print information along iterations
-    log : bool, optional
-        record log if True
-
-
-    Returns
-    -------
-    gamma : (ns x nt) ndarray
-        Optimal transportation matrix for the given parameters
-    log : dict
-        log dictionary return only if log==True in parameters
-
-
-    References
-    ----------
-
-    .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
-       "Optimal Transport for Domain Adaptation," in IEEE
-       Transactions on Pattern Analysis and Machine Intelligence ,
-       vol.PP, no.99, pp.1-1
-
-    See Also
-    --------
-    ot.lp.emd : Unregularized OT
-    ot.optim.cg : General regularized OT
-
-    """
-    if sim == 'gauss':
-        if 'rbfparam' not in kwargs:
-            kwargs['rbfparam'] = 1 / (2 * (np.mean(dist(xs, xs, 'sqeuclidean')) ** 2))
-        sS = kernel(xs, xs, method=kwargs['sim'], sigma=kwargs['rbfparam'])
-        sT = kernel(xt, xt, method=kwargs['sim'], sigma=kwargs['rbfparam'])
-
-    elif sim == 'knn':
-        if 'nn' not in kwargs:
-            kwargs['nn'] = 5
-
-        from sklearn.neighbors import kneighbors_graph
-
-        sS = kneighbors_graph(xs, kwargs['nn']).toarray()
-        sS = (sS + sS.T) / 2
-        sT = kneighbors_graph(xt, kwargs['nn']).toarray()
-        sT = (sT + sT.T) / 2
-
-    lS = laplacian(sS)
-    lT = laplacian(sT)
-
-    def f(G):
-        return alpha * np.trace(np.dot(xt.T, np.dot(G.T, np.dot(lS, np.dot(G, xt))))) \
-            + (1 - alpha) * np.trace(np.dot(xs.T, np.dot(G, np.dot(lT, np.dot(G.T, xs)))))
-
-    def df(G):
-        return alpha * np.dot(lS + lS.T, np.dot(G, np.dot(xt, xt.T)))\
-            + (1 - alpha) * np.dot(xs, np.dot(xs.T, np.dot(G, lT + lT.T)))
-
-    return cg(a, b, M, reg=eta, f=f, df=df, G0=None, numItermax=numItermax, numItermaxEmd=numInnerItermax,
-              stopThr=stopThr, stopThr2=stopInnerThr, verbose=verbose, log=log)
-
-
 def distribution_estimation_uniform(X):
     """estimates a uniform distribution from an array of samples X
 
@@ -1603,113 +1494,6 @@ class SinkhornLpl1Transport(BaseTransport):
         return self
 
 
-class EMDLaplaceTransport(BaseTransport):
-
-    """Domain Adapatation OT method based on Earth Mover's Distance with Laplacian regularization
-
-    Parameters
-    ----------
-    reg_lap : float, optional (default=1)
-        Laplacian regularization parameter
-    reg_src : float, optional (default=0.5)
-        Source relative importance in regularization
-    metric : string, optional (default="sqeuclidean")
-        The ground metric for the Wasserstein problem
-    norm : string, optional (default=None)
-        If given, normalize the ground metric to avoid numerical errors that
-        can occur with large metric values.
-    similarity : string, optional (default="knn")
-        The similarity to use either knn or gaussian
-    max_iter : int, optional (default=100)
-        Max number of BCD iterations
-    tol : float, optional (default=1e-5)
-        Stop threshold on relative loss decrease (>0)
-    max_inner_iter : int, optional (default=10)
-        Max number of iterations (inner CG solver)
-    inner_tol : float, optional (default=1e-6)
-        Stop threshold on error (inner CG solver) (>0)
-    log : int, optional (default=False)
-        Controls the logs of the optimization algorithm
-    distribution_estimation : callable, optional (defaults to the uniform)
-        The kind of distribution estimation to employ
-    out_of_sample_map : string, optional (default="ferradans")
-        The kind of out of sample mapping to apply to transport samples
-        from a domain into another one. Currently the only possible option is
-        "ferradans" which uses the method proposed in [6].
-
-    Attributes
-    ----------
-    coupling_ : array-like, shape (n_source_samples, n_target_samples)
-        The optimal coupling
-
-    References
-    ----------
-    .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
-           "Optimal Transport for Domain Adaptation," in IEEE Transactions
-           on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
-    """
-
-    def __init__(self, reg_lap=1., reg_src=1., alpha=0.5,
-                 metric="sqeuclidean", norm=None, similarity="knn", max_iter=100, tol=1e-9,
-                 max_inner_iter=100000, inner_tol=1e-9, log=False, verbose=False,
-                 distribution_estimation=distribution_estimation_uniform,
-                 out_of_sample_map='ferradans'):
-        self.reg_lap = reg_lap
-        self.reg_src = reg_src
-        self.alpha = alpha
-        self.metric = metric
-        self.norm = norm
-        self.similarity = similarity
-        self.max_iter = max_iter
-        self.tol = tol
-        self.max_inner_iter = max_inner_iter
-        self.inner_tol = inner_tol
-        self.log = log
-        self.verbose = verbose
-        self.distribution_estimation = distribution_estimation
-        self.out_of_sample_map = out_of_sample_map
-
-    def fit(self, Xs, ys=None, Xt=None, yt=None):
-        """Build a coupling matrix from source and target sets of samples
-        (Xs, ys) and (Xt, yt)
-
-        Parameters
-        ----------
-        Xs : array-like, shape (n_source_samples, n_features)
-            The training input samples.
-        ys : array-like, shape (n_source_samples,)
-            The class labels
-        Xt : array-like, shape (n_target_samples, n_features)
-            The training input samples.
-        yt : array-like, shape (n_target_samples,)
-            The class labels. If some target samples are unlabeled, fill the
-            yt's elements with -1.
-
-            Warning: Note that, due to this convention -1 cannot be used as a
-            class label
-
-        Returns
-        -------
-        self : object
-            Returns self.
-        """
-
-        super(EMDLaplaceTransport, self).fit(Xs, ys, Xt, yt)
-
-        returned_ = emd_laplace(a=self.mu_s, b=self.mu_t, xs=self.xs_,
-                                xt=self.xt_, M=self.cost_, sim=self.similarity, eta=self.reg_lap, alpha=self.reg_src,
-                                numItermax=self.max_iter, stopThr=self.tol, numInnerItermax=self.max_inner_iter,
-                                stopInnerThr=self.inner_tol, log=self.log, verbose=self.verbose)
-
-        # coupling estimation
-        if self.log:
-            self.coupling_, self.log_ = returned_
-        else:
-            self.coupling_ = returned_
-            self.log_ = dict()
-        return self
-
-
 class SinkhornL1l2Transport(BaseTransport):
 
     """Domain Adapatation OT method based on sinkhorn algorithm +
diff --git a/test/test_da.py b/test/test_da.py
index 4eaf193..1517cec 100644
--- a/test/test_da.py
+++ b/test/test_da.py
@@ -601,57 +601,3 @@ def test_jcpot_transport_class():
 
     # check that the oos method is working
     assert_equal(transp_Xs_new.shape, Xs_new.shape)
-
-
-def test_emd_laplace_class():
-    """test_emd_laplace_transport
-    """
-    ns = 150
-    nt = 200
-
-    Xs, ys = make_data_classif('3gauss', ns)
-    Xt, yt = make_data_classif('3gauss2', nt)
-
-    otda = ot.da.EMDLaplaceTransport(reg_lap=0.01, max_iter=1000, tol=1e-9, verbose=False, log=True)
-
-    # test its computed
-    otda.fit(Xs=Xs, ys=ys, Xt=Xt)
-
-    assert hasattr(otda, "coupling_")
-    assert hasattr(otda, "log_")
-
-    # test dimensions of coupling
-    assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
-
-    # test all margin constraints
-    mu_s = unif(ns)
-    mu_t = unif(nt)
-
-    assert_allclose(
-        np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
-    assert_allclose(
-        np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
-
-    # test transform
-    transp_Xs = otda.transform(Xs=Xs)
-    [assert_equal(x.shape, y.shape) for x, y in zip(transp_Xs, Xs)]
-
-    Xs_new, _ = make_data_classif('3gauss', ns + 1)
-    transp_Xs_new = otda.transform(Xs_new)
-
-    # check that the oos method is working
-    assert_equal(transp_Xs_new.shape, Xs_new.shape)
-
-    # test inverse transform
-    transp_Xt = otda.inverse_transform(Xt=Xt)
-    assert_equal(transp_Xt.shape, Xt.shape)
-
-    Xt_new, _ = make_data_classif('3gauss2', nt + 1)
-    transp_Xt_new = otda.inverse_transform(Xt=Xt_new)
-
-    # check that the oos method is working
-    assert_equal(transp_Xt_new.shape, Xt_new.shape)
-
-    # test fit_transform
-    transp_Xs = otda.fit_transform(Xs=Xs, Xt=Xt)
-    assert_equal(transp_Xs.shape, Xs.shape)
author	ievred <ievgen.redko@univ-st-etienne.fr>	2020-04-08 10:08:47 +0200
committer	ievred <ievgen.redko@univ-st-etienne.fr>	2020-04-08 10:08:47 +0200
commit	c68b52d1623683e86555484bf9a4875a66957bb6 (patch)
tree	e7727a19ed7ba3a47a1df1ec893d0bc27c2eec57
parent	2c9f992157844d6253a302905417e86580ac6b12 (diff)