summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ot/da.py86
-rw-r--r--test/test_da.py59
2 files changed, 135 insertions, 10 deletions
diff --git a/ot/da.py b/ot/da.py
index 6b98a17..fb2fd36 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -1144,7 +1144,7 @@ class BaseTransport(BaseEstimator):
if np.array_equal(self.Xs, Xs):
# perform standard barycentric mapping
- transp = self.gamma_ / np.sum(self.gamma_, 1)[:, None]
+ transp = self.Coupling_ / np.sum(self.Coupling_, 1)[:, None]
# set nans to 0
transp[~ np.isfinite(transp)] = 0
@@ -1179,7 +1179,7 @@ class BaseTransport(BaseEstimator):
if np.array_equal(self.Xt, Xt):
# perform standard barycentric mapping
- transp_ = self.gamma_.T / np.sum(self.gamma_, 0)[:, None]
+ transp_ = self.Coupling_.T / np.sum(self.Coupling_, 0)[:, None]
# set nans to 0
transp_[~ np.isfinite(transp_)] = 0
@@ -1228,7 +1228,7 @@ class SinkhornTransport(BaseTransport):
Controls the logs of the optimization algorithm
Attributes
----------
- gamma_ : the optimal coupling
+ Coupling_ : the optimal coupling
References
----------
@@ -1254,7 +1254,6 @@ class SinkhornTransport(BaseTransport):
self.log = log
self.metric = metric
self.distribution_estimation = distribution_estimation
- self.method = "sinkhorn"
self.out_of_sample_map = out_of_sample_map
def fit(self, Xs=None, ys=None, Xt=None, yt=None):
@@ -1276,10 +1275,85 @@ class SinkhornTransport(BaseTransport):
Returns self.
"""
- self = super(SinkhornTransport, self).fit(Xs, ys, Xt, yt)
+ super(SinkhornTransport, self).fit(Xs, ys, Xt, yt)
# coupling estimation
- self.gamma_ = sinkhorn(
+ self.Coupling_ = sinkhorn(
a=self.mu_s, b=self.mu_t, M=self.Cost, reg=self.reg_e,
numItermax=self.max_iter, stopThr=self.tol,
verbose=self.verbose, log=self.log)
+
+
+class EMDTransport(BaseTransport):
+ """Domain Adapatation OT method based on Earth Mover's Distance
+ Parameters
+ ----------
+ mode : string, optional (default="unsupervised")
+ The DA mode. If "unsupervised" no target labels are taken into account
+ to modify the cost matrix. If "semisupervised" the target labels
+ are taken into account to set coefficients of the pairwise distance
+ matrix to 0 for row and columns indices that correspond to source and
+ target samples which share the same labels.
+ mapping : string, optional (default="barycentric")
+ The kind of mapping to apply to transport samples from a domain into
+ another one.
+ if "barycentric" only the samples used to estimate the coupling can
+ be transported from a domain to another one.
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ distribution : string, optional (default="uniform")
+ The kind of distribution estimation to employ
+ verbose : int, optional (default=0)
+ Controls the verbosity of the optimization algorithm
+ log : int, optional (default=0)
+ Controls the logs of the optimization algorithm
+ Attributes
+ ----------
+ Coupling_ : the optimal coupling
+
+ References
+ ----------
+ .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+ "Optimal Transport for Domain Adaptation," in IEEE Transactions
+ on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
+ """
+
+ def __init__(self, mode="unsupervised", verbose=False,
+ log=False, metric="sqeuclidean",
+ distribution_estimation=distribution_estimation_uniform,
+ out_of_sample_map='ferradans'):
+
+ self.mode = mode
+ self.verbose = verbose
+ self.log = log
+ self.metric = metric
+ self.distribution_estimation = distribution_estimation
+ self.out_of_sample_map = out_of_sample_map
+
+ def fit(self, Xs, ys=None, Xt=None, yt=None):
+ """Build a coupling matrix from source and target sets of samples
+ (Xs, ys) and (Xt, yt)
+ Parameters
+ ----------
+ Xs : array-like of shape = [n_source_samples, n_features]
+ The training input samples.
+ ys : array-like, shape = [n_source_samples]
+ The class labels
+ Xt : array-like of shape = [n_target_samples, n_features]
+ The training input samples.
+ yt : array-like, shape = [n_labeled_target_samples]
+ The class labels
+ Returns
+ -------
+ self : object
+ Returns self.
+ """
+
+ super(EMDTransport, self).fit(Xs, ys, Xt, yt)
+
+ # coupling estimation
+ self.Coupling_ = emd(
+ a=self.mu_s, b=self.mu_t, M=self.Cost,
+ # verbose=self.verbose,
+ # log=self.log
+ )
diff --git a/test/test_da.py b/test/test_da.py
index e7b4ed1..33b3695 100644
--- a/test/test_da.py
+++ b/test/test_da.py
@@ -13,7 +13,7 @@ from ot.utils import unif
np.random.seed(42)
-def test_sinkhorn_transport():
+def test_sinkhorn_transport_class():
"""test_sinkhorn_transport
"""
@@ -30,13 +30,59 @@ def test_sinkhorn_transport():
# test dimensions of coupling
assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0])))
- assert_equal(clf.gamma_.shape, ((Xs.shape[0], Xt.shape[0])))
+ assert_equal(clf.Coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
# test margin constraints
mu_s = unif(ns)
mu_t = unif(nt)
- assert_allclose(np.sum(clf.gamma_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
- assert_allclose(np.sum(clf.gamma_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
+ assert_allclose(np.sum(clf.Coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
+ assert_allclose(np.sum(clf.Coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
+
+ # test transform
+ transp_Xs = clf.transform(Xs=Xs)
+ assert_equal(transp_Xs.shape, Xs.shape)
+
+ Xs_new, _ = get_data_classif('3gauss', ns + 1)
+ transp_Xs_new = clf.transform(Xs_new)
+
+ # check that the oos method is not working
+ assert_equal(transp_Xs_new, Xs_new)
+
+ # test inverse transform
+ transp_Xt = clf.inverse_transform(Xt=Xt)
+ assert_equal(transp_Xt.shape, Xt.shape)
+
+ Xt_new, _ = get_data_classif('3gauss2', nt + 1)
+ transp_Xt_new = clf.inverse_transform(Xt=Xt_new)
+
+ # check that the oos method is not working and returns the input data
+ assert_equal(transp_Xt_new, Xt_new)
+
+
+def test_emd_transport_class():
+ """test_sinkhorn_transport
+ """
+
+ ns = 150
+ nt = 200
+
+ Xs, ys = get_data_classif('3gauss', ns)
+ Xt, yt = get_data_classif('3gauss2', nt)
+
+ clf = ot.da.EMDTransport()
+
+ # test its computed
+ clf.fit(Xs=Xs, Xt=Xt)
+
+ # test dimensions of coupling
+ assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0])))
+ assert_equal(clf.Coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
+
+ # test margin constraints
+ mu_s = unif(ns)
+ mu_t = unif(nt)
+ assert_allclose(np.sum(clf.Coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
+ assert_allclose(np.sum(clf.Coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
# test transform
transp_Xs = clf.transform(Xs=Xs)
@@ -119,3 +165,8 @@ def test_otda():
da_emd = ot.da.OTDA_mapping_kernel() # init class
da_emd.fit(xs, xt, numItermax=10) # fit distributions
da_emd.predict(xs) # interpolation of source samples
+
+
+if __name__ == "__main__":
+ test_sinkhorn_transport_class()
+ test_emd_transport_class()