summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRémi Flamary <remi.flamary@gmail.com>2017-09-07 14:28:54 +0200
committerGitHub <noreply@github.com>2017-09-07 14:28:54 +0200
commit62dcfbfb78a2be24379cd5cdb4aec70d8c4befaa (patch)
tree61bd6b7408dd714ee6643c033bba745af0197059
parent16697047eff9326a0ecb483317c13a854a3d3a71 (diff)
parent2097116c7db725a88876d617e20a94f32627f7c9 (diff)
Merge pull request #28 from Slasnista/domain_adaptation_corrections
Domain adaptation corrections, closes #26
-rw-r--r--examples/da/plot_otda_semi_supervised.py147
-rw-r--r--ot/da.py74
-rw-r--r--test/test_da.py326
3 files changed, 384 insertions, 163 deletions
diff --git a/examples/da/plot_otda_semi_supervised.py b/examples/da/plot_otda_semi_supervised.py
new file mode 100644
index 0000000..8095c4d
--- /dev/null
+++ b/examples/da/plot_otda_semi_supervised.py
@@ -0,0 +1,147 @@
+# -*- coding: utf-8 -*-
+"""
+============================================
+OTDA unsupervised vs semi-supervised setting
+============================================
+
+This example introduces a semi supervised domain adaptation in a 2D setting.
+It presents the problem of semi-supervised domain adaptation and introduces
+some optimal transport approaches to solve it.
+
+Quantities such as optimal couplings, greater coupling coefficients and
+transported samples are represented in order to give a visual understanding
+of what the transport methods are doing.
+"""
+
+# Authors: Remi Flamary <remi.flamary@unice.fr>
+# Stanislas Chambon <stan.chambon@gmail.com>
+#
+# License: MIT License
+
+import matplotlib.pylab as pl
+import ot
+
+
+##############################################################################
+# generate data
+##############################################################################
+
+n_samples_source = 150
+n_samples_target = 150
+
+Xs, ys = ot.datasets.get_data_classif('3gauss', n_samples_source)
+Xt, yt = ot.datasets.get_data_classif('3gauss2', n_samples_target)
+
+
+##############################################################################
+# Transport source samples onto target samples
+##############################################################################
+
+# unsupervised domain adaptation
+ot_sinkhorn_un = ot.da.SinkhornTransport(reg_e=1e-1)
+ot_sinkhorn_un.fit(Xs=Xs, Xt=Xt)
+transp_Xs_sinkhorn_un = ot_sinkhorn_un.transform(Xs=Xs)
+
+# semi-supervised domain adaptation
+ot_sinkhorn_semi = ot.da.SinkhornTransport(reg_e=1e-1)
+ot_sinkhorn_semi.fit(Xs=Xs, Xt=Xt, ys=ys, yt=yt)
+transp_Xs_sinkhorn_semi = ot_sinkhorn_semi.transform(Xs=Xs)
+
+# semi-supervised DA uses available labeled target samples to modify the cost
+# matrix involved in the OT problem. The cost of transporting a source sample
+# of class A onto a target sample of class B != A is set to infinite, or a
+# very large value
+
+# note that in the present case we consider that all the target samples are
+# labeled. For practical applications, some target samples might not have labels,
+# in this case the element of yt corresponding to these samples should be
+# filled with -1.
+
+# Warning: we recall that -1 cannot be used as a class label
+
+
+##############################################################################
+# Fig 1 : plots source and target samples + matrix of pairwise distance
+##############################################################################
+
+pl.figure(1, figsize=(10, 10))
+pl.subplot(2, 2, 1)
+pl.scatter(Xs[:, 0], Xs[:, 1], c=ys, marker='+', label='Source samples')
+pl.xticks([])
+pl.yticks([])
+pl.legend(loc=0)
+pl.title('Source samples')
+
+pl.subplot(2, 2, 2)
+pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o', label='Target samples')
+pl.xticks([])
+pl.yticks([])
+pl.legend(loc=0)
+pl.title('Target samples')
+
+pl.subplot(2, 2, 3)
+pl.imshow(ot_sinkhorn_un.cost_, interpolation='nearest')
+pl.xticks([])
+pl.yticks([])
+pl.title('Cost matrix - unsupervised DA')
+
+pl.subplot(2, 2, 4)
+pl.imshow(ot_sinkhorn_semi.cost_, interpolation='nearest')
+pl.xticks([])
+pl.yticks([])
+pl.title('Cost matrix - semisupervised DA')
+
+pl.tight_layout()
+
+# the optimal coupling in the semi-supervised DA case will exhibit a shape
+# "similar" to the cost matrix (block diagonal matrix)
+
+
+##############################################################################
+# Fig 2 : plots optimal couplings for the different methods
+##############################################################################
+
+pl.figure(2, figsize=(8, 4))
+
+pl.subplot(1, 2, 1)
+pl.imshow(ot_sinkhorn_un.coupling_, interpolation='nearest')
+pl.xticks([])
+pl.yticks([])
+pl.title('Optimal coupling\nUnsupervised DA')
+
+pl.subplot(1, 2, 2)
+pl.imshow(ot_sinkhorn_semi.coupling_, interpolation='nearest')
+pl.xticks([])
+pl.yticks([])
+pl.title('Optimal coupling\nSemi-supervised DA')
+
+pl.tight_layout()
+
+
+##############################################################################
+# Fig 3 : plot transported samples
+##############################################################################
+
+# display transported samples
+pl.figure(4, figsize=(8, 4))
+pl.subplot(1, 2, 1)
+pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o',
+ label='Target samples', alpha=0.5)
+pl.scatter(transp_Xs_sinkhorn_un[:, 0], transp_Xs_sinkhorn_un[:, 1], c=ys,
+ marker='+', label='Transp samples', s=30)
+pl.title('Transported samples\nEmdTransport')
+pl.legend(loc=0)
+pl.xticks([])
+pl.yticks([])
+
+pl.subplot(1, 2, 2)
+pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o',
+ label='Target samples', alpha=0.5)
+pl.scatter(transp_Xs_sinkhorn_semi[:, 0], transp_Xs_sinkhorn_semi[:, 1], c=ys,
+ marker='+', label='Transp samples', s=30)
+pl.title('Transported samples\nSinkhornTransport')
+pl.xticks([])
+pl.yticks([])
+
+pl.tight_layout()
+pl.show()
diff --git a/ot/da.py b/ot/da.py
index 564c7b7..1d3d0ba 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -966,8 +966,12 @@ class BaseTransport(BaseEstimator):
The class labels
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
- yt : array-like, shape (n_labeled_target_samples,)
- The class labels
+ yt : array-like, shape (n_target_samples,)
+ The class labels. If some target samples are unlabeled, fill the
+ yt's elements with -1.
+
+ Warning: Note that, due to this convention -1 cannot be used as a
+ class label
Returns
-------
@@ -989,7 +993,7 @@ class BaseTransport(BaseEstimator):
# assumes labeled source samples occupy the first rows
# and labeled target samples occupy the first columns
- classes = np.unique(ys)
+ classes = [c for c in np.unique(ys) if c != -1]
for c in classes:
idx_s = np.where((ys != c) & (ys != -1))
idx_t = np.where(yt == c)
@@ -1023,8 +1027,12 @@ class BaseTransport(BaseEstimator):
The class labels
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
- yt : array-like, shape (n_labeled_target_samples,)
- The class labels
+ yt : array-like, shape (n_target_samples,)
+ The class labels. If some target samples are unlabeled, fill the
+ yt's elements with -1.
+
+ Warning: Note that, due to this convention -1 cannot be used as a
+ class label
Returns
-------
@@ -1045,8 +1053,12 @@ class BaseTransport(BaseEstimator):
The class labels
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
- yt : array-like, shape (n_labeled_target_samples,)
- The class labels
+ yt : array-like, shape (n_target_samples,)
+ The class labels. If some target samples are unlabeled, fill the
+ yt's elements with -1.
+
+ Warning: Note that, due to this convention -1 cannot be used as a
+ class label
batch_size : int, optional (default=128)
The batch size for out of sample inverse transform
@@ -1110,8 +1122,12 @@ class BaseTransport(BaseEstimator):
The class labels
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
- yt : array-like, shape (n_labeled_target_samples,)
- The class labels
+ yt : array-like, shape (n_target_samples,)
+ The class labels. If some target samples are unlabeled, fill the
+ yt's elements with -1.
+
+ Warning: Note that, due to this convention -1 cannot be used as a
+ class label
batch_size : int, optional (default=128)
The batch size for out of sample inverse transform
@@ -1241,8 +1257,12 @@ class SinkhornTransport(BaseTransport):
The class labels
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
- yt : array-like, shape (n_labeled_target_samples,)
- The class labels
+ yt : array-like, shape (n_target_samples,)
+ The class labels. If some target samples are unlabeled, fill the
+ yt's elements with -1.
+
+ Warning: Note that, due to this convention -1 cannot be used as a
+ class label
Returns
-------
@@ -1333,8 +1353,12 @@ class EMDTransport(BaseTransport):
The class labels
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
- yt : array-like, shape (n_labeled_target_samples,)
- The class labels
+ yt : array-like, shape (n_target_samples,)
+ The class labels. If some target samples are unlabeled, fill the
+ yt's elements with -1.
+
+ Warning: Note that, due to this convention -1 cannot be used as a
+ class label
Returns
-------
@@ -1434,8 +1458,12 @@ class SinkhornLpl1Transport(BaseTransport):
The class labels
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
- yt : array-like, shape (n_labeled_target_samples,)
- The class labels
+ yt : array-like, shape (n_target_samples,)
+ The class labels. If some target samples are unlabeled, fill the
+ yt's elements with -1.
+
+ Warning: Note that, due to this convention -1 cannot be used as a
+ class label
Returns
-------
@@ -1545,8 +1573,12 @@ class SinkhornL1l2Transport(BaseTransport):
The class labels
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
- yt : array-like, shape (n_labeled_target_samples,)
- The class labels
+ yt : array-like, shape (n_target_samples,)
+ The class labels. If some target samples are unlabeled, fill the
+ yt's elements with -1.
+
+ Warning: Note that, due to this convention -1 cannot be used as a
+ class label
Returns
-------
@@ -1662,8 +1694,12 @@ class MappingTransport(BaseEstimator):
The class labels
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
- yt : array-like, shape (n_labeled_target_samples,)
- The class labels
+ yt : array-like, shape (n_target_samples,)
+ The class labels. If some target samples are unlabeled, fill the
+ yt's elements with -1.
+
+ Warning: Note that, due to this convention -1 cannot be used as a
+ class label
Returns
-------
diff --git a/test/test_da.py b/test/test_da.py
index 104a798..593dc53 100644
--- a/test/test_da.py
+++ b/test/test_da.py
@@ -22,60 +22,68 @@ def test_sinkhorn_lpl1_transport_class():
Xs, ys = get_data_classif('3gauss', ns)
Xt, yt = get_data_classif('3gauss2', nt)
- clf = ot.da.SinkhornLpl1Transport()
+ otda = ot.da.SinkhornLpl1Transport()
# test its computed
- clf.fit(Xs=Xs, ys=ys, Xt=Xt)
- assert hasattr(clf, "cost_")
- assert hasattr(clf, "coupling_")
+ otda.fit(Xs=Xs, ys=ys, Xt=Xt)
+ assert hasattr(otda, "cost_")
+ assert hasattr(otda, "coupling_")
# test dimensions of coupling
- assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0])))
- assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
+ assert_equal(otda.cost_.shape, ((Xs.shape[0], Xt.shape[0])))
+ assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
# test margin constraints
mu_s = unif(ns)
mu_t = unif(nt)
- assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
- assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
+ assert_allclose(
+ np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
+ assert_allclose(
+ np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
# test transform
- transp_Xs = clf.transform(Xs=Xs)
+ transp_Xs = otda.transform(Xs=Xs)
assert_equal(transp_Xs.shape, Xs.shape)
Xs_new, _ = get_data_classif('3gauss', ns + 1)
- transp_Xs_new = clf.transform(Xs_new)
+ transp_Xs_new = otda.transform(Xs_new)
# check that the oos method is working
assert_equal(transp_Xs_new.shape, Xs_new.shape)
# test inverse transform
- transp_Xt = clf.inverse_transform(Xt=Xt)
+ transp_Xt = otda.inverse_transform(Xt=Xt)
assert_equal(transp_Xt.shape, Xt.shape)
Xt_new, _ = get_data_classif('3gauss2', nt + 1)
- transp_Xt_new = clf.inverse_transform(Xt=Xt_new)
+ transp_Xt_new = otda.inverse_transform(Xt=Xt_new)
# check that the oos method is working
assert_equal(transp_Xt_new.shape, Xt_new.shape)
# test fit_transform
- transp_Xs = clf.fit_transform(Xs=Xs, ys=ys, Xt=Xt)
+ transp_Xs = otda.fit_transform(Xs=Xs, ys=ys, Xt=Xt)
assert_equal(transp_Xs.shape, Xs.shape)
- # test semi supervised mode
- clf = ot.da.SinkhornLpl1Transport()
- clf.fit(Xs=Xs, ys=ys, Xt=Xt)
- n_unsup = np.sum(clf.cost_)
+ # test unsupervised vs semi-supervised mode
+ otda_unsup = ot.da.SinkhornLpl1Transport()
+ otda_unsup.fit(Xs=Xs, ys=ys, Xt=Xt)
+ n_unsup = np.sum(otda_unsup.cost_)
- # test semi supervised mode
- clf = ot.da.SinkhornLpl1Transport()
- clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt)
- assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0])))
- n_semisup = np.sum(clf.cost_)
+ otda_semi = ot.da.SinkhornLpl1Transport()
+ otda_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt)
+ assert_equal(otda_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0])))
+ n_semisup = np.sum(otda_semi.cost_)
+ # check that the cost matrix norms are indeed different
assert n_unsup != n_semisup, "semisupervised mode not working"
+ # check that the coupling forbids mass transport between labeled source
+ # and labeled target samples
+ mass_semi = np.sum(
+ otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max])
+ assert mass_semi == 0, "semisupervised mode not working"
+
def test_sinkhorn_l1l2_transport_class():
"""test_sinkhorn_transport
@@ -87,65 +95,75 @@ def test_sinkhorn_l1l2_transport_class():
Xs, ys = get_data_classif('3gauss', ns)
Xt, yt = get_data_classif('3gauss2', nt)
- clf = ot.da.SinkhornL1l2Transport()
+ otda = ot.da.SinkhornL1l2Transport()
# test its computed
- clf.fit(Xs=Xs, ys=ys, Xt=Xt)
- assert hasattr(clf, "cost_")
- assert hasattr(clf, "coupling_")
- assert hasattr(clf, "log_")
+ otda.fit(Xs=Xs, ys=ys, Xt=Xt)
+ assert hasattr(otda, "cost_")
+ assert hasattr(otda, "coupling_")
+ assert hasattr(otda, "log_")
# test dimensions of coupling
- assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0])))
- assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
+ assert_equal(otda.cost_.shape, ((Xs.shape[0], Xt.shape[0])))
+ assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
# test margin constraints
mu_s = unif(ns)
mu_t = unif(nt)
- assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
- assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
+ assert_allclose(
+ np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
+ assert_allclose(
+ np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
# test transform
- transp_Xs = clf.transform(Xs=Xs)
+ transp_Xs = otda.transform(Xs=Xs)
assert_equal(transp_Xs.shape, Xs.shape)
Xs_new, _ = get_data_classif('3gauss', ns + 1)
- transp_Xs_new = clf.transform(Xs_new)
+ transp_Xs_new = otda.transform(Xs_new)
# check that the oos method is working
assert_equal(transp_Xs_new.shape, Xs_new.shape)
# test inverse transform
- transp_Xt = clf.inverse_transform(Xt=Xt)
+ transp_Xt = otda.inverse_transform(Xt=Xt)
assert_equal(transp_Xt.shape, Xt.shape)
Xt_new, _ = get_data_classif('3gauss2', nt + 1)
- transp_Xt_new = clf.inverse_transform(Xt=Xt_new)
+ transp_Xt_new = otda.inverse_transform(Xt=Xt_new)
# check that the oos method is working
assert_equal(transp_Xt_new.shape, Xt_new.shape)
# test fit_transform
- transp_Xs = clf.fit_transform(Xs=Xs, ys=ys, Xt=Xt)
+ transp_Xs = otda.fit_transform(Xs=Xs, ys=ys, Xt=Xt)
assert_equal(transp_Xs.shape, Xs.shape)
- # test semi supervised mode
- clf = ot.da.SinkhornL1l2Transport()
- clf.fit(Xs=Xs, ys=ys, Xt=Xt)
- n_unsup = np.sum(clf.cost_)
+ # test unsupervised vs semi-supervised mode
+ otda_unsup = ot.da.SinkhornL1l2Transport()
+ otda_unsup.fit(Xs=Xs, ys=ys, Xt=Xt)
+ n_unsup = np.sum(otda_unsup.cost_)
- # test semi supervised mode
- clf = ot.da.SinkhornL1l2Transport()
- clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt)
- assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0])))
- n_semisup = np.sum(clf.cost_)
+ otda_semi = ot.da.SinkhornL1l2Transport()
+ otda_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt)
+ assert_equal(otda_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0])))
+ n_semisup = np.sum(otda_semi.cost_)
+ # check that the cost matrix norms are indeed different
assert n_unsup != n_semisup, "semisupervised mode not working"
+ # check that the coupling forbids mass transport between labeled source
+ # and labeled target samples
+ mass_semi = np.sum(
+ otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max])
+ mass_semi = otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max]
+ assert_allclose(mass_semi, np.zeros_like(mass_semi),
+ rtol=1e-9, atol=1e-9)
+
# check everything runs well with log=True
- clf = ot.da.SinkhornL1l2Transport(log=True)
- clf.fit(Xs=Xs, ys=ys, Xt=Xt)
- assert len(clf.log_.keys()) != 0
+ otda = ot.da.SinkhornL1l2Transport(log=True)
+ otda.fit(Xs=Xs, ys=ys, Xt=Xt)
+ assert len(otda.log_.keys()) != 0
def test_sinkhorn_transport_class():
@@ -158,65 +176,73 @@ def test_sinkhorn_transport_class():
Xs, ys = get_data_classif('3gauss', ns)
Xt, yt = get_data_classif('3gauss2', nt)
- clf = ot.da.SinkhornTransport()
+ otda = ot.da.SinkhornTransport()
# test its computed
- clf.fit(Xs=Xs, Xt=Xt)
- assert hasattr(clf, "cost_")
- assert hasattr(clf, "coupling_")
- assert hasattr(clf, "log_")
+ otda.fit(Xs=Xs, Xt=Xt)
+ assert hasattr(otda, "cost_")
+ assert hasattr(otda, "coupling_")
+ assert hasattr(otda, "log_")
# test dimensions of coupling
- assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0])))
- assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
+ assert_equal(otda.cost_.shape, ((Xs.shape[0], Xt.shape[0])))
+ assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
# test margin constraints
mu_s = unif(ns)
mu_t = unif(nt)
- assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
- assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
+ assert_allclose(
+ np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
+ assert_allclose(
+ np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
# test transform
- transp_Xs = clf.transform(Xs=Xs)
+ transp_Xs = otda.transform(Xs=Xs)
assert_equal(transp_Xs.shape, Xs.shape)
Xs_new, _ = get_data_classif('3gauss', ns + 1)
- transp_Xs_new = clf.transform(Xs_new)
+ transp_Xs_new = otda.transform(Xs_new)
# check that the oos method is working
assert_equal(transp_Xs_new.shape, Xs_new.shape)
# test inverse transform
- transp_Xt = clf.inverse_transform(Xt=Xt)
+ transp_Xt = otda.inverse_transform(Xt=Xt)
assert_equal(transp_Xt.shape, Xt.shape)
Xt_new, _ = get_data_classif('3gauss2', nt + 1)
- transp_Xt_new = clf.inverse_transform(Xt=Xt_new)
+ transp_Xt_new = otda.inverse_transform(Xt=Xt_new)
# check that the oos method is working
assert_equal(transp_Xt_new.shape, Xt_new.shape)
# test fit_transform
- transp_Xs = clf.fit_transform(Xs=Xs, Xt=Xt)
+ transp_Xs = otda.fit_transform(Xs=Xs, Xt=Xt)
assert_equal(transp_Xs.shape, Xs.shape)
- # test semi supervised mode
- clf = ot.da.SinkhornTransport()
- clf.fit(Xs=Xs, Xt=Xt)
- n_unsup = np.sum(clf.cost_)
+ # test unsupervised vs semi-supervised mode
+ otda_unsup = ot.da.SinkhornTransport()
+ otda_unsup.fit(Xs=Xs, Xt=Xt)
+ n_unsup = np.sum(otda_unsup.cost_)
- # test semi supervised mode
- clf = ot.da.SinkhornTransport()
- clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt)
- assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0])))
- n_semisup = np.sum(clf.cost_)
+ otda_semi = ot.da.SinkhornTransport()
+ otda_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt)
+ assert_equal(otda_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0])))
+ n_semisup = np.sum(otda_semi.cost_)
+ # check that the cost matrix norms are indeed different
assert n_unsup != n_semisup, "semisupervised mode not working"
+ # check that the coupling forbids mass transport between labeled source
+ # and labeled target samples
+ mass_semi = np.sum(
+ otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max])
+ assert mass_semi == 0, "semisupervised mode not working"
+
# check everything runs well with log=True
- clf = ot.da.SinkhornTransport(log=True)
- clf.fit(Xs=Xs, ys=ys, Xt=Xt)
- assert len(clf.log_.keys()) != 0
+ otda = ot.da.SinkhornTransport(log=True)
+ otda.fit(Xs=Xs, ys=ys, Xt=Xt)
+ assert len(otda.log_.keys()) != 0
def test_emd_transport_class():
@@ -229,60 +255,72 @@ def test_emd_transport_class():
Xs, ys = get_data_classif('3gauss', ns)
Xt, yt = get_data_classif('3gauss2', nt)
- clf = ot.da.EMDTransport()
+ otda = ot.da.EMDTransport()
# test its computed
- clf.fit(Xs=Xs, Xt=Xt)
- assert hasattr(clf, "cost_")
- assert hasattr(clf, "coupling_")
+ otda.fit(Xs=Xs, Xt=Xt)
+ assert hasattr(otda, "cost_")
+ assert hasattr(otda, "coupling_")
# test dimensions of coupling
- assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0])))
- assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
+ assert_equal(otda.cost_.shape, ((Xs.shape[0], Xt.shape[0])))
+ assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
# test margin constraints
mu_s = unif(ns)
mu_t = unif(nt)
- assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
- assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
+ assert_allclose(
+ np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
+ assert_allclose(
+ np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
# test transform
- transp_Xs = clf.transform(Xs=Xs)
+ transp_Xs = otda.transform(Xs=Xs)
assert_equal(transp_Xs.shape, Xs.shape)
Xs_new, _ = get_data_classif('3gauss', ns + 1)
- transp_Xs_new = clf.transform(Xs_new)
+ transp_Xs_new = otda.transform(Xs_new)
# check that the oos method is working
assert_equal(transp_Xs_new.shape, Xs_new.shape)
# test inverse transform
- transp_Xt = clf.inverse_transform(Xt=Xt)
+ transp_Xt = otda.inverse_transform(Xt=Xt)
assert_equal(transp_Xt.shape, Xt.shape)
Xt_new, _ = get_data_classif('3gauss2', nt + 1)
- transp_Xt_new = clf.inverse_transform(Xt=Xt_new)
+ transp_Xt_new = otda.inverse_transform(Xt=Xt_new)
# check that the oos method is working
assert_equal(transp_Xt_new.shape, Xt_new.shape)
# test fit_transform
- transp_Xs = clf.fit_transform(Xs=Xs, Xt=Xt)
+ transp_Xs = otda.fit_transform(Xs=Xs, Xt=Xt)
assert_equal(transp_Xs.shape, Xs.shape)
- # test semi supervised mode
- clf = ot.da.EMDTransport()
- clf.fit(Xs=Xs, Xt=Xt)
- n_unsup = np.sum(clf.cost_)
+ # test unsupervised vs semi-supervised mode
+ otda_unsup = ot.da.EMDTransport()
+ otda_unsup.fit(Xs=Xs, ys=ys, Xt=Xt)
+ n_unsup = np.sum(otda_unsup.cost_)
- # test semi supervised mode
- clf = ot.da.EMDTransport()
- clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt)
- assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0])))
- n_semisup = np.sum(clf.cost_)
+ otda_semi = ot.da.EMDTransport()
+ otda_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt)
+ assert_equal(otda_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0])))
+ n_semisup = np.sum(otda_semi.cost_)
+ # check that the cost matrix norms are indeed different
assert n_unsup != n_semisup, "semisupervised mode not working"
+ # check that the coupling forbids mass transport between labeled source
+ # and labeled target samples
+ mass_semi = np.sum(
+ otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max])
+ mass_semi = otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max]
+
+ # we need to use a small tolerance here, otherwise the test breaks
+ assert_allclose(mass_semi, np.zeros_like(mass_semi),
+ rtol=1e-2, atol=1e-2)
+
def test_mapping_transport_class():
"""test_mapping_transport
@@ -300,47 +338,51 @@ def test_mapping_transport_class():
##########################################################################
# check computation and dimensions if bias == False
- clf = ot.da.MappingTransport(kernel="linear", bias=False)
- clf.fit(Xs=Xs, Xt=Xt)
- assert hasattr(clf, "coupling_")
- assert hasattr(clf, "mapping_")
- assert hasattr(clf, "log_")
+ otda = ot.da.MappingTransport(kernel="linear", bias=False)
+ otda.fit(Xs=Xs, Xt=Xt)
+ assert hasattr(otda, "coupling_")
+ assert hasattr(otda, "mapping_")
+ assert hasattr(otda, "log_")
- assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
- assert_equal(clf.mapping_.shape, ((Xs.shape[1], Xt.shape[1])))
+ assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
+ assert_equal(otda.mapping_.shape, ((Xs.shape[1], Xt.shape[1])))
# test margin constraints
mu_s = unif(ns)
mu_t = unif(nt)
- assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
- assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
+ assert_allclose(
+ np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
+ assert_allclose(
+ np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
# test transform
- transp_Xs = clf.transform(Xs=Xs)
+ transp_Xs = otda.transform(Xs=Xs)
assert_equal(transp_Xs.shape, Xs.shape)
- transp_Xs_new = clf.transform(Xs_new)
+ transp_Xs_new = otda.transform(Xs_new)
# check that the oos method is working
assert_equal(transp_Xs_new.shape, Xs_new.shape)
# check computation and dimensions if bias == True
- clf = ot.da.MappingTransport(kernel="linear", bias=True)
- clf.fit(Xs=Xs, Xt=Xt)
- assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
- assert_equal(clf.mapping_.shape, ((Xs.shape[1] + 1, Xt.shape[1])))
+ otda = ot.da.MappingTransport(kernel="linear", bias=True)
+ otda.fit(Xs=Xs, Xt=Xt)
+ assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
+ assert_equal(otda.mapping_.shape, ((Xs.shape[1] + 1, Xt.shape[1])))
# test margin constraints
mu_s = unif(ns)
mu_t = unif(nt)
- assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
- assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
+ assert_allclose(
+ np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
+ assert_allclose(
+ np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
# test transform
- transp_Xs = clf.transform(Xs=Xs)
+ transp_Xs = otda.transform(Xs=Xs)
assert_equal(transp_Xs.shape, Xs.shape)
- transp_Xs_new = clf.transform(Xs_new)
+ transp_Xs_new = otda.transform(Xs_new)
# check that the oos method is working
assert_equal(transp_Xs_new.shape, Xs_new.shape)
@@ -350,52 +392,56 @@ def test_mapping_transport_class():
##########################################################################
# check computation and dimensions if bias == False
- clf = ot.da.MappingTransport(kernel="gaussian", bias=False)
- clf.fit(Xs=Xs, Xt=Xt)
+ otda = ot.da.MappingTransport(kernel="gaussian", bias=False)
+ otda.fit(Xs=Xs, Xt=Xt)
- assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
- assert_equal(clf.mapping_.shape, ((Xs.shape[0], Xt.shape[1])))
+ assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
+ assert_equal(otda.mapping_.shape, ((Xs.shape[0], Xt.shape[1])))
# test margin constraints
mu_s = unif(ns)
mu_t = unif(nt)
- assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
- assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
+ assert_allclose(
+ np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
+ assert_allclose(
+ np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
# test transform
- transp_Xs = clf.transform(Xs=Xs)
+ transp_Xs = otda.transform(Xs=Xs)
assert_equal(transp_Xs.shape, Xs.shape)
- transp_Xs_new = clf.transform(Xs_new)
+ transp_Xs_new = otda.transform(Xs_new)
# check that the oos method is working
assert_equal(transp_Xs_new.shape, Xs_new.shape)
# check computation and dimensions if bias == True
- clf = ot.da.MappingTransport(kernel="gaussian", bias=True)
- clf.fit(Xs=Xs, Xt=Xt)
- assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
- assert_equal(clf.mapping_.shape, ((Xs.shape[0] + 1, Xt.shape[1])))
+ otda = ot.da.MappingTransport(kernel="gaussian", bias=True)
+ otda.fit(Xs=Xs, Xt=Xt)
+ assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
+ assert_equal(otda.mapping_.shape, ((Xs.shape[0] + 1, Xt.shape[1])))
# test margin constraints
mu_s = unif(ns)
mu_t = unif(nt)
- assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
- assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
+ assert_allclose(
+ np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
+ assert_allclose(
+ np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
# test transform
- transp_Xs = clf.transform(Xs=Xs)
+ transp_Xs = otda.transform(Xs=Xs)
assert_equal(transp_Xs.shape, Xs.shape)
- transp_Xs_new = clf.transform(Xs_new)
+ transp_Xs_new = otda.transform(Xs_new)
# check that the oos method is working
assert_equal(transp_Xs_new.shape, Xs_new.shape)
# check everything runs well with log=True
- clf = ot.da.MappingTransport(kernel="gaussian", log=True)
- clf.fit(Xs=Xs, Xt=Xt)
- assert len(clf.log_.keys()) != 0
+ otda = ot.da.MappingTransport(kernel="gaussian", log=True)
+ otda.fit(Xs=Xs, Xt=Xt)
+ assert len(otda.log_.keys()) != 0
def test_otda():
@@ -424,7 +470,8 @@ def test_otda():
da_entrop.interp()
da_entrop.predict(xs)
- np.testing.assert_allclose(a, np.sum(da_entrop.G, 1), rtol=1e-3, atol=1e-3)
+ np.testing.assert_allclose(
+ a, np.sum(da_entrop.G, 1), rtol=1e-3, atol=1e-3)
np.testing.assert_allclose(b, np.sum(da_entrop.G, 0), rtol=1e-3, atol=1e-3)
# non-convex Group lasso regularization
@@ -458,12 +505,3 @@ def test_otda():
da_emd = ot.da.OTDA_mapping_kernel() # init class
da_emd.fit(xs, xt, numItermax=10) # fit distributions
da_emd.predict(xs) # interpolation of source samples
-
-
-# if __name__ == "__main__":
-
-# test_sinkhorn_transport_class()
-# test_emd_transport_class()
-# test_sinkhorn_l1l2_transport_class()
-# test_sinkhorn_lpl1_transport_class()
-# test_mapping_transport_class()