diff options
author | Antoine Rolet <antoine.rolet@gmail.com> | 2017-09-09 12:40:09 +0900 |
---|---|---|
committer | Antoine Rolet <antoine.rolet@gmail.com> | 2017-09-09 12:40:09 +0900 |
commit | 619bb41a18a542ce768fd4ce3eb9240e9ad6650e (patch) | |
tree | cc60cccc304a8d9fcad31d42aab40513b1dce48d | |
parent | e58cd780ccf87736265e4e1a39afa3a167325ccc (diff) | |
parent | 62dcfbfb78a2be24379cd5cdb4aec70d8c4befaa (diff) |
Merge remote-tracking branch 'upstream/master' into ot_dual_variables
-rw-r--r-- | examples/da/plot_otda_semi_supervised.py | 147 | ||||
-rw-r--r-- | ot/da.py | 74 | ||||
-rw-r--r-- | test/test_da.py | 326 |
3 files changed, 384 insertions, 163 deletions
diff --git a/examples/da/plot_otda_semi_supervised.py b/examples/da/plot_otda_semi_supervised.py new file mode 100644 index 0000000..8095c4d --- /dev/null +++ b/examples/da/plot_otda_semi_supervised.py @@ -0,0 +1,147 @@ +# -*- coding: utf-8 -*- +""" +============================================ +OTDA unsupervised vs semi-supervised setting +============================================ + +This example introduces a semi supervised domain adaptation in a 2D setting. +It explicits the problem of semi supervised domain adaptation and introduces +some optimal transport approaches to solve it. + +Quantities such as optimal couplings, greater coupling coefficients and +transported samples are represented in order to give a visual understanding +of what the transport methods are doing. +""" + +# Authors: Remi Flamary <remi.flamary@unice.fr> +# Stanislas Chambon <stan.chambon@gmail.com> +# +# License: MIT License + +import matplotlib.pylab as pl +import ot + + +############################################################################## +# generate data +############################################################################## + +n_samples_source = 150 +n_samples_target = 150 + +Xs, ys = ot.datasets.get_data_classif('3gauss', n_samples_source) +Xt, yt = ot.datasets.get_data_classif('3gauss2', n_samples_target) + + +############################################################################## +# Transport source samples onto target samples +############################################################################## + +# unsupervised domain adaptation +ot_sinkhorn_un = ot.da.SinkhornTransport(reg_e=1e-1) +ot_sinkhorn_un.fit(Xs=Xs, Xt=Xt) +transp_Xs_sinkhorn_un = ot_sinkhorn_un.transform(Xs=Xs) + +# semi-supervised domain adaptation +ot_sinkhorn_semi = ot.da.SinkhornTransport(reg_e=1e-1) +ot_sinkhorn_semi.fit(Xs=Xs, Xt=Xt, ys=ys, yt=yt) +transp_Xs_sinkhorn_semi = ot_sinkhorn_semi.transform(Xs=Xs) + +# semi supervised DA uses available labaled target samples to modify the cost +# matrix involved in the OT problem. The cost of transporting a source sample +# of class A onto a target sample of class B != A is set to infinite, or a +# very large value + +# note that in the present case we consider that all the target samples are +# labeled. For daily applications, some target sample might not have labels, +# in this case the element of yt corresponding to these samples should be +# filled with -1. + +# Warning: we recall that -1 cannot be used as a class label + + +############################################################################## +# Fig 1 : plots source and target samples + matrix of pairwise distance +############################################################################## + +pl.figure(1, figsize=(10, 10)) +pl.subplot(2, 2, 1) +pl.scatter(Xs[:, 0], Xs[:, 1], c=ys, marker='+', label='Source samples') +pl.xticks([]) +pl.yticks([]) +pl.legend(loc=0) +pl.title('Source samples') + +pl.subplot(2, 2, 2) +pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o', label='Target samples') +pl.xticks([]) +pl.yticks([]) +pl.legend(loc=0) +pl.title('Target samples') + +pl.subplot(2, 2, 3) +pl.imshow(ot_sinkhorn_un.cost_, interpolation='nearest') +pl.xticks([]) +pl.yticks([]) +pl.title('Cost matrix - unsupervised DA') + +pl.subplot(2, 2, 4) +pl.imshow(ot_sinkhorn_semi.cost_, interpolation='nearest') +pl.xticks([]) +pl.yticks([]) +pl.title('Cost matrix - semisupervised DA') + +pl.tight_layout() + +# the optimal coupling in the semi-supervised DA case will exhibit " shape +# similar" to the cost matrix, (block diagonal matrix) + + +############################################################################## +# Fig 2 : plots optimal couplings for the different methods +############################################################################## + +pl.figure(2, figsize=(8, 4)) + +pl.subplot(1, 2, 1) +pl.imshow(ot_sinkhorn_un.coupling_, interpolation='nearest') +pl.xticks([]) +pl.yticks([]) +pl.title('Optimal coupling\nUnsupervised DA') + +pl.subplot(1, 2, 2) +pl.imshow(ot_sinkhorn_semi.coupling_, interpolation='nearest') +pl.xticks([]) +pl.yticks([]) +pl.title('Optimal coupling\nSemi-supervised DA') + +pl.tight_layout() + + +############################################################################## +# Fig 3 : plot transported samples +############################################################################## + +# display transported samples +pl.figure(4, figsize=(8, 4)) +pl.subplot(1, 2, 1) +pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o', + label='Target samples', alpha=0.5) +pl.scatter(transp_Xs_sinkhorn_un[:, 0], transp_Xs_sinkhorn_un[:, 1], c=ys, + marker='+', label='Transp samples', s=30) +pl.title('Transported samples\nEmdTransport') +pl.legend(loc=0) +pl.xticks([]) +pl.yticks([]) + +pl.subplot(1, 2, 2) +pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o', + label='Target samples', alpha=0.5) +pl.scatter(transp_Xs_sinkhorn_semi[:, 0], transp_Xs_sinkhorn_semi[:, 1], c=ys, + marker='+', label='Transp samples', s=30) +pl.title('Transported samples\nSinkhornTransport') +pl.xticks([]) +pl.yticks([]) + +pl.tight_layout() +pl.show() @@ -966,8 +966,12 @@ class BaseTransport(BaseEstimator): The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape (n_labeled_target_samples,) - The class labels + yt : array-like, shape (n_target_samples,) + The class labels. If some target samples are unlabeled, fill the + yt's elements with -1. + + Warning: Note that, due to this convention -1 cannot be used as a + class label Returns ------- @@ -989,7 +993,7 @@ class BaseTransport(BaseEstimator): # assumes labeled source samples occupy the first rows # and labeled target samples occupy the first columns - classes = np.unique(ys) + classes = [c for c in np.unique(ys) if c != -1] for c in classes: idx_s = np.where((ys != c) & (ys != -1)) idx_t = np.where(yt == c) @@ -1023,8 +1027,12 @@ class BaseTransport(BaseEstimator): The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape (n_labeled_target_samples,) - The class labels + yt : array-like, shape (n_target_samples,) + The class labels. If some target samples are unlabeled, fill the + yt's elements with -1. + + Warning: Note that, due to this convention -1 cannot be used as a + class label Returns ------- @@ -1045,8 +1053,12 @@ class BaseTransport(BaseEstimator): The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape (n_labeled_target_samples,) - The class labels + yt : array-like, shape (n_target_samples,) + The class labels. If some target samples are unlabeled, fill the + yt's elements with -1. + + Warning: Note that, due to this convention -1 cannot be used as a + class label batch_size : int, optional (default=128) The batch size for out of sample inverse transform @@ -1110,8 +1122,12 @@ class BaseTransport(BaseEstimator): The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape (n_labeled_target_samples,) - The class labels + yt : array-like, shape (n_target_samples,) + The class labels. If some target samples are unlabeled, fill the + yt's elements with -1. + + Warning: Note that, due to this convention -1 cannot be used as a + class label batch_size : int, optional (default=128) The batch size for out of sample inverse transform @@ -1241,8 +1257,12 @@ class SinkhornTransport(BaseTransport): The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape (n_labeled_target_samples,) - The class labels + yt : array-like, shape (n_target_samples,) + The class labels. If some target samples are unlabeled, fill the + yt's elements with -1. + + Warning: Note that, due to this convention -1 cannot be used as a + class label Returns ------- @@ -1333,8 +1353,12 @@ class EMDTransport(BaseTransport): The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape (n_labeled_target_samples,) - The class labels + yt : array-like, shape (n_target_samples,) + The class labels. If some target samples are unlabeled, fill the + yt's elements with -1. + + Warning: Note that, due to this convention -1 cannot be used as a + class label Returns ------- @@ -1434,8 +1458,12 @@ class SinkhornLpl1Transport(BaseTransport): The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape (n_labeled_target_samples,) - The class labels + yt : array-like, shape (n_target_samples,) + The class labels. If some target samples are unlabeled, fill the + yt's elements with -1. + + Warning: Note that, due to this convention -1 cannot be used as a + class label Returns ------- @@ -1545,8 +1573,12 @@ class SinkhornL1l2Transport(BaseTransport): The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape (n_labeled_target_samples,) - The class labels + yt : array-like, shape (n_target_samples,) + The class labels. If some target samples are unlabeled, fill the + yt's elements with -1. + + Warning: Note that, due to this convention -1 cannot be used as a + class label Returns ------- @@ -1662,8 +1694,12 @@ class MappingTransport(BaseEstimator): The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape (n_labeled_target_samples,) - The class labels + yt : array-like, shape (n_target_samples,) + The class labels. If some target samples are unlabeled, fill the + yt's elements with -1. + + Warning: Note that, due to this convention -1 cannot be used as a + class label Returns ------- diff --git a/test/test_da.py b/test/test_da.py index 104a798..593dc53 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -22,60 +22,68 @@ def test_sinkhorn_lpl1_transport_class(): Xs, ys = get_data_classif('3gauss', ns) Xt, yt = get_data_classif('3gauss2', nt) - clf = ot.da.SinkhornLpl1Transport() + otda = ot.da.SinkhornLpl1Transport() # test its computed - clf.fit(Xs=Xs, ys=ys, Xt=Xt) - assert hasattr(clf, "cost_") - assert hasattr(clf, "coupling_") + otda.fit(Xs=Xs, ys=ys, Xt=Xt) + assert hasattr(otda, "cost_") + assert hasattr(otda, "coupling_") # test dimensions of coupling - assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform - transp_Xs = clf.transform(Xs=Xs) + transp_Xs = otda.transform(Xs=Xs) assert_equal(transp_Xs.shape, Xs.shape) Xs_new, _ = get_data_classif('3gauss', ns + 1) - transp_Xs_new = clf.transform(Xs_new) + transp_Xs_new = otda.transform(Xs_new) # check that the oos method is working assert_equal(transp_Xs_new.shape, Xs_new.shape) # test inverse transform - transp_Xt = clf.inverse_transform(Xt=Xt) + transp_Xt = otda.inverse_transform(Xt=Xt) assert_equal(transp_Xt.shape, Xt.shape) Xt_new, _ = get_data_classif('3gauss2', nt + 1) - transp_Xt_new = clf.inverse_transform(Xt=Xt_new) + transp_Xt_new = otda.inverse_transform(Xt=Xt_new) # check that the oos method is working assert_equal(transp_Xt_new.shape, Xt_new.shape) # test fit_transform - transp_Xs = clf.fit_transform(Xs=Xs, ys=ys, Xt=Xt) + transp_Xs = otda.fit_transform(Xs=Xs, ys=ys, Xt=Xt) assert_equal(transp_Xs.shape, Xs.shape) - # test semi supervised mode - clf = ot.da.SinkhornLpl1Transport() - clf.fit(Xs=Xs, ys=ys, Xt=Xt) - n_unsup = np.sum(clf.cost_) + # test unsupervised vs semi-supervised mode + otda_unsup = ot.da.SinkhornLpl1Transport() + otda_unsup.fit(Xs=Xs, ys=ys, Xt=Xt) + n_unsup = np.sum(otda_unsup.cost_) - # test semi supervised mode - clf = ot.da.SinkhornLpl1Transport() - clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf.cost_) + otda_semi = ot.da.SinkhornLpl1Transport() + otda_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) + assert_equal(otda_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(otda_semi.cost_) + # check that the cost matrix norms are indeed different assert n_unsup != n_semisup, "semisupervised mode not working" + # check that the coupling forbids mass transport between labeled source + # and labeled target samples + mass_semi = np.sum( + otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max]) + assert mass_semi == 0, "semisupervised mode not working" + def test_sinkhorn_l1l2_transport_class(): """test_sinkhorn_transport @@ -87,65 +95,75 @@ def test_sinkhorn_l1l2_transport_class(): Xs, ys = get_data_classif('3gauss', ns) Xt, yt = get_data_classif('3gauss2', nt) - clf = ot.da.SinkhornL1l2Transport() + otda = ot.da.SinkhornL1l2Transport() # test its computed - clf.fit(Xs=Xs, ys=ys, Xt=Xt) - assert hasattr(clf, "cost_") - assert hasattr(clf, "coupling_") - assert hasattr(clf, "log_") + otda.fit(Xs=Xs, ys=ys, Xt=Xt) + assert hasattr(otda, "cost_") + assert hasattr(otda, "coupling_") + assert hasattr(otda, "log_") # test dimensions of coupling - assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform - transp_Xs = clf.transform(Xs=Xs) + transp_Xs = otda.transform(Xs=Xs) assert_equal(transp_Xs.shape, Xs.shape) Xs_new, _ = get_data_classif('3gauss', ns + 1) - transp_Xs_new = clf.transform(Xs_new) + transp_Xs_new = otda.transform(Xs_new) # check that the oos method is working assert_equal(transp_Xs_new.shape, Xs_new.shape) # test inverse transform - transp_Xt = clf.inverse_transform(Xt=Xt) + transp_Xt = otda.inverse_transform(Xt=Xt) assert_equal(transp_Xt.shape, Xt.shape) Xt_new, _ = get_data_classif('3gauss2', nt + 1) - transp_Xt_new = clf.inverse_transform(Xt=Xt_new) + transp_Xt_new = otda.inverse_transform(Xt=Xt_new) # check that the oos method is working assert_equal(transp_Xt_new.shape, Xt_new.shape) # test fit_transform - transp_Xs = clf.fit_transform(Xs=Xs, ys=ys, Xt=Xt) + transp_Xs = otda.fit_transform(Xs=Xs, ys=ys, Xt=Xt) assert_equal(transp_Xs.shape, Xs.shape) - # test semi supervised mode - clf = ot.da.SinkhornL1l2Transport() - clf.fit(Xs=Xs, ys=ys, Xt=Xt) - n_unsup = np.sum(clf.cost_) + # test unsupervised vs semi-supervised mode + otda_unsup = ot.da.SinkhornL1l2Transport() + otda_unsup.fit(Xs=Xs, ys=ys, Xt=Xt) + n_unsup = np.sum(otda_unsup.cost_) - # test semi supervised mode - clf = ot.da.SinkhornL1l2Transport() - clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf.cost_) + otda_semi = ot.da.SinkhornL1l2Transport() + otda_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) + assert_equal(otda_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(otda_semi.cost_) + # check that the cost matrix norms are indeed different assert n_unsup != n_semisup, "semisupervised mode not working" + # check that the coupling forbids mass transport between labeled source + # and labeled target samples + mass_semi = np.sum( + otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max]) + mass_semi = otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max] + assert_allclose(mass_semi, np.zeros_like(mass_semi), + rtol=1e-9, atol=1e-9) + # check everything runs well with log=True - clf = ot.da.SinkhornL1l2Transport(log=True) - clf.fit(Xs=Xs, ys=ys, Xt=Xt) - assert len(clf.log_.keys()) != 0 + otda = ot.da.SinkhornL1l2Transport(log=True) + otda.fit(Xs=Xs, ys=ys, Xt=Xt) + assert len(otda.log_.keys()) != 0 def test_sinkhorn_transport_class(): @@ -158,65 +176,73 @@ def test_sinkhorn_transport_class(): Xs, ys = get_data_classif('3gauss', ns) Xt, yt = get_data_classif('3gauss2', nt) - clf = ot.da.SinkhornTransport() + otda = ot.da.SinkhornTransport() # test its computed - clf.fit(Xs=Xs, Xt=Xt) - assert hasattr(clf, "cost_") - assert hasattr(clf, "coupling_") - assert hasattr(clf, "log_") + otda.fit(Xs=Xs, Xt=Xt) + assert hasattr(otda, "cost_") + assert hasattr(otda, "coupling_") + assert hasattr(otda, "log_") # test dimensions of coupling - assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform - transp_Xs = clf.transform(Xs=Xs) + transp_Xs = otda.transform(Xs=Xs) assert_equal(transp_Xs.shape, Xs.shape) Xs_new, _ = get_data_classif('3gauss', ns + 1) - transp_Xs_new = clf.transform(Xs_new) + transp_Xs_new = otda.transform(Xs_new) # check that the oos method is working assert_equal(transp_Xs_new.shape, Xs_new.shape) # test inverse transform - transp_Xt = clf.inverse_transform(Xt=Xt) + transp_Xt = otda.inverse_transform(Xt=Xt) assert_equal(transp_Xt.shape, Xt.shape) Xt_new, _ = get_data_classif('3gauss2', nt + 1) - transp_Xt_new = clf.inverse_transform(Xt=Xt_new) + transp_Xt_new = otda.inverse_transform(Xt=Xt_new) # check that the oos method is working assert_equal(transp_Xt_new.shape, Xt_new.shape) # test fit_transform - transp_Xs = clf.fit_transform(Xs=Xs, Xt=Xt) + transp_Xs = otda.fit_transform(Xs=Xs, Xt=Xt) assert_equal(transp_Xs.shape, Xs.shape) - # test semi supervised mode - clf = ot.da.SinkhornTransport() - clf.fit(Xs=Xs, Xt=Xt) - n_unsup = np.sum(clf.cost_) + # test unsupervised vs semi-supervised mode + otda_unsup = ot.da.SinkhornTransport() + otda_unsup.fit(Xs=Xs, Xt=Xt) + n_unsup = np.sum(otda_unsup.cost_) - # test semi supervised mode - clf = ot.da.SinkhornTransport() - clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf.cost_) + otda_semi = ot.da.SinkhornTransport() + otda_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) + assert_equal(otda_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(otda_semi.cost_) + # check that the cost matrix norms are indeed different assert n_unsup != n_semisup, "semisupervised mode not working" + # check that the coupling forbids mass transport between labeled source + # and labeled target samples + mass_semi = np.sum( + otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max]) + assert mass_semi == 0, "semisupervised mode not working" + # check everything runs well with log=True - clf = ot.da.SinkhornTransport(log=True) - clf.fit(Xs=Xs, ys=ys, Xt=Xt) - assert len(clf.log_.keys()) != 0 + otda = ot.da.SinkhornTransport(log=True) + otda.fit(Xs=Xs, ys=ys, Xt=Xt) + assert len(otda.log_.keys()) != 0 def test_emd_transport_class(): @@ -229,60 +255,72 @@ def test_emd_transport_class(): Xs, ys = get_data_classif('3gauss', ns) Xt, yt = get_data_classif('3gauss2', nt) - clf = ot.da.EMDTransport() + otda = ot.da.EMDTransport() # test its computed - clf.fit(Xs=Xs, Xt=Xt) - assert hasattr(clf, "cost_") - assert hasattr(clf, "coupling_") + otda.fit(Xs=Xs, Xt=Xt) + assert hasattr(otda, "cost_") + assert hasattr(otda, "coupling_") # test dimensions of coupling - assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform - transp_Xs = clf.transform(Xs=Xs) + transp_Xs = otda.transform(Xs=Xs) assert_equal(transp_Xs.shape, Xs.shape) Xs_new, _ = get_data_classif('3gauss', ns + 1) - transp_Xs_new = clf.transform(Xs_new) + transp_Xs_new = otda.transform(Xs_new) # check that the oos method is working assert_equal(transp_Xs_new.shape, Xs_new.shape) # test inverse transform - transp_Xt = clf.inverse_transform(Xt=Xt) + transp_Xt = otda.inverse_transform(Xt=Xt) assert_equal(transp_Xt.shape, Xt.shape) Xt_new, _ = get_data_classif('3gauss2', nt + 1) - transp_Xt_new = clf.inverse_transform(Xt=Xt_new) + transp_Xt_new = otda.inverse_transform(Xt=Xt_new) # check that the oos method is working assert_equal(transp_Xt_new.shape, Xt_new.shape) # test fit_transform - transp_Xs = clf.fit_transform(Xs=Xs, Xt=Xt) + transp_Xs = otda.fit_transform(Xs=Xs, Xt=Xt) assert_equal(transp_Xs.shape, Xs.shape) - # test semi supervised mode - clf = ot.da.EMDTransport() - clf.fit(Xs=Xs, Xt=Xt) - n_unsup = np.sum(clf.cost_) + # test unsupervised vs semi-supervised mode + otda_unsup = ot.da.EMDTransport() + otda_unsup.fit(Xs=Xs, ys=ys, Xt=Xt) + n_unsup = np.sum(otda_unsup.cost_) - # test semi supervised mode - clf = ot.da.EMDTransport() - clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf.cost_) + otda_semi = ot.da.EMDTransport() + otda_semi.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) + assert_equal(otda_semi.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(otda_semi.cost_) + # check that the cost matrix norms are indeed different assert n_unsup != n_semisup, "semisupervised mode not working" + # check that the coupling forbids mass transport between labeled source + # and labeled target samples + mass_semi = np.sum( + otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max]) + mass_semi = otda_semi.coupling_[otda_semi.cost_ == otda_semi.limit_max] + + # we need to use a small tolerance here, otherwise the test breaks + assert_allclose(mass_semi, np.zeros_like(mass_semi), + rtol=1e-2, atol=1e-2) + def test_mapping_transport_class(): """test_mapping_transport @@ -300,47 +338,51 @@ def test_mapping_transport_class(): ########################################################################## # check computation and dimensions if bias == False - clf = ot.da.MappingTransport(kernel="linear", bias=False) - clf.fit(Xs=Xs, Xt=Xt) - assert hasattr(clf, "coupling_") - assert hasattr(clf, "mapping_") - assert hasattr(clf, "log_") + otda = ot.da.MappingTransport(kernel="linear", bias=False) + otda.fit(Xs=Xs, Xt=Xt) + assert hasattr(otda, "coupling_") + assert hasattr(otda, "mapping_") + assert hasattr(otda, "log_") - assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.mapping_.shape, ((Xs.shape[1], Xt.shape[1]))) + assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.mapping_.shape, ((Xs.shape[1], Xt.shape[1]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform - transp_Xs = clf.transform(Xs=Xs) + transp_Xs = otda.transform(Xs=Xs) assert_equal(transp_Xs.shape, Xs.shape) - transp_Xs_new = clf.transform(Xs_new) + transp_Xs_new = otda.transform(Xs_new) # check that the oos method is working assert_equal(transp_Xs_new.shape, Xs_new.shape) # check computation and dimensions if bias == True - clf = ot.da.MappingTransport(kernel="linear", bias=True) - clf.fit(Xs=Xs, Xt=Xt) - assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.mapping_.shape, ((Xs.shape[1] + 1, Xt.shape[1]))) + otda = ot.da.MappingTransport(kernel="linear", bias=True) + otda.fit(Xs=Xs, Xt=Xt) + assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.mapping_.shape, ((Xs.shape[1] + 1, Xt.shape[1]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform - transp_Xs = clf.transform(Xs=Xs) + transp_Xs = otda.transform(Xs=Xs) assert_equal(transp_Xs.shape, Xs.shape) - transp_Xs_new = clf.transform(Xs_new) + transp_Xs_new = otda.transform(Xs_new) # check that the oos method is working assert_equal(transp_Xs_new.shape, Xs_new.shape) @@ -350,52 +392,56 @@ def test_mapping_transport_class(): ########################################################################## # check computation and dimensions if bias == False - clf = ot.da.MappingTransport(kernel="gaussian", bias=False) - clf.fit(Xs=Xs, Xt=Xt) + otda = ot.da.MappingTransport(kernel="gaussian", bias=False) + otda.fit(Xs=Xs, Xt=Xt) - assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.mapping_.shape, ((Xs.shape[0], Xt.shape[1]))) + assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.mapping_.shape, ((Xs.shape[0], Xt.shape[1]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform - transp_Xs = clf.transform(Xs=Xs) + transp_Xs = otda.transform(Xs=Xs) assert_equal(transp_Xs.shape, Xs.shape) - transp_Xs_new = clf.transform(Xs_new) + transp_Xs_new = otda.transform(Xs_new) # check that the oos method is working assert_equal(transp_Xs_new.shape, Xs_new.shape) # check computation and dimensions if bias == True - clf = ot.da.MappingTransport(kernel="gaussian", bias=True) - clf.fit(Xs=Xs, Xt=Xt) - assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.mapping_.shape, ((Xs.shape[0] + 1, Xt.shape[1]))) + otda = ot.da.MappingTransport(kernel="gaussian", bias=True) + otda.fit(Xs=Xs, Xt=Xt) + assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(otda.mapping_.shape, ((Xs.shape[0] + 1, Xt.shape[1]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose( + np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform - transp_Xs = clf.transform(Xs=Xs) + transp_Xs = otda.transform(Xs=Xs) assert_equal(transp_Xs.shape, Xs.shape) - transp_Xs_new = clf.transform(Xs_new) + transp_Xs_new = otda.transform(Xs_new) # check that the oos method is working assert_equal(transp_Xs_new.shape, Xs_new.shape) # check everything runs well with log=True - clf = ot.da.MappingTransport(kernel="gaussian", log=True) - clf.fit(Xs=Xs, Xt=Xt) - assert len(clf.log_.keys()) != 0 + otda = ot.da.MappingTransport(kernel="gaussian", log=True) + otda.fit(Xs=Xs, Xt=Xt) + assert len(otda.log_.keys()) != 0 def test_otda(): @@ -424,7 +470,8 @@ def test_otda(): da_entrop.interp() da_entrop.predict(xs) - np.testing.assert_allclose(a, np.sum(da_entrop.G, 1), rtol=1e-3, atol=1e-3) + np.testing.assert_allclose( + a, np.sum(da_entrop.G, 1), rtol=1e-3, atol=1e-3) np.testing.assert_allclose(b, np.sum(da_entrop.G, 0), rtol=1e-3, atol=1e-3) # non-convex Group lasso regularization @@ -458,12 +505,3 @@ def test_otda(): da_emd = ot.da.OTDA_mapping_kernel() # init class da_emd.fit(xs, xt, numItermax=10) # fit distributions da_emd.predict(xs) # interpolation of source samples - - -# if __name__ == "__main__": - -# test_sinkhorn_transport_class() -# test_emd_transport_class() -# test_sinkhorn_l1l2_transport_class() -# test_sinkhorn_lpl1_transport_class() -# test_mapping_transport_class() |