diff options
author | Slasnista <stan.chambon@gmail.com> | 2017-08-23 13:50:24 +0200 |
---|---|---|
committer | Slasnista <stan.chambon@gmail.com> | 2017-08-23 13:50:24 +0200 |
commit | 791a4a6f215033a75d5f56cd16fe2412301bec14 (patch) | |
tree | 9e1d54a85d6cae07ab745363114b8e662dc7dce9 | |
parent | 8149e059be7f715834d11b365855f2684bd3d6f5 (diff) |
out of samples transform and inverse transform by batch
-rw-r--r-- | ot/da.py | 89 | ||||
-rw-r--r-- | test/test_da.py | 66 |
2 files changed, 91 insertions, 64 deletions
@@ -1147,7 +1147,7 @@ class BaseTransport(BaseEstimator): return self.fit(Xs, ys, Xt, yt).transform(Xs, ys, Xt, yt) - def transform(self, Xs=None, ys=None, Xt=None, yt=None): + def transform(self, Xs=None, ys=None, Xt=None, yt=None, batch_size=128): """Transports source samples Xs onto target ones Xt Parameters @@ -1160,6 +1160,8 @@ class BaseTransport(BaseEstimator): The training input samples. yt : array-like, shape (n_labeled_target_samples,) The class labels + batch_size : int, optional (default=128) + The batch size for out of sample inverse transform Returns ------- @@ -1178,34 +1180,48 @@ class BaseTransport(BaseEstimator): transp_Xs = np.dot(transp, self.Xt) else: # perform out of sample mapping + indices = np.arange(Xs.shape[0]) + batch_ind = [ + indices[i:i + batch_size] + for i in range(0, len(indices), batch_size)] - # get the nearest neighbor in the source domain - D0 = dist(Xs, self.Xs) - idx = np.argmin(D0, axis=1) + transp_Xs = [] + for bi in batch_ind: - # transport the source samples - transp = self.coupling_ / np.sum(self.coupling_, 1)[:, None] - transp[~ np.isfinite(transp)] = 0 - transp_Xs_ = np.dot(transp, self.Xt) + # get the nearest neighbor in the source domain + D0 = dist(Xs[bi], self.Xs) + idx = np.argmin(D0, axis=1) + + # transport the source samples + transp = self.coupling_ / np.sum(self.coupling_, 1)[:, None] + transp[~ np.isfinite(transp)] = 0 + transp_Xs_ = np.dot(transp, self.Xt) - # define the transported points - transp_Xs = transp_Xs_[idx, :] + Xs - self.Xs[idx, :] + # define the transported points + transp_Xs_ = transp_Xs_[idx, :] + Xs[bi] - self.Xs[idx, :] + + transp_Xs.append(transp_Xs_) + + transp_Xs = np.concatenate(transp_Xs, axis=0) return transp_Xs - def inverse_transform(self, Xs=None, ys=None, Xt=None, yt=None): + def inverse_transform(self, Xs=None, ys=None, Xt=None, yt=None, + batch_size=128): """Transports target samples Xt onto target samples Xs Parameters ---------- Xs : array-like, shape (n_source_samples, n_features) The training input samples. - ys : array-like, shape = (n_source_samples,) + ys : array-like, shape (n_source_samples,) The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape = (n_labeled_target_samples,) + yt : array-like, shape (n_labeled_target_samples,) The class labels + batch_size : int, optional (default=128) + The batch size for out of sample inverse transform Returns ------- @@ -1224,17 +1240,28 @@ class BaseTransport(BaseEstimator): transp_Xt = np.dot(transp_, self.Xs) else: # perform out of sample mapping + indices = np.arange(Xt.shape[0]) + batch_ind = [ + indices[i:i + batch_size] + for i in range(0, len(indices), batch_size)] - D0 = dist(Xt, self.Xt) - idx = np.argmin(D0, axis=1) + transp_Xt = [] + for bi in batch_ind: - # transport the target samples - transp_ = self.coupling_.T / np.sum(self.coupling_, 0)[:, None] - transp_[~ np.isfinite(transp_)] = 0 - transp_Xt_ = np.dot(transp_, self.Xs) + D0 = dist(Xt[bi], self.Xt) + idx = np.argmin(D0, axis=1) + + # transport the target samples + transp_ = self.coupling_.T / np.sum(self.coupling_, 0)[:, None] + transp_[~ np.isfinite(transp_)] = 0 + transp_Xt_ = np.dot(transp_, self.Xs) + + # define the transported points + transp_Xt_ = transp_Xt_[idx, :] + Xt[bi] - self.Xt[idx, :] - # define the transported points - transp_Xt = transp_Xt_[idx, :] + Xt - self.Xt[idx, :] + transp_Xt.append(transp_Xt_) + + transp_Xt = np.concatenate(transp_Xt, axis=0) return transp_Xt @@ -1306,11 +1333,11 @@ class SinkhornTransport(BaseTransport): ---------- Xs : array-like, shape (n_source_samples, n_features) The training input samples. - ys : array-like, shape = (n_source_samples,) + ys : array-like, shape (n_source_samples,) The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape = (n_labeled_target_samples,) + yt : array-like, shape (n_labeled_target_samples,) The class labels Returns @@ -1381,11 +1408,11 @@ class EMDTransport(BaseTransport): ---------- Xs : array-like, shape (n_source_samples, n_features) The training input samples. - ys : array-like, shape = (n_source_samples,) + ys : array-like, shape (n_source_samples,) The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape = (n_labeled_target_samples,) + yt : array-like, shape (n_labeled_target_samples,) The class labels Returns @@ -1480,11 +1507,11 @@ class SinkhornLpl1Transport(BaseTransport): ---------- Xs : array-like, shape (n_source_samples, n_features) The training input samples. - ys : array-like, shape = (n_source_samples,) + ys : array-like, shape (n_source_samples,) The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape = (n_labeled_target_samples,) + yt : array-like, shape (n_labeled_target_samples,) The class labels Returns @@ -1581,11 +1608,11 @@ class SinkhornL1l2Transport(BaseTransport): ---------- Xs : array-like, shape (n_source_samples, n_features) The training input samples. - ys : array-like, shape = (n_source_samples,) + ys : array-like, shape (n_source_samples,) The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape = (n_labeled_target_samples,) + yt : array-like, shape (n_labeled_target_samples,) The class labels Returns @@ -1675,11 +1702,11 @@ class MappingTransport(BaseEstimator): ---------- Xs : array-like, shape (n_source_samples, n_features) The training input samples. - ys : array-like, shape = (n_source_samples,) + ys : array-like, shape (n_source_samples,) The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape = (n_labeled_target_samples,) + yt : array-like, shape (n_labeled_target_samples,) The class labels Returns diff --git a/test/test_da.py b/test/test_da.py index 93f7e83..196f4c4 100644 --- a/test/test_da.py +++ b/test/test_da.py @@ -28,14 +28,14 @@ def test_sinkhorn_lpl1_transport_class(): clf.fit(Xs=Xs, ys=ys, Xt=Xt) # test dimensions of coupling - assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.Coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.Coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.Coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform transp_Xs = clf.transform(Xs=Xs) @@ -64,13 +64,13 @@ def test_sinkhorn_lpl1_transport_class(): # test semi supervised mode clf = ot.da.SinkhornLpl1Transport() clf.fit(Xs=Xs, ys=ys, Xt=Xt) - n_unsup = np.sum(clf.Cost) + n_unsup = np.sum(clf.cost_) # test semi supervised mode clf = ot.da.SinkhornLpl1Transport() clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf.Cost) + assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(clf.cost_) assert n_unsup != n_semisup, "semisupervised mode not working" @@ -91,14 +91,14 @@ def test_sinkhorn_l1l2_transport_class(): clf.fit(Xs=Xs, ys=ys, Xt=Xt) # test dimensions of coupling - assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.Coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.Coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.Coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform transp_Xs = clf.transform(Xs=Xs) @@ -127,13 +127,13 @@ def test_sinkhorn_l1l2_transport_class(): # test semi supervised mode clf = ot.da.SinkhornL1l2Transport() clf.fit(Xs=Xs, ys=ys, Xt=Xt) - n_unsup = np.sum(clf.Cost) + n_unsup = np.sum(clf.cost_) # test semi supervised mode clf = ot.da.SinkhornL1l2Transport() clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf.Cost) + assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(clf.cost_) assert n_unsup != n_semisup, "semisupervised mode not working" @@ -154,14 +154,14 @@ def test_sinkhorn_transport_class(): clf.fit(Xs=Xs, Xt=Xt) # test dimensions of coupling - assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.Coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.Coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.Coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform transp_Xs = clf.transform(Xs=Xs) @@ -190,13 +190,13 @@ def test_sinkhorn_transport_class(): # test semi supervised mode clf = ot.da.SinkhornTransport() clf.fit(Xs=Xs, Xt=Xt) - n_unsup = np.sum(clf.Cost) + n_unsup = np.sum(clf.cost_) # test semi supervised mode clf = ot.da.SinkhornTransport() clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf.Cost) + assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(clf.cost_) assert n_unsup != n_semisup, "semisupervised mode not working" @@ -217,14 +217,14 @@ def test_emd_transport_class(): clf.fit(Xs=Xs, Xt=Xt) # test dimensions of coupling - assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) - assert_equal(clf.Coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + assert_equal(clf.coupling_.shape, ((Xs.shape[0], Xt.shape[0]))) # test margin constraints mu_s = unif(ns) mu_t = unif(nt) - assert_allclose(np.sum(clf.Coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) - assert_allclose(np.sum(clf.Coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3) + assert_allclose(np.sum(clf.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3) # test transform transp_Xs = clf.transform(Xs=Xs) @@ -253,13 +253,13 @@ def test_emd_transport_class(): # test semi supervised mode clf = ot.da.EMDTransport() clf.fit(Xs=Xs, Xt=Xt) - n_unsup = np.sum(clf.Cost) + n_unsup = np.sum(clf.cost_) # test semi supervised mode clf = ot.da.EMDTransport() clf.fit(Xs=Xs, ys=ys, Xt=Xt, yt=yt) - assert_equal(clf.Cost.shape, ((Xs.shape[0], Xt.shape[0]))) - n_semisup = np.sum(clf.Cost) + assert_equal(clf.cost_.shape, ((Xs.shape[0], Xt.shape[0]))) + n_semisup = np.sum(clf.cost_) assert n_unsup != n_semisup, "semisupervised mode not working" @@ -326,9 +326,9 @@ def test_otda(): da_emd.predict(xs) # interpolation of source samples -if __name__ == "__main__": +# if __name__ == "__main__": - test_sinkhorn_transport_class() - test_emd_transport_class() - test_sinkhorn_l1l2_transport_class() - test_sinkhorn_lpl1_transport_class() +# test_sinkhorn_transport_class() +# test_emd_transport_class() +# test_sinkhorn_l1l2_transport_class() +# test_sinkhorn_lpl1_transport_class() |