From 30bfc5ce5acd98991b3d01e313d0c14f0e600b14 Mon Sep 17 00:00:00 2001 From: Slasnista Date: Mon, 4 Sep 2017 08:46:36 +0200 Subject: correction semi supervised case --- ot/da.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'ot') diff --git a/ot/da.py b/ot/da.py index 564c7b7..e694668 100644 --- a/ot/da.py +++ b/ot/da.py @@ -989,7 +989,7 @@ class BaseTransport(BaseEstimator): # assumes labeled source samples occupy the first rows # and labeled target samples occupy the first columns - classes = np.unique(ys) + classes = [c for c in np.unique(ys) if c != -1] for c in classes: idx_s = np.where((ys != c) & (ys != -1)) idx_t = np.where(yt == c) -- cgit v1.2.3 From 363c5f92a4865527320edcff97036e62a7ca28c9 Mon Sep 17 00:00:00 2001 From: Slasnista Date: Mon, 4 Sep 2017 09:12:32 +0200 Subject: doc string + example --- examples/da/plot_otda_semi_supervised.py | 142 +++++++++++++++++++++++++++++++ ot/da.py | 72 ++++++++++++---- 2 files changed, 196 insertions(+), 18 deletions(-) create mode 100644 examples/da/plot_otda_semi_supervised.py (limited to 'ot') diff --git a/examples/da/plot_otda_semi_supervised.py b/examples/da/plot_otda_semi_supervised.py new file mode 100644 index 0000000..6e6296b --- /dev/null +++ b/examples/da/plot_otda_semi_supervised.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- +""" +============================================ +OTDA unsupervised vs semi-supervised setting +============================================ + +This example introduces a semi supervised domain adaptation in a 2D setting. +It illustrates the problem of semi supervised domain adaptation and introduces +some optimal transport approaches to solve it. + +Quantities such as optimal couplings, greater coupling coefficients and +transported samples are represented in order to give a visual understanding +of what the transport methods are doing. 
+""" + +# Authors: Remi Flamary +# Stanislas Chambon +# +# License: MIT License + +import matplotlib.pylab as pl +import ot + + +############################################################################## +# generate data +############################################################################## + +n_samples_source = 150 +n_samples_target = 150 + +Xs, ys = ot.datasets.get_data_classif('3gauss', n_samples_source) +Xt, yt = ot.datasets.get_data_classif('3gauss2', n_samples_target) + +# Cost matrix +M = ot.dist(Xs, Xt, metric='sqeuclidean') + + +############################################################################## +# Transport source samples onto target samples +############################################################################## + +# unsupervised domain adaptation +ot_sinkhorn_un = ot.da.SinkhornTransport(reg_e=1e-1) +ot_sinkhorn_un.fit(Xs=Xs, Xt=Xt) +transp_Xs_sinkhorn_un = ot_sinkhorn_un.transform(Xs=Xs) + +# semi-supervised domain adaptation +ot_sinkhorn_semi = ot.da.SinkhornTransport(reg_e=1e-1) +ot_sinkhorn_semi.fit(Xs=Xs, Xt=Xt, ys=ys, yt=yt) +transp_Xs_sinkhorn_semi = ot_sinkhorn_semi.transform(Xs=Xs) + +# semi supervised DA uses available labeled target samples to modify the cost +# matrix involved in the OT problem. 
The cost of transporting a source sample +# of class A onto a target sample of class B != A is set to infinity, or a +# very large value + + +############################################################################## +# Fig 1 : plots source and target samples + matrix of pairwise distance +############################################################################## + +pl.figure(1, figsize=(10, 10)) +pl.subplot(2, 2, 1) +pl.scatter(Xs[:, 0], Xs[:, 1], c=ys, marker='+', label='Source samples') +pl.xticks([]) +pl.yticks([]) +pl.legend(loc=0) +pl.title('Source samples') + +pl.subplot(2, 2, 2) +pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o', label='Target samples') +pl.xticks([]) +pl.yticks([]) +pl.legend(loc=0) +pl.title('Target samples') + +pl.subplot(2, 2, 3) +pl.imshow(ot_sinkhorn_un.cost_, interpolation='nearest') +pl.xticks([]) +pl.yticks([]) +pl.title('Cost matrix - unsupervised DA') + +pl.subplot(2, 2, 4) +pl.imshow(ot_sinkhorn_semi.cost_, interpolation='nearest') +pl.xticks([]) +pl.yticks([]) +pl.title('Cost matrix - semisupervised DA') + +pl.tight_layout() + +# the optimal coupling in the semi-supervised DA case will exhibit a shape +# similar to the cost matrix (block diagonal matrix) + +############################################################################## +# Fig 2 : plots optimal couplings for the different methods +############################################################################## + +pl.figure(2, figsize=(8, 4)) + +pl.subplot(1, 2, 1) +pl.imshow(ot_sinkhorn_un.coupling_, interpolation='nearest') +pl.xticks([]) +pl.yticks([]) +pl.title('Optimal coupling\nUnsupervised DA') + +pl.subplot(1, 2, 2) +pl.imshow(ot_sinkhorn_semi.coupling_, interpolation='nearest') +pl.xticks([]) +pl.yticks([]) +pl.title('Optimal coupling\nSemi-supervised DA') + +pl.tight_layout() + + +############################################################################## +# Fig 3 : plot transported samples 
+############################################################################## + +# display transported samples +pl.figure(4, figsize=(8, 4)) +pl.subplot(1, 2, 1) +pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o', + label='Target samples', alpha=0.5) +pl.scatter(transp_Xs_sinkhorn_un[:, 0], transp_Xs_sinkhorn_un[:, 1], c=ys, + marker='+', label='Transp samples', s=30) +pl.title('Transported samples\nEmdTransport') +pl.legend(loc=0) +pl.xticks([]) +pl.yticks([]) + +pl.subplot(1, 2, 2) +pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o', + label='Target samples', alpha=0.5) +pl.scatter(transp_Xs_sinkhorn_semi[:, 0], transp_Xs_sinkhorn_semi[:, 1], c=ys, + marker='+', label='Transp samples', s=30) +pl.title('Transported samples\nSinkhornTransport') +pl.xticks([]) +pl.yticks([]) + +pl.tight_layout() +pl.show() diff --git a/ot/da.py b/ot/da.py index e694668..1d3d0ba 100644 --- a/ot/da.py +++ b/ot/da.py @@ -966,8 +966,12 @@ class BaseTransport(BaseEstimator): The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape (n_labeled_target_samples,) - The class labels + yt : array-like, shape (n_target_samples,) + The class labels. If some target samples are unlabeled, fill the + yt's elements with -1. + + Warning: Note that, due to this convention -1 cannot be used as a + class label Returns ------- @@ -1023,8 +1027,12 @@ class BaseTransport(BaseEstimator): The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape (n_labeled_target_samples,) - The class labels + yt : array-like, shape (n_target_samples,) + The class labels. If some target samples are unlabeled, fill the + yt's elements with -1. + + Warning: Note that, due to this convention -1 cannot be used as a + class label Returns ------- @@ -1045,8 +1053,12 @@ class BaseTransport(BaseEstimator): The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. 
- yt : array-like, shape (n_labeled_target_samples,) - The class labels + yt : array-like, shape (n_target_samples,) + The class labels. If some target samples are unlabeled, fill the + yt's elements with -1. + + Warning: Note that, due to this convention -1 cannot be used as a + class label batch_size : int, optional (default=128) The batch size for out of sample inverse transform @@ -1110,8 +1122,12 @@ class BaseTransport(BaseEstimator): The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape (n_labeled_target_samples,) - The class labels + yt : array-like, shape (n_target_samples,) + The class labels. If some target samples are unlabeled, fill the + yt's elements with -1. + + Warning: Note that, due to this convention -1 cannot be used as a + class label batch_size : int, optional (default=128) The batch size for out of sample inverse transform @@ -1241,8 +1257,12 @@ class SinkhornTransport(BaseTransport): The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape (n_labeled_target_samples,) - The class labels + yt : array-like, shape (n_target_samples,) + The class labels. If some target samples are unlabeled, fill the + yt's elements with -1. + + Warning: Note that, due to this convention -1 cannot be used as a + class label Returns ------- @@ -1333,8 +1353,12 @@ class EMDTransport(BaseTransport): The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape (n_labeled_target_samples,) - The class labels + yt : array-like, shape (n_target_samples,) + The class labels. If some target samples are unlabeled, fill the + yt's elements with -1. 
+ + Warning: Note that, due to this convention -1 cannot be used as a + class label Returns ------- @@ -1434,8 +1458,12 @@ class SinkhornLpl1Transport(BaseTransport): The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape (n_labeled_target_samples,) - The class labels + yt : array-like, shape (n_target_samples,) + The class labels. If some target samples are unlabeled, fill the + yt's elements with -1. + + Warning: Note that, due to this convention -1 cannot be used as a + class label Returns ------- @@ -1545,8 +1573,12 @@ class SinkhornL1l2Transport(BaseTransport): The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape (n_labeled_target_samples,) - The class labels + yt : array-like, shape (n_target_samples,) + The class labels. If some target samples are unlabeled, fill the + yt's elements with -1. + + Warning: Note that, due to this convention -1 cannot be used as a + class label Returns ------- @@ -1662,8 +1694,12 @@ class MappingTransport(BaseEstimator): The class labels Xt : array-like, shape (n_target_samples, n_features) The training input samples. - yt : array-like, shape (n_labeled_target_samples,) - The class labels + yt : array-like, shape (n_target_samples,) + The class labels. If some target samples are unlabeled, fill the + yt's elements with -1. + + Warning: Note that, due to this convention -1 cannot be used as a + class label Returns ------- -- cgit v1.2.3