From 363c5f92a4865527320edcff97036e62a7ca28c9 Mon Sep 17 00:00:00 2001 From: Slasnista Date: Mon, 4 Sep 2017 09:12:32 +0200 Subject: doc string + example --- examples/da/plot_otda_semi_supervised.py | 142 +++++++++++++++++++++++++++++++ 1 file changed, 142 insertions(+) create mode 100644 examples/da/plot_otda_semi_supervised.py (limited to 'examples/da/plot_otda_semi_supervised.py') diff --git a/examples/da/plot_otda_semi_supervised.py b/examples/da/plot_otda_semi_supervised.py new file mode 100644 index 0000000..6e6296b --- /dev/null +++ b/examples/da/plot_otda_semi_supervised.py @@ -0,0 +1,142 @@ +# -*- coding: utf-8 -*- +""" +============================================ +OTDA unsupervised vs semi-supervised setting +============================================ + +This example introduces a semi supervised domain adaptation in a 2D setting. +It explicits the problem of semi supervised domain adaptation and introduces +some optimal transport approaches to solve it. + +Quantities such as optimal couplings, greater coupling coefficients and +transported samples are represented in order to give a visual understanding +of what the transport methods are doing. +""" + +# Authors: Remi Flamary +# Stanislas Chambon +# +# License: MIT License + +import matplotlib.pylab as pl +import ot + + +############################################################################## +# generate data +############################################################################## + +n_samples_source = 150 +n_samples_target = 150 + +Xs, ys = ot.datasets.get_data_classif('3gauss', n_samples_source) +Xt, yt = ot.datasets.get_data_classif('3gauss2', n_samples_target) + +# Cost matrix +M = ot.dist(Xs, Xt, metric='sqeuclidean') + + +############################################################################## +# Transport source samples onto target samples +############################################################################## + +# unsupervised domain adaptation +ot_sinkhorn_un = ot.da.SinkhornTransport(reg_e=1e-1) +ot_sinkhorn_un.fit(Xs=Xs, Xt=Xt) +transp_Xs_sinkhorn_un = ot_sinkhorn_un.transform(Xs=Xs) + +# semi-supervised domain adaptation +ot_sinkhorn_semi = ot.da.SinkhornTransport(reg_e=1e-1) +ot_sinkhorn_semi.fit(Xs=Xs, Xt=Xt, ys=ys, yt=yt) +transp_Xs_sinkhorn_semi = ot_sinkhorn_semi.transform(Xs=Xs) + +# semi supervised DA uses available labaled target samples to modify the cost +# matrix involved in the OT problem. The cost of transporting a source sample +# of class A onto a target sample of class B != A is set to infinite, or a +# very large value + + +############################################################################## +# Fig 1 : plots source and target samples + matrix of pairwise distance +############################################################################## + +pl.figure(1, figsize=(10, 10)) +pl.subplot(2, 2, 1) +pl.scatter(Xs[:, 0], Xs[:, 1], c=ys, marker='+', label='Source samples') +pl.xticks([]) +pl.yticks([]) +pl.legend(loc=0) +pl.title('Source samples') + +pl.subplot(2, 2, 2) +pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o', label='Target samples') +pl.xticks([]) +pl.yticks([]) +pl.legend(loc=0) +pl.title('Target samples') + +pl.subplot(2, 2, 3) +pl.imshow(ot_sinkhorn_un.cost_, interpolation='nearest') +pl.xticks([]) +pl.yticks([]) +pl.title('Cost matrix - unsupervised DA') + +pl.subplot(2, 2, 4) +pl.imshow(ot_sinkhorn_semi.cost_, interpolation='nearest') +pl.xticks([]) +pl.yticks([]) +pl.title('Cost matrix - semisupervised DA') + +pl.tight_layout() + +# the optimal coupling in the semi-supervised DA case will exhibit " shape +# similar" to the cost matrix, (block diagonal matrix) + +############################################################################## +# Fig 2 : plots optimal couplings for the different methods +############################################################################## + +pl.figure(2, figsize=(8, 4)) + +pl.subplot(1, 2, 1) +pl.imshow(ot_sinkhorn_un.coupling_, interpolation='nearest') +pl.xticks([]) +pl.yticks([]) +pl.title('Optimal coupling\nUnsupervised DA') + +pl.subplot(1, 2, 2) +pl.imshow(ot_sinkhorn_semi.coupling_, interpolation='nearest') +pl.xticks([]) +pl.yticks([]) +pl.title('Optimal coupling\nSemi-supervised DA') + +pl.tight_layout() + + +############################################################################## +# Fig 3 : plot transported samples +############################################################################## + +# display transported samples +pl.figure(4, figsize=(8, 4)) +pl.subplot(1, 2, 1) +pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o', + label='Target samples', alpha=0.5) +pl.scatter(transp_Xs_sinkhorn_un[:, 0], transp_Xs_sinkhorn_un[:, 1], c=ys, + marker='+', label='Transp samples', s=30) +pl.title('Transported samples\nEmdTransport') +pl.legend(loc=0) +pl.xticks([]) +pl.yticks([]) + +pl.subplot(1, 2, 2) +pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o', + label='Target samples', alpha=0.5) +pl.scatter(transp_Xs_sinkhorn_semi[:, 0], transp_Xs_sinkhorn_semi[:, 1], c=ys, + marker='+', label='Transp samples', s=30) +pl.title('Transported samples\nSinkhornTransport') +pl.xticks([]) +pl.yticks([]) + +pl.tight_layout() +pl.show() -- cgit v1.2.3 From 669a6bee4200f6a2f1f6bbf597712684ff7272a8 Mon Sep 17 00:00:00 2001 From: Slasnista Date: Mon, 4 Sep 2017 09:17:59 +0200 Subject: commenting the example --- examples/da/plot_otda_semi_supervised.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'examples/da/plot_otda_semi_supervised.py') diff --git a/examples/da/plot_otda_semi_supervised.py b/examples/da/plot_otda_semi_supervised.py index 6e6296b..8095c4d 100644 --- a/examples/da/plot_otda_semi_supervised.py +++ b/examples/da/plot_otda_semi_supervised.py @@ -32,9 +32,6 @@ n_samples_target = 150 Xs, ys = ot.datasets.get_data_classif('3gauss', n_samples_source) Xt, yt = ot.datasets.get_data_classif('3gauss2', n_samples_target) -# Cost matrix -M = ot.dist(Xs, Xt, metric='sqeuclidean') - ############################################################################## # Transport source samples onto target samples @@ -55,6 +52,13 @@ transp_Xs_sinkhorn_semi = ot_sinkhorn_semi.transform(Xs=Xs) # of class A onto a target sample of class B != A is set to infinite, or a # very large value +# note that in the present case we consider that all the target samples are +# labeled. For daily applications, some target sample might not have labels, +# in this case the element of yt corresponding to these samples should be +# filled with -1. + +# Warning: we recall that -1 cannot be used as a class label + ############################################################################## # Fig 1 : plots source and target samples + matrix of pairwise distance @@ -92,6 +96,7 @@ pl.tight_layout() # the optimal coupling in the semi-supervised DA case will exhibit " shape # similar" to the cost matrix, (block diagonal matrix) + ############################################################################## # Fig 2 : plots optimal couplings for the different methods ############################################################################## -- cgit v1.2.3