diff options
author | Rémi Flamary <remi.flamary@gmail.com> | 2017-09-15 13:57:01 +0200 |
---|---|---|
committer | Rémi Flamary <remi.flamary@gmail.com> | 2017-09-15 13:57:01 +0200 |
commit | dd3546baf9c59733b2109a971293eba48d2eaed3 (patch) | |
tree | dbc9c5dd126eecf537acbe7d205b91250f2bdc9b /examples | |
parent | bad3d95523d005a4fbf64dd009c716b9dd560fe3 (diff) |
add all files for doc
Diffstat (limited to 'examples')
-rw-r--r-- | examples/plot_gromov.py | 40 | ||||
-rwxr-xr-x | examples/plot_gromov_barycenter.py | 42 | ||||
-rw-r--r-- | examples/plot_otda_semi_supervised.py | 148 |
3 files changed, 190 insertions, 40 deletions
diff --git a/examples/plot_gromov.py b/examples/plot_gromov.py index 5132024..d3f724c 100644 --- a/examples/plot_gromov.py +++ b/examples/plot_gromov.py @@ -20,13 +20,14 @@ from mpl_toolkits.mplot3d import Axes3D # noqa import ot
-"""
-Sample two Gaussian distributions (2D and 3D)
-=============================================
-The Gromov-Wasserstein distance allows to compute distances with samples that
-do not belong to the same metric space. For demonstration purpose, we sample
-two Gaussian distributions in 2- and 3-dimensional spaces.
-"""
+##############################################################################
+# Sample two Gaussian distributions (2D and 3D)
+# ---------------------------------------------
+#
+# The Gromov-Wasserstein distance allows computing distances with samples that
+# do not belong to the same metric space. For demonstration purposes, we sample
+# two Gaussian distributions in 2- and 3-dimensional spaces.
+
n_samples = 30 # nb samples
@@ -42,10 +43,11 @@ P = sp.linalg.sqrtm(cov_t) xt = np.random.randn(n_samples, 3).dot(P) + mu_t
-"""
-Plotting the distributions
-==========================
-"""
+##############################################################################
+# Plotting the distributions
+# --------------------------
+
+
fig = pl.figure()
ax1 = fig.add_subplot(121)
ax1.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
@@ -54,10 +56,10 @@ ax2.scatter(xt[:, 0], xt[:, 1], xt[:, 2], color='r') pl.show()
-"""
-Compute distance kernels, normalize them and then display
-=========================================================
-"""
+##############################################################################
+# Compute distance kernels, normalize them and then display
+# ---------------------------------------------------------
+
C1 = sp.spatial.distance.cdist(xs, xs)
C2 = sp.spatial.distance.cdist(xt, xt)
@@ -72,10 +74,10 @@ pl.subplot(122) pl.imshow(C2)
pl.show()
-"""
-Compute Gromov-Wasserstein plans and distance
-=============================================
-"""
+##############################################################################
+# Compute Gromov-Wasserstein plans and distance
+# ---------------------------------------------
+
p = ot.unif(n_samples)
q = ot.unif(n_samples)
diff --git a/examples/plot_gromov_barycenter.py b/examples/plot_gromov_barycenter.py index 93533c0..180b0cf 100755 --- a/examples/plot_gromov_barycenter.py +++ b/examples/plot_gromov_barycenter.py @@ -24,12 +24,12 @@ from sklearn.decomposition import PCA import ot
-"""
-Smacof MDS
-==========
-This function allows to find an embedding of points given a dissimilarity matrix
-that will be given by the output of the algorithm
-"""
+##############################################################################
+# Smacof MDS
+# ----------
+#
+# This function allows finding an embedding of points from a dissimilarity
+# matrix, such as the one produced as output of the algorithm
def smacof_mds(C, dim, max_iter=3000, eps=1e-9):
@@ -78,11 +78,11 @@ def smacof_mds(C, dim, max_iter=3000, eps=1e-9): return npos
-"""
-Data preparation
-================
-The four distributions are constructed from 4 simple images
-"""
+##############################################################################
+# Data preparation
+# ----------------
+#
+# The four distributions are constructed from 4 simple images
def im2mat(I):
@@ -110,12 +110,11 @@ for nb in range(4): xs = np.array([np.array(xs[0]), np.array(xs[1]),
np.array(xs[2]), np.array(xs[3])])
+##############################################################################
+# Barycenter computation
+# ----------------------
+
-"""
-Barycenter computation
-======================
-The four distributions are constructed from 4 simple images
-"""
ns = [len(xs[s]) for s in range(S)]
n_samples = 30
@@ -157,12 +156,13 @@ for i in range(2): ], p, lambdast[i], 'square_loss', 5e-4,
max_iter=100, tol=1e-3)
-"""
-Visualization
-=============
-"""
-"""The PCA helps in getting consistency between the rotations"""
+##############################################################################
+# Visualization
+# -------------
+#
+# The PCA helps in getting consistency between the rotations
+
clf = PCA(n_components=2)
npos = [0, 0, 0, 0]
diff --git a/examples/plot_otda_semi_supervised.py b/examples/plot_otda_semi_supervised.py new file mode 100644 index 0000000..7963aef --- /dev/null +++ b/examples/plot_otda_semi_supervised.py @@ -0,0 +1,148 @@ +# -*- coding: utf-8 -*- +""" +============================================ +OTDA unsupervised vs semi-supervised setting +============================================ + +This example introduces a semi supervised domain adaptation in a 2D setting. +It explicits the problem of semi supervised domain adaptation and introduces +some optimal transport approaches to solve it. + +Quantities such as optimal couplings, greater coupling coefficients and +transported samples are represented in order to give a visual understanding +of what the transport methods are doing. +""" + +# Authors: Remi Flamary <remi.flamary@unice.fr> +# Stanislas Chambon <stan.chambon@gmail.com> +# +# License: MIT License + +import matplotlib.pylab as pl +import ot + + +############################################################################## +# Generate data +# ------------- + +n_samples_source = 150 +n_samples_target = 150 + +Xs, ys = ot.datasets.get_data_classif('3gauss', n_samples_source) +Xt, yt = ot.datasets.get_data_classif('3gauss2', n_samples_target) + + +############################################################################## +# Transport source samples onto target samples +# -------------------------------------------- + + +# unsupervised domain adaptation +ot_sinkhorn_un = ot.da.SinkhornTransport(reg_e=1e-1) +ot_sinkhorn_un.fit(Xs=Xs, Xt=Xt) +transp_Xs_sinkhorn_un = ot_sinkhorn_un.transform(Xs=Xs) + +# semi-supervised domain adaptation +ot_sinkhorn_semi = ot.da.SinkhornTransport(reg_e=1e-1) +ot_sinkhorn_semi.fit(Xs=Xs, Xt=Xt, ys=ys, yt=yt) +transp_Xs_sinkhorn_semi = ot_sinkhorn_semi.transform(Xs=Xs) + +# semi supervised DA uses available labaled target samples to modify the cost +# matrix involved in the OT problem. 
The cost of transporting a source sample +# of class A onto a target sample of class B != A is set to infinite, or a +# very large value + +# note that in the present case we consider that all the target samples are +# labeled. For daily applications, some target sample might not have labels, +# in this case the element of yt corresponding to these samples should be +# filled with -1. + +# Warning: we recall that -1 cannot be used as a class label + + +############################################################################## +# Fig 1 : plots source and target samples + matrix of pairwise distance +# --------------------------------------------------------------------- + +pl.figure(1, figsize=(10, 10)) +pl.subplot(2, 2, 1) +pl.scatter(Xs[:, 0], Xs[:, 1], c=ys, marker='+', label='Source samples') +pl.xticks([]) +pl.yticks([]) +pl.legend(loc=0) +pl.title('Source samples') + +pl.subplot(2, 2, 2) +pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o', label='Target samples') +pl.xticks([]) +pl.yticks([]) +pl.legend(loc=0) +pl.title('Target samples') + +pl.subplot(2, 2, 3) +pl.imshow(ot_sinkhorn_un.cost_, interpolation='nearest') +pl.xticks([]) +pl.yticks([]) +pl.title('Cost matrix - unsupervised DA') + +pl.subplot(2, 2, 4) +pl.imshow(ot_sinkhorn_semi.cost_, interpolation='nearest') +pl.xticks([]) +pl.yticks([]) +pl.title('Cost matrix - semisupervised DA') + +pl.tight_layout() + +# the optimal coupling in the semi-supervised DA case will exhibit " shape +# similar" to the cost matrix, (block diagonal matrix) + + +############################################################################## +# Fig 2 : plots optimal couplings for the different methods +# --------------------------------------------------------- + +pl.figure(2, figsize=(8, 4)) + +pl.subplot(1, 2, 1) +pl.imshow(ot_sinkhorn_un.coupling_, interpolation='nearest') +pl.xticks([]) +pl.yticks([]) +pl.title('Optimal coupling\nUnsupervised DA') + +pl.subplot(1, 2, 2) +pl.imshow(ot_sinkhorn_semi.coupling_, 
interpolation='nearest') +pl.xticks([]) +pl.yticks([]) +pl.title('Optimal coupling\nSemi-supervised DA') + +pl.tight_layout() + + +############################################################################## +# Fig 3 : plot transported samples +# -------------------------------- + +# display transported samples +pl.figure(4, figsize=(8, 4)) +pl.subplot(1, 2, 1) +pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o', + label='Target samples', alpha=0.5) +pl.scatter(transp_Xs_sinkhorn_un[:, 0], transp_Xs_sinkhorn_un[:, 1], c=ys, + marker='+', label='Transp samples', s=30) +pl.title('Transported samples\nEmdTransport') +pl.legend(loc=0) +pl.xticks([]) +pl.yticks([]) + +pl.subplot(1, 2, 2) +pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o', + label='Target samples', alpha=0.5) +pl.scatter(transp_Xs_sinkhorn_semi[:, 0], transp_Xs_sinkhorn_semi[:, 1], c=ys, + marker='+', label='Transp samples', s=30) +pl.title('Transported samples\nSinkhornTransport') +pl.xticks([]) +pl.yticks([]) + +pl.tight_layout() +pl.show() |