summaryrefslogtreecommitdiff
path: root/examples
diff options
context:
space:
mode:
authorRémi Flamary <remi.flamary@gmail.com>2017-09-15 13:57:01 +0200
committerRémi Flamary <remi.flamary@gmail.com>2017-09-15 13:57:01 +0200
commitdd3546baf9c59733b2109a971293eba48d2eaed3 (patch)
treedbc9c5dd126eecf537acbe7d205b91250f2bdc9b /examples
parentbad3d95523d005a4fbf64dd009c716b9dd560fe3 (diff)
add all files for doc
Diffstat (limited to 'examples')
-rw-r--r--examples/plot_gromov.py40
-rwxr-xr-xexamples/plot_gromov_barycenter.py42
-rw-r--r--examples/plot_otda_semi_supervised.py148
3 files changed, 190 insertions, 40 deletions
diff --git a/examples/plot_gromov.py b/examples/plot_gromov.py
index 5132024..d3f724c 100644
--- a/examples/plot_gromov.py
+++ b/examples/plot_gromov.py
@@ -20,13 +20,14 @@ from mpl_toolkits.mplot3d import Axes3D # noqa
import ot
-"""
-Sample two Gaussian distributions (2D and 3D)
-=============================================
-The Gromov-Wasserstein distance allows to compute distances with samples that
-do not belong to the same metric space. For demonstration purpose, we sample
-two Gaussian distributions in 2- and 3-dimensional spaces.
-"""
+##############################################################################
+# Sample two Gaussian distributions (2D and 3D)
+# ---------------------------------------------
+#
+# The Gromov-Wasserstein distance allows to compute distances with samples that
+# do not belong to the same metric space. For demonstration purpose, we sample
+# two Gaussian distributions in 2- and 3-dimensional spaces.
+
n_samples = 30 # nb samples
@@ -42,10 +43,11 @@ P = sp.linalg.sqrtm(cov_t)
xt = np.random.randn(n_samples, 3).dot(P) + mu_t
-"""
-Plotting the distributions
-==========================
-"""
+##############################################################################
+# Plotting the distributions
+# --------------------------
+
+
fig = pl.figure()
ax1 = fig.add_subplot(121)
ax1.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
@@ -54,10 +56,10 @@ ax2.scatter(xt[:, 0], xt[:, 1], xt[:, 2], color='r')
pl.show()
-"""
-Compute distance kernels, normalize them and then display
-=========================================================
-"""
+##############################################################################
+# Compute distance kernels, normalize them and then display
+# ---------------------------------------------------------
+
C1 = sp.spatial.distance.cdist(xs, xs)
C2 = sp.spatial.distance.cdist(xt, xt)
@@ -72,10 +74,10 @@ pl.subplot(122)
pl.imshow(C2)
pl.show()
-"""
-Compute Gromov-Wasserstein plans and distance
-=============================================
-"""
+##############################################################################
+# Compute Gromov-Wasserstein plans and distance
+# ---------------------------------------------
+
p = ot.unif(n_samples)
q = ot.unif(n_samples)
diff --git a/examples/plot_gromov_barycenter.py b/examples/plot_gromov_barycenter.py
index 93533c0..180b0cf 100755
--- a/examples/plot_gromov_barycenter.py
+++ b/examples/plot_gromov_barycenter.py
@@ -24,12 +24,12 @@ from sklearn.decomposition import PCA
import ot
-"""
-Smacof MDS
-==========
-This function allows to find an embedding of points given a dissimilarity matrix
-that will be given by the output of the algorithm
-"""
+##############################################################################
+# Smacof MDS
+# ----------
+#
+# This function finds an embedding of points given a dissimilarity matrix
+# that will be given by the output of the algorithm
def smacof_mds(C, dim, max_iter=3000, eps=1e-9):
@@ -78,11 +78,11 @@ def smacof_mds(C, dim, max_iter=3000, eps=1e-9):
return npos
-"""
-Data preparation
-================
-The four distributions are constructed from 4 simple images
-"""
+##############################################################################
+# Data preparation
+# ----------------
+#
+# The four distributions are constructed from 4 simple images
def im2mat(I):
@@ -110,12 +110,11 @@ for nb in range(4):
xs = np.array([np.array(xs[0]), np.array(xs[1]),
np.array(xs[2]), np.array(xs[3])])
+##############################################################################
+# Barycenter computation
+# ----------------------
+
-"""
-Barycenter computation
-======================
-The four distributions are constructed from 4 simple images
-"""
ns = [len(xs[s]) for s in range(S)]
n_samples = 30
@@ -157,12 +156,13 @@ for i in range(2):
], p, lambdast[i], 'square_loss', 5e-4,
max_iter=100, tol=1e-3)
-"""
-Visualization
-=============
-"""
-"""The PCA helps in getting consistency between the rotations"""
+##############################################################################
+# Visualization
+# -------------
+#
+# The PCA helps in getting consistency between the rotations
+
clf = PCA(n_components=2)
npos = [0, 0, 0, 0]
diff --git a/examples/plot_otda_semi_supervised.py b/examples/plot_otda_semi_supervised.py
new file mode 100644
index 0000000..7963aef
--- /dev/null
+++ b/examples/plot_otda_semi_supervised.py
@@ -0,0 +1,148 @@
+# -*- coding: utf-8 -*-
+"""
+============================================
+OTDA unsupervised vs semi-supervised setting
+============================================
+
+This example introduces a semi supervised domain adaptation in a 2D setting.
+It makes the problem of semi-supervised domain adaptation explicit and
+introduces some optimal transport approaches to solve it.
+
+Quantities such as optimal couplings, greater coupling coefficients and
+transported samples are represented in order to give a visual understanding
+of what the transport methods are doing.
+"""
+
+# Authors: Remi Flamary <remi.flamary@unice.fr>
+# Stanislas Chambon <stan.chambon@gmail.com>
+#
+# License: MIT License
+
+import matplotlib.pylab as pl
+import ot
+
+
+##############################################################################
+# Generate data
+# -------------
+
+n_samples_source = 150
+n_samples_target = 150
+
+Xs, ys = ot.datasets.get_data_classif('3gauss', n_samples_source)
+Xt, yt = ot.datasets.get_data_classif('3gauss2', n_samples_target)
+
+
+##############################################################################
+# Transport source samples onto target samples
+# --------------------------------------------
+
+
+# unsupervised domain adaptation
+ot_sinkhorn_un = ot.da.SinkhornTransport(reg_e=1e-1)
+ot_sinkhorn_un.fit(Xs=Xs, Xt=Xt)
+transp_Xs_sinkhorn_un = ot_sinkhorn_un.transform(Xs=Xs)
+
+# semi-supervised domain adaptation
+ot_sinkhorn_semi = ot.da.SinkhornTransport(reg_e=1e-1)
+ot_sinkhorn_semi.fit(Xs=Xs, Xt=Xt, ys=ys, yt=yt)
+transp_Xs_sinkhorn_semi = ot_sinkhorn_semi.transform(Xs=Xs)
+
+# semi-supervised DA uses available labeled target samples to modify the cost
+# matrix involved in the OT problem. The cost of transporting a source sample
+# of class A onto a target sample of class B != A is set to infinite, or a
+# very large value
+
+# note that in the present case we consider that all the target samples are
+# labeled. In real-world applications, some target samples might not have labels,
+# in this case the element of yt corresponding to these samples should be
+# filled with -1.
+
+# Warning: we recall that -1 cannot be used as a class label
+
+
+##############################################################################
+# Fig 1 : plots source and target samples + matrix of pairwise distance
+# ---------------------------------------------------------------------
+
+pl.figure(1, figsize=(10, 10))
+pl.subplot(2, 2, 1)
+pl.scatter(Xs[:, 0], Xs[:, 1], c=ys, marker='+', label='Source samples')
+pl.xticks([])
+pl.yticks([])
+pl.legend(loc=0)
+pl.title('Source samples')
+
+pl.subplot(2, 2, 2)
+pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o', label='Target samples')
+pl.xticks([])
+pl.yticks([])
+pl.legend(loc=0)
+pl.title('Target samples')
+
+pl.subplot(2, 2, 3)
+pl.imshow(ot_sinkhorn_un.cost_, interpolation='nearest')
+pl.xticks([])
+pl.yticks([])
+pl.title('Cost matrix - unsupervised DA')
+
+pl.subplot(2, 2, 4)
+pl.imshow(ot_sinkhorn_semi.cost_, interpolation='nearest')
+pl.xticks([])
+pl.yticks([])
+pl.title('Cost matrix - semisupervised DA')
+
+pl.tight_layout()
+
+# the optimal coupling in the semi-supervised DA case will exhibit a shape
+# similar to the cost matrix (a block-diagonal matrix)
+
+
+##############################################################################
+# Fig 2 : plots optimal couplings for the different methods
+# ---------------------------------------------------------
+
+pl.figure(2, figsize=(8, 4))
+
+pl.subplot(1, 2, 1)
+pl.imshow(ot_sinkhorn_un.coupling_, interpolation='nearest')
+pl.xticks([])
+pl.yticks([])
+pl.title('Optimal coupling\nUnsupervised DA')
+
+pl.subplot(1, 2, 2)
+pl.imshow(ot_sinkhorn_semi.coupling_, interpolation='nearest')
+pl.xticks([])
+pl.yticks([])
+pl.title('Optimal coupling\nSemi-supervised DA')
+
+pl.tight_layout()
+
+
+##############################################################################
+# Fig 3 : plot transported samples
+# --------------------------------
+
+# display transported samples
+pl.figure(4, figsize=(8, 4))
+pl.subplot(1, 2, 1)
+pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o',
+ label='Target samples', alpha=0.5)
+pl.scatter(transp_Xs_sinkhorn_un[:, 0], transp_Xs_sinkhorn_un[:, 1], c=ys,
+ marker='+', label='Transp samples', s=30)
+pl.title('Transported samples\nEmdTransport')
+pl.legend(loc=0)
+pl.xticks([])
+pl.yticks([])
+
+pl.subplot(1, 2, 2)
+pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o',
+ label='Target samples', alpha=0.5)
+pl.scatter(transp_Xs_sinkhorn_semi[:, 0], transp_Xs_sinkhorn_semi[:, 1], c=ys,
+ marker='+', label='Transp samples', s=30)
+pl.title('Transported samples\nSinkhornTransport')
+pl.xticks([])
+pl.yticks([])
+
+pl.tight_layout()
+pl.show()