From 8c525174bb664cafa98dfff73dce9d42d7818f71 Mon Sep 17 00:00:00 2001 From: Nicolas Courty Date: Thu, 31 Aug 2017 16:44:18 +0200 Subject: Minor corrections suggested by @agramfort + new barycenter example + test function --- README.md | 2 +- data/carre.png | Bin 0 -> 168 bytes data/coeur.png | Bin 0 -> 225 bytes data/rond.png | Bin 0 -> 230 bytes data/triangle.png | Bin 0 -> 254 bytes examples/plot_gromov.py | 14 +-- examples/plot_gromov_barycenter.py | 240 +++++++++++++++++++++++++++++++++++++ ot/gromov.py | 36 +++--- test/test_gromov.py | 38 ++++++ 9 files changed, 302 insertions(+), 28 deletions(-) create mode 100755 data/carre.png create mode 100755 data/coeur.png create mode 100755 data/rond.png create mode 100755 data/triangle.png create mode 100755 examples/plot_gromov_barycenter.py create mode 100644 test/test_gromov.py diff --git a/README.md b/README.md index 257244b..22b20a4 100644 --- a/README.md +++ b/README.md @@ -185,4 +185,4 @@ You can also post bug reports and feature requests in Github issues. Make sure t [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016). [Wasserstein Discriminant Analysis](https://arxiv.org/pdf/1608.08063.pdf). arXiv preprint arXiv:1608.08063. -[12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon, [Gromov-Wasserstein averaging of kernel and distance matrices](http://proceedings.mlr.press/v48/peyre16.html) International Conference on Machine Learning (ICML). 2016. +[12] Gabriel Peyré, Marco Cuturi, and Justin Solomon, [Gromov-Wasserstein averaging of kernel and distance matrices](http://proceedings.mlr.press/v48/peyre16.html) International Conference on Machine Learning (ICML). 2016. diff --git a/data/carre.png b/data/carre.png new file mode 100755 index 0000000..45ff0ef Binary files /dev/null and b/data/carre.png differ diff --git a/data/coeur.png b/data/coeur.png new file mode 100755 index 0000000..3f511a6 Binary files /dev/null and b/data/coeur.png differ diff --git a/data/rond.png b/data/rond.png new file mode 100755 index 0000000..1c1a068 Binary files /dev/null and b/data/rond.png differ diff --git a/data/triangle.png b/data/triangle.png new file mode 100755 index 0000000..ca36d09 Binary files /dev/null and b/data/triangle.png differ diff --git a/examples/plot_gromov.py b/examples/plot_gromov.py index a33fde1..9bbdbde 100644 --- a/examples/plot_gromov.py +++ b/examples/plot_gromov.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- """ -==================== +========================== Gromov-Wasserstein example -==================== +========================== This example is designed to show how to use the Gromov-Wassertsein distance computation in POT. """ @@ -14,14 +14,14 @@ computation in POT. import scipy as sp import numpy as np +import matplotlib.pylab as pl import ot -import matplotlib.pylab as pl """ Sample two Gaussian distributions (2D and 3D) -==================== +============================================= The Gromov-Wasserstein distance allows to compute distances with samples that do not belong to the same metric space. For demonstration purpose, we sample two Gaussian distributions in 2- and 3-dimensional spaces. """ @@ -42,7 +42,7 @@ xt = np.random.randn(n, 3).dot(P) + mu_t """ Plotting the distributions -==================== +========================== """ fig = pl.figure() ax1 = fig.add_subplot(121) @@ -54,7 +54,7 @@ pl.show() """ Compute distance kernels, normalize them and then display -==================== +========================================================= """ C1 = sp.spatial.distance.cdist(xs, xs) @@ -72,7 +72,7 @@ pl.show() """ Compute Gromov-Wasserstein plans and distance -==================== +============================================= """ p = ot.unif(n) diff --git a/examples/plot_gromov_barycenter.py b/examples/plot_gromov_barycenter.py new file mode 100755 index 0000000..6a72b3b --- /dev/null +++ b/examples/plot_gromov_barycenter.py @@ -0,0 +1,240 @@ +# -*- coding: utf-8 -*- +""" +===================================== +Gromov-Wasserstein Barycenter example +===================================== +This example is designed to show how to use the Gromov-Wassertsein distance +computation in POT. +""" + +# Author: Erwan Vautier +# Nicolas Courty +# +# License: MIT License + + +import numpy as np +import scipy as sp + +import scipy.ndimage as spi +import matplotlib.pylab as pl +from sklearn import manifold +from sklearn.decomposition import PCA + +import ot + +""" + +Smacof MDS +========== +This function allows to find an embedding of points given a dissimilarity matrix +that will be given by the output of the algorithm +""" + + +def smacof_mds(C, dim, maxIter=3000, eps=1e-9): + """ + Returns an interpolated point cloud following the dissimilarity matrix C using SMACOF + multidimensional scaling (MDS) in specific dimensionned target space + + Parameters + ---------- + C : np.ndarray(ns,ns) + dissimilarity matrix + dim : Integer + dimension of the targeted space + maxIter : Maximum number of iterations of the SMACOF algorithm for a single run + + eps : relative tolerance w.r.t stress to declare converge + + + Returns + ------- + npos : R**dim ndarray + Embedded coordinates of the interpolated point cloud (defined with one isometry) + + + """ + + seed = np.random.RandomState(seed=3) + + mds = manifold.MDS( + dim, + max_iter=3000, + eps=1e-9, + dissimilarity='precomputed', + n_init=1) + pos = mds.fit(C).embedding_ + + nmds = manifold.MDS( + 2, + max_iter=3000, + eps=1e-9, + dissimilarity="precomputed", + random_state=seed, + n_init=1) + npos = nmds.fit_transform(C, init=pos) + + return npos + + +""" +Data preparation +================ +The four distributions are constructed from 4 simple images +""" + + +def im2mat(I): + """Converts and image to matrix (one pixel per line)""" + return I.reshape((I.shape[0] * I.shape[1], I.shape[2])) + + +carre = spi.imread('../data/carre.png').astype(np.float64) / 256 +rond = spi.imread('../data/rond.png').astype(np.float64) / 256 +triangle = spi.imread('../data/triangle.png').astype(np.float64) / 256 +fleche = spi.imread('../data/coeur.png').astype(np.float64) / 256 + +shapes = [carre, rond, triangle, fleche] + +S = 4 +xs = [[] for i in range(S)] + + +for nb in range(4): + for i in range(8): + for j in range(8): + if shapes[nb][i, j] < 0.95: + xs[nb].append([j, 8 - i]) + +xs = np.array([np.array(xs[0]), np.array(xs[1]), + np.array(xs[2]), np.array(xs[3])]) + + +""" +Barycenter computation +====================== +The four distributions are constructed from 4 simple images +""" +ns = [len(xs[s]) for s in range(S)] +N = 30 + +"""Compute all distances matrices for the four shapes""" +Cs = [sp.spatial.distance.cdist(xs[s], xs[s]) for s in range(S)] +Cs = [cs / cs.max() for cs in Cs] + +ps = [ot.unif(ns[s]) for s in range(S)] +p = ot.unif(N) + + +lambdast = [[float(i) / 3, float(3 - i) / 3] for i in [1, 2]] + +Ct01 = [0 for i in range(2)] +for i in range(2): + Ct01[i] = ot.gromov.gromov_barycenters(N, [Cs[0], Cs[1]], [ + ps[0], ps[1]], p, lambdast[i], 'square_loss', 5e-4, numItermax=100, stopThr=1e-3) + +Ct02 = [0 for i in range(2)] +for i in range(2): + Ct02[i] = ot.gromov.gromov_barycenters(N, [Cs[0], Cs[2]], [ + ps[0], ps[2]], p, lambdast[i], 'square_loss', 5e-4, numItermax=100, stopThr=1e-3) + +Ct13 = [0 for i in range(2)] +for i in range(2): + Ct13[i] = ot.gromov.gromov_barycenters(N, [Cs[1], Cs[3]], [ + ps[1], ps[3]], p, lambdast[i], 'square_loss', 5e-4, numItermax=100, stopThr=1e-3) + +Ct23 = [0 for i in range(2)] +for i in range(2): + Ct23[i] = ot.gromov.gromov_barycenters(N, [Cs[2], Cs[3]], [ + ps[2], ps[3]], p, lambdast[i], 'square_loss', 5e-4, numItermax=100, stopThr=1e-3) + +""" +Visualization +============= +""" + +"""The PCA helps in getting consistency between the rotations""" + +clf = PCA(n_components=2) +npos = [0, 0, 0, 0] +npos = [smacof_mds(Cs[s], 2) for s in range(S)] + +npost01 = [0, 0] +npost01 = [smacof_mds(Ct01[s], 2) for s in range(2)] +npost01 = [clf.fit_transform(npost01[s]) for s in range(2)] + +npost02 = [0, 0] +npost02 = [smacof_mds(Ct02[s], 2) for s in range(2)] +npost02 = [clf.fit_transform(npost02[s]) for s in range(2)] + +npost13 = [0, 0] +npost13 = [smacof_mds(Ct13[s], 2) for s in range(2)] +npost13 = [clf.fit_transform(npost13[s]) for s in range(2)] + +npost23 = [0, 0] +npost23 = [smacof_mds(Ct23[s], 2) for s in range(2)] +npost23 = [clf.fit_transform(npost23[s]) for s in range(2)] + + +fig = pl.figure(figsize=(10, 10)) + +ax1 = pl.subplot2grid((4, 4), (0, 0)) +pl.xlim((-1, 1)) +pl.ylim((-1, 1)) +ax1.scatter(npos[0][:, 0], npos[0][:, 1], color='r') + +ax2 = pl.subplot2grid((4, 4), (0, 1)) +pl.xlim((-1, 1)) +pl.ylim((-1, 1)) +ax2.scatter(npost01[1][:, 0], npost01[1][:, 1], color='b') + +ax3 = pl.subplot2grid((4, 4), (0, 2)) +pl.xlim((-1, 1)) +pl.ylim((-1, 1)) +ax3.scatter(npost01[0][:, 0], npost01[0][:, 1], color='b') + +ax4 = pl.subplot2grid((4, 4), (0, 3)) +pl.xlim((-1, 1)) +pl.ylim((-1, 1)) +ax4.scatter(npos[1][:, 0], npos[1][:, 1], color='r') + +ax5 = pl.subplot2grid((4, 4), (1, 0)) +pl.xlim((-1, 1)) +pl.ylim((-1, 1)) +ax5.scatter(npost02[1][:, 0], npost02[1][:, 1], color='b') + +ax6 = pl.subplot2grid((4, 4), (1, 3)) +pl.xlim((-1, 1)) +pl.ylim((-1, 1)) +ax6.scatter(npost13[1][:, 0], npost13[1][:, 1], color='b') + +ax7 = pl.subplot2grid((4, 4), (2, 0)) +pl.xlim((-1, 1)) +pl.ylim((-1, 1)) +ax7.scatter(npost02[0][:, 0], npost02[0][:, 1], color='b') + +ax8 = pl.subplot2grid((4, 4), (2, 3)) +pl.xlim((-1, 1)) +pl.ylim((-1, 1)) +ax8.scatter(npost13[0][:, 0], npost13[0][:, 1], color='b') + +ax9 = pl.subplot2grid((4, 4), (3, 0)) +pl.xlim((-1, 1)) +pl.ylim((-1, 1)) +ax9.scatter(npos[2][:, 0], npos[2][:, 1], color='r') + +ax10 = pl.subplot2grid((4, 4), (3, 1)) +pl.xlim((-1, 1)) +pl.ylim((-1, 1)) +ax10.scatter(npost23[1][:, 0], npost23[1][:, 1], color='b') + +ax11 = pl.subplot2grid((4, 4), (3, 2)) +pl.xlim((-1, 1)) +pl.ylim((-1, 1)) +ax11.scatter(npost23[0][:, 0], npost23[0][:, 1], color='b') + +ax12 = pl.subplot2grid((4, 4), (3, 3)) +pl.xlim((-1, 1)) +pl.ylim((-1, 1)) +ax12.scatter(npos[3][:, 0], npos[3][:, 1], color='r') diff --git a/ot/gromov.py b/ot/gromov.py index 7cf3b42..421ed3f 100644 --- a/ot/gromov.py +++ b/ot/gromov.py @@ -23,7 +23,7 @@ def square_loss(a, b): Returns the value of L(a,b)=(1/2)*|a-b|^2 """ - return (1 / 2) * (a - b)**2 + return 0.5 * (a - b)**2 def kl_loss(a, b): @@ -54,9 +54,9 @@ def tensor_square_loss(C1, C2, T): Parameters ---------- - C1 : np.ndarray(ns,ns) + C1 : ndarray, shape (ns, ns) Metric cost matrix in the source space - C2 : np.ndarray(nt,nt) + C2 : ndarray, shape (nt, nt) Metric costfr matrix in the target space T : np.ndarray(ns,nt) Coupling between source and target spaces @@ -87,7 +87,7 @@ def tensor_square_loss(C1, C2, T): return b tens = -np.dot(h1(C1), T).dot(h2(C2).T) - tens = tens - tens.min() + tens -= tens.min() return np.array(tens) @@ -112,9 +112,9 @@ def tensor_kl_loss(C1, C2, T): Parameters ---------- - C1 : np.ndarray(ns,ns) + C1 : ndarray, shape (ns, ns) Metric cost matrix in the source space - C2 : np.ndarray(nt,nt) + C2 : ndarray, shape (nt, nt) Metric costfr matrix in the target space T : np.ndarray(ns,nt) Coupling between source and target spaces @@ -149,7 +149,7 @@ def tensor_kl_loss(C1, C2, T): return np.log(b + 1e-15) tens = -np.dot(h1(C1), T).dot(h2(C2).T) - tens = tens - tens.min() + tens -= tens.min() return np.array(tens) @@ -175,9 +175,8 @@ def update_square_loss(p, lambdas, T, Cs): """ - tmpsum = np.sum([lambdas[s] * np.dot(T[s].T, Cs[s]).dot(T[s]) - for s in range(len(T))]) - ppt = np.dot(p, p.T) + tmpsum = sum([lambdas[s] * np.dot(T[s].T, Cs[s]).dot(T[s]) for s in range(len(T))]) + ppt = np.outer(p, p) return(np.divide(tmpsum, ppt)) @@ -203,9 +202,8 @@ def update_kl_loss(p, lambdas, T, Cs): """ - tmpsum = np.sum([lambdas[s] * np.dot(T[s].T, Cs[s]).dot(T[s]) - for s in range(len(T))]) - ppt = np.dot(p, p.T) + tmpsum = sum([lambdas[s] * np.dot(T[s].T, Cs[s]).dot(T[s]) for s in range(len(T))]) + ppt = np.outer(p, p) return(np.exp(np.divide(tmpsum, ppt))) @@ -239,9 +237,9 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon, numItermax=1000, stopThr Parameters ---------- - C1 : np.ndarray(ns,ns) + C1 : ndarray, shape (ns, ns) Metric cost matrix in the source space - C2 : np.ndarray(nt,nt) + C2 : ndarray, shape (nt, nt) Metric costfr matrix in the target space p : np.ndarray(ns,) distribution in the source space @@ -271,7 +269,7 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon, numItermax=1000, stopThr C1 = np.asarray(C1, dtype=np.float64) C2 = np.asarray(C2, dtype=np.float64) - T = np.dot(p, q.T) # Initialization + T = np.outer(p, q) # Initialization cpt = 0 err = 1 @@ -333,9 +331,9 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon, numItermax=1000, stopTh Parameters ---------- - C1 : np.ndarray(ns,ns) + C1 : ndarray, shape (ns, ns) Metric cost matrix in the source space - C2 : np.ndarray(nt,nt) + C2 : ndarray, shape (nt, nt) Metric costfr matrix in the target space p : np.ndarray(ns,) distribution in the source space @@ -434,8 +432,6 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon, numItermax=1000 Cs = [np.asarray(Cs[s], dtype=np.float64) for s in range(S)] lambdas = np.asarray(lambdas, dtype=np.float64) - T = [0 for s in range(S)] - # Initialization of C : random SPD matrix xalea = np.random.randn(N, 2) C = dist(xalea, xalea) diff --git a/test/test_gromov.py b/test/test_gromov.py new file mode 100644 index 0000000..75eeaab --- /dev/null +++ b/test/test_gromov.py @@ -0,0 +1,38 @@ +"""Tests for module gromov """ + +# Author: Erwan Vautier +# Nicolas Courty +# +# License: MIT License + +import numpy as np +import ot + + +def test_gromov(): + n = 50 # nb samples + + mu_s = np.array([0, 0]) + cov_s = np.array([[1, 0], [0, 1]]) + + xs = ot.datasets.get_2D_samples_gauss(n, mu_s, cov_s) + + xt = [xs[n - (i + 1)] for i in range(n)] + xt = np.array(xt) + + p = ot.unif(n) + q = ot.unif(n) + + C1 = ot.dist(xs, xs) + C2 = ot.dist(xt, xt) + + C1 /= C1.max() + C2 /= C2.max() + + G = ot.gromov_wasserstein(C1, C2, p, q, 'square_loss', epsilon=5e-4) + + # check constratints + np.testing.assert_allclose( + p, G.sum(1), atol=1e-04) # cf convergence gromov + np.testing.assert_allclose( + q, G.sum(0), atol=1e-04) # cf convergence gromov -- cgit v1.2.3