diff options
author | Gard Spreemann <gspr@nonempty.org> | 2021-11-09 17:05:13 +0100 |
---|---|---|
committer | Gard Spreemann <gspr@nonempty.org> | 2021-11-09 17:05:13 +0100 |
commit | a9fdc844907decddf54bed3ebeea8d8b2cf0fc5c (patch) | |
tree | 449a03fce8fafb78b6badd12b6e633f1e5d73a64 /examples | |
parent | a16b9471d7114ec08977479b7249efe747702b97 (diff) | |
parent | f1628794d521a8dfa00af383b5e06cd6d34af619 (diff) |
Merge tag '0.8.0' into dfsg/latest
Diffstat (limited to 'examples')
28 files changed, 2054 insertions, 299 deletions
diff --git a/examples/README.txt b/examples/README.txt index 69a9f84..b48487f 100644 --- a/examples/README.txt +++ b/examples/README.txt @@ -1,7 +1,7 @@ Examples gallery ================ -This is a gallery of all the POT example files. +This is a gallery of all the POT example files. OT and regularized OT diff --git a/examples/backends/README.txt b/examples/backends/README.txt new file mode 100644 index 0000000..3ee0e27 --- /dev/null +++ b/examples/backends/README.txt @@ -0,0 +1,4 @@ + + +POT backend examples +--------------------
\ No newline at end of file diff --git a/examples/backends/plot_optim_gromov_pytorch.py b/examples/backends/plot_optim_gromov_pytorch.py new file mode 100644 index 0000000..969707f --- /dev/null +++ b/examples/backends/plot_optim_gromov_pytorch.py @@ -0,0 +1,260 @@ +r""" +================================= +Optimizing the Gromov-Wasserstein distance with PyTorch +================================= + +In this exemple we use the pytorch backend to optimize the Gromov-Wasserstein +(GW) loss between two graphs expressed as empirical distribution. + +In the first example we optimize the weights on the node of a simple template +graph so that it minimizes the GW with a given Stochastic Block Model graph. +We can see that this actually recovers the proportion of classes in the SBM +and allows for an accurate clustering of the nodes using the GW optimal plan. + +In a second example we optimize simultaneously the weights and the sructure of +the template graph which allows us to perform graph compression and to recover +other properties of the SBM. + +The backend actually uses the gradients expressed in [38] to optimize the +weights. + +[38] C. Vincent-Cuaz, T. Vayer, R. Flamary, M. Corneli, N. Courty, Online Graph +Dictionary Learning, International Conference on Machine Learning (ICML), 2021. + +""" +# Author: Rémi Flamary <remi.flamary@polytechnique.edu> +# +# License: MIT License + +# sphinx_gallery_thumbnail_number = 3 + +from sklearn.manifold import MDS +import numpy as np +import matplotlib.pylab as pl +import torch + +import ot +from ot.gromov import gromov_wasserstein2 + +# %% +# Graph generation +# --------------- + +rng = np.random.RandomState(42) + + +def get_sbm(n, nc, ratio, P): + nbpc = np.round(n * ratio).astype(int) + n = np.sum(nbpc) + C = np.zeros((n, n)) + for c1 in range(nc): + for c2 in range(c1 + 1): + if c1 == c2: + for i in range(np.sum(nbpc[:c1]), np.sum(nbpc[:c1 + 1])): + for j in range(np.sum(nbpc[:c2]), i): + if rng.rand() <= P[c1, c2]: + C[i, j] = 1 + else: + for i in range(np.sum(nbpc[:c1]), np.sum(nbpc[:c1 + 1])): + for j in range(np.sum(nbpc[:c2]), np.sum(nbpc[:c2 + 1])): + if rng.rand() <= P[c1, c2]: + C[i, j] = 1 + + return C + C.T + + +n = 100 +nc = 3 +ratio = np.array([.5, .3, .2]) +P = np.array(0.6 * np.eye(3) + 0.05 * np.ones((3, 3))) +C1 = get_sbm(n, nc, ratio, P) + +# get 2d position for nodes +x1 = MDS(dissimilarity='precomputed', random_state=0).fit_transform(1 - C1) + + +def plot_graph(x, C, color='C0', s=None): + for j in range(C.shape[0]): + for i in range(j): + if C[i, j] > 0: + pl.plot([x[i, 0], x[j, 0]], [x[i, 1], x[j, 1]], alpha=0.2, color='k') + pl.scatter(x[:, 0], x[:, 1], c=color, s=s, zorder=10, edgecolors='k', cmap='tab10', vmax=9) + + +pl.figure(1, (10, 5)) +pl.clf() +pl.subplot(1, 2, 1) +plot_graph(x1, C1, color='C0') +pl.title("SBM Graph") +pl.axis("off") +pl.subplot(1, 2, 2) +pl.imshow(C1, interpolation='nearest') +pl.title("Adjacency matrix") +pl.axis("off") + + +# %% +# Optimizing GW w.r.t. the weights on a template structure +# ------------------------------------------------ +# The adajacency matrix C1 is block diagonal with 3 blocks. We want to +# optimize the weights of a simple template C0=eye(3) and see if we can +# recover the proportion of classes from the SBM (up to a permutation). + +C0 = np.eye(3) + + +def min_weight_gw(C1, C2, a2, nb_iter_max=100, lr=1e-2): + """ solve min_a GW(C1,C2,a, a2) by gradient descent""" + + # use pyTorch for our data + C1_torch = torch.tensor(C1) + C2_torch = torch.tensor(C2) + + a0 = rng.rand(C1.shape[0]) # random_init + a0 /= a0.sum() # on simplex + a1_torch = torch.tensor(a0).requires_grad_(True) + a2_torch = torch.tensor(a2) + + loss_iter = [] + + for i in range(nb_iter_max): + + loss = gromov_wasserstein2(C1_torch, C2_torch, a1_torch, a2_torch) + + loss_iter.append(loss.clone().detach().cpu().numpy()) + loss.backward() + + #print("{:03d} | {}".format(i, loss_iter[-1])) + + # performs a step of projected gradient descent + with torch.no_grad(): + grad = a1_torch.grad + a1_torch -= grad * lr # step + a1_torch.grad.zero_() + a1_torch.data = ot.utils.proj_simplex(a1_torch) + + a1 = a1_torch.clone().detach().cpu().numpy() + + return a1, loss_iter + + +a0_est, loss_iter0 = min_weight_gw(C0, C1, ot.unif(n), nb_iter_max=100, lr=1e-2) + +pl.figure(2) +pl.plot(loss_iter0) +pl.title("Loss along iterations") + +print("Estimated weights : ", a0_est) +print("True proportions : ", ratio) + + +# %% +# It is clear that the optimization has converged and that we recover the +# ratio of the different classes in the SBM graph up to a permutation. + + +# %% +# Community clustering with uniform and estimated weights +# -------------------------------------------- +# The GW OT plan can be used to perform a clustering of the nodes of a graph +# when computing the GW with a simple template like C0 by labeling nodes in +# the original graph using by the index of the noe in the template receiving +# the most mass. +# +# We show here the result of such a clustering when using uniform weights on +# the template C0 and when using the optimal weights previously estimated. + + +T_unif = ot.gromov_wasserstein(C1, C0, ot.unif(n), ot.unif(3)) +label_unif = T_unif.argmax(1) + +T_est = ot.gromov_wasserstein(C1, C0, ot.unif(n), a0_est) +label_est = T_est.argmax(1) + +pl.figure(3, (10, 5)) +pl.clf() +pl.subplot(1, 2, 1) +plot_graph(x1, C1, color=label_unif) +pl.title("Graph clustering unif. weights") +pl.axis("off") +pl.subplot(1, 2, 2) +plot_graph(x1, C1, color=label_est) +pl.title("Graph clustering est. weights") +pl.axis("off") + + +# %% +# Graph compression with GW +# ------------------------- + +# Now we optimize both the weights and structure of a small graph that +# minimize the GW distance wrt our data graph. This can be seen as graph +# compression but can also recover important properties of an SBM such +# as its class proportion but also its matrix of probability of links between +# classes + + +def graph_compession_gw(nb_nodes, C2, a2, nb_iter_max=100, lr=1e-2): + """ solve min_a GW(C1,C2,a, a2) by gradient descent""" + + # use pyTorch for our data + + C2_torch = torch.tensor(C2) + a2_torch = torch.tensor(a2) + + a0 = rng.rand(nb_nodes) # random_init + a0 /= a0.sum() # on simplex + a1_torch = torch.tensor(a0).requires_grad_(True) + C0 = np.eye(nb_nodes) + C1_torch = torch.tensor(C0).requires_grad_(True) + + loss_iter = [] + + for i in range(nb_iter_max): + + loss = gromov_wasserstein2(C1_torch, C2_torch, a1_torch, a2_torch) + + loss_iter.append(loss.clone().detach().cpu().numpy()) + loss.backward() + + #print("{:03d} | {}".format(i, loss_iter[-1])) + + # performs a step of projected gradient descent + with torch.no_grad(): + grad = a1_torch.grad + a1_torch -= grad * lr # step + a1_torch.grad.zero_() + a1_torch.data = ot.utils.proj_simplex(a1_torch) + + grad = C1_torch.grad + C1_torch -= grad * lr # step + C1_torch.grad.zero_() + C1_torch.data = torch.clamp(C1_torch, 0, 1) + + a1 = a1_torch.clone().detach().cpu().numpy() + C1 = C1_torch.clone().detach().cpu().numpy() + + return a1, C1, loss_iter + + +nb_nodes = 3 +a0_est2, C0_est2, loss_iter2 = graph_compession_gw(nb_nodes, C1, ot.unif(n), + nb_iter_max=100, lr=5e-2) + +pl.figure(4) +pl.plot(loss_iter2) +pl.title("Loss along iterations") + + +print("Estimated weights : ", a0_est2) +print("True proportions : ", ratio) + +pl.figure(6, (10, 3.5)) +pl.clf() +pl.subplot(1, 2, 1) +pl.imshow(P, vmin=0, vmax=1) +pl.title('True SBM P matrix') +pl.subplot(1, 2, 2) +pl.imshow(C0_est2, vmin=0, vmax=1) +pl.title('Estimated C0 matrix') +pl.colorbar() diff --git a/examples/backends/plot_sliced_wass_grad_flow_pytorch.py b/examples/backends/plot_sliced_wass_grad_flow_pytorch.py new file mode 100644 index 0000000..05b9952 --- /dev/null +++ b/examples/backends/plot_sliced_wass_grad_flow_pytorch.py @@ -0,0 +1,185 @@ +r""" +================================= +Sliced Wasserstein barycenter and gradient flow with PyTorch +================================= + +In this exemple we use the pytorch backend to optimize the sliced Wasserstein +loss between two empirical distributions [31]. + +In the first example one we perform a +gradient flow on the support of a distribution that minimize the sliced +Wassersein distance as poposed in [36]. + +In the second exemple we optimize with a gradient descent the sliced +Wasserstein barycenter between two distributions as in [31]. + +[31] Bonneel, Nicolas, et al. "Sliced and radon wasserstein barycenters of +measures." Journal of Mathematical Imaging and Vision 51.1 (2015): 22-45 + +[36] Liutkus, A., Simsekli, U., Majewski, S., Durmus, A., & Stöter, F. R. +(2019, May). Sliced-Wasserstein flows: Nonparametric generative modeling +via optimal transport and diffusions. In International Conference on +Machine Learning (pp. 4104-4113). PMLR. + + +""" +# Author: Rémi Flamary <remi.flamary@polytechnique.edu> +# +# License: MIT License + + +# %% +# Loading the data + + +import numpy as np +import matplotlib.pylab as pl +import torch +import ot +import matplotlib.animation as animation + +I1 = pl.imread('../../data/redcross.png').astype(np.float64)[::4, ::4, 2] +I2 = pl.imread('../../data/tooth.png').astype(np.float64)[::4, ::4, 2] + +sz = I2.shape[0] +XX, YY = np.meshgrid(np.arange(sz), np.arange(sz)) + +x1 = np.stack((XX[I1 == 0], YY[I1 == 0]), 1) * 1.0 +x2 = np.stack((XX[I2 == 0] + 60, -YY[I2 == 0] + 32), 1) * 1.0 +x3 = np.stack((XX[I2 == 0], -YY[I2 == 0] + 32), 1) * 1.0 + +pl.figure(1, (8, 4)) +pl.scatter(x1[:, 0], x1[:, 1], alpha=0.5) +pl.scatter(x2[:, 0], x2[:, 1], alpha=0.5) + +# %% +# Sliced Wasserstein gradient flow with Pytorch +# --------------------------------------------- + + +device = "cuda" if torch.cuda.is_available() else "cpu" + +# use pyTorch for our data +x1_torch = torch.tensor(x1).to(device=device).requires_grad_(True) +x2_torch = torch.tensor(x2).to(device=device) + + +lr = 1e3 +nb_iter_max = 100 + +x_all = np.zeros((nb_iter_max, x1.shape[0], 2)) + +loss_iter = [] + +# generator for random permutations +gen = torch.Generator() +gen.manual_seed(42) + +for i in range(nb_iter_max): + + loss = ot.sliced_wasserstein_distance(x1_torch, x2_torch, n_projections=20, seed=gen) + + loss_iter.append(loss.clone().detach().cpu().numpy()) + loss.backward() + + # performs a step of projected gradient descent + with torch.no_grad(): + grad = x1_torch.grad + x1_torch -= grad * lr / (1 + i / 5e1) # step + x1_torch.grad.zero_() + x_all[i, :, :] = x1_torch.clone().detach().cpu().numpy() + +xb = x1_torch.clone().detach().cpu().numpy() + +pl.figure(2, (8, 4)) +pl.scatter(x1[:, 0], x1[:, 1], alpha=0.5, label='$\mu^{(0)}$') +pl.scatter(x2[:, 0], x2[:, 1], alpha=0.5, label=r'$\nu$') +pl.scatter(xb[:, 0], xb[:, 1], alpha=0.5, label='$\mu^{(100)}$') +pl.title('Sliced Wasserstein gradient flow') +pl.legend() +ax = pl.axis() + +# %% +# Animate trajectories of the gradient flow along iteration +# ------------------------------------------------------- + +pl.figure(3, (8, 4)) + + +def _update_plot(i): + pl.clf() + pl.scatter(x1[:, 0], x1[:, 1], alpha=0.5, label='$\mu^{(0)}$') + pl.scatter(x2[:, 0], x2[:, 1], alpha=0.5, label=r'$\nu$') + pl.scatter(x_all[i, :, 0], x_all[i, :, 1], alpha=0.5, label='$\mu^{(100)}$') + pl.title('Sliced Wasserstein gradient flow Iter. {}'.format(i)) + pl.axis(ax) + return 1 + + +ani = animation.FuncAnimation(pl.gcf(), _update_plot, nb_iter_max, interval=100, repeat_delay=2000) + +# %% +# Compute the Sliced Wasserstein Barycenter +# +x1_torch = torch.tensor(x1).to(device=device) +x3_torch = torch.tensor(x3).to(device=device) +xbinit = np.random.randn(500, 2) * 10 + 16 +xbary_torch = torch.tensor(xbinit).to(device=device).requires_grad_(True) + +lr = 1e3 +nb_iter_max = 100 + +x_all = np.zeros((nb_iter_max, xbary_torch.shape[0], 2)) + +loss_iter = [] + +# generator for random permutations +gen = torch.Generator() +gen.manual_seed(42) + +alpha = 0.5 + +for i in range(nb_iter_max): + + loss = alpha * ot.sliced_wasserstein_distance(xbary_torch, x3_torch, n_projections=50, seed=gen) \ + + (1 - alpha) * ot.sliced_wasserstein_distance(xbary_torch, x1_torch, n_projections=50, seed=gen) + + loss_iter.append(loss.clone().detach().cpu().numpy()) + loss.backward() + + # performs a step of projected gradient descent + with torch.no_grad(): + grad = xbary_torch.grad + xbary_torch -= grad * lr # / (1 + i / 5e1) # step + xbary_torch.grad.zero_() + x_all[i, :, :] = xbary_torch.clone().detach().cpu().numpy() + +xb = xbary_torch.clone().detach().cpu().numpy() + +pl.figure(4, (8, 4)) +pl.scatter(x1[:, 0], x1[:, 1], alpha=0.5, label='$\mu$') +pl.scatter(x2[:, 0], x2[:, 1], alpha=0.5, label=r'$\nu$') +pl.scatter(xb[:, 0] + 30, xb[:, 1], alpha=0.5, label='Barycenter') +pl.title('Sliced Wasserstein barycenter') +pl.legend() +ax = pl.axis() + + +# %% +# Animate trajectories of the barycenter along gradient descent +# ------------------------------------------------------- + +pl.figure(5, (8, 4)) + + +def _update_plot(i): + pl.clf() + pl.scatter(x1[:, 0], x1[:, 1], alpha=0.5, label='$\mu^{(0)}$') + pl.scatter(x2[:, 0], x2[:, 1], alpha=0.5, label=r'$\nu$') + pl.scatter(x_all[i, :, 0] + 30, x_all[i, :, 1], alpha=0.5, label='$\mu^{(100)}$') + pl.title('Sliced Wasserstein barycenter Iter. {}'.format(i)) + pl.axis(ax) + return 1 + + +ani = animation.FuncAnimation(pl.gcf(), _update_plot, nb_iter_max, interval=100, repeat_delay=2000) diff --git a/examples/backends/plot_unmix_optim_torch.py b/examples/backends/plot_unmix_optim_torch.py new file mode 100644 index 0000000..9ae66e9 --- /dev/null +++ b/examples/backends/plot_unmix_optim_torch.py @@ -0,0 +1,161 @@ +# -*- coding: utf-8 -*- +r""" +================================= +Wasserstein unmixing with PyTorch +================================= + +In this example we estimate mixing parameters from distributions that minimize +the Wasserstein distance. In other words we suppose that a target +distribution :math:`\mu^t` can be expressed as a weighted sum of source +distributions :math:`\mu^s_k` with the following model: + +.. math:: + \mu^t = \sum_{k=1}^K w_k\mu^s_k + +where :math:`\mathbf{w}` is a vector of size :math:`K` and belongs in the +distribution simplex :math:`\Delta_K`. + +In order to estimate this weight vector we propose to optimize the Wasserstein +distance between the model and the observed :math:`\mu^t` with respect to +the vector. This leads to the following optimization problem: + +.. math:: + \min_{\mathbf{w}\in\Delta_K} \quad W \left(\mu^t,\sum_{k=1}^K w_k\mu^s_k\right) + +This minimization is done in this example with a simple projected gradient +descent in PyTorch. We use the automatic backend of POT that allows us to +compute the Wasserstein distance with :any:`ot.emd2` with +differentiable losses. + +""" + +# Author: Remi Flamary <remi.flamary@polytechnique.edu> +# +# License: MIT License + +# sphinx_gallery_thumbnail_number = 2 + +import numpy as np +import matplotlib.pylab as pl +import ot +import torch + + +############################################################################## +# Generate data +# ------------- + +#%% Data + +nt = 100 +nt1 = 10 # + +ns1 = 50 +ns = 2 * ns1 + +rng = np.random.RandomState(2) + +xt = rng.randn(nt, 2) * 0.2 +xt[:nt1, 0] += 1 +xt[nt1:, 1] += 1 + + +xs1 = rng.randn(ns1, 2) * 0.2 +xs1[:, 0] += 1 +xs2 = rng.randn(ns1, 2) * 0.2 +xs2[:, 1] += 1 + +xs = np.concatenate((xs1, xs2)) + +# Sample reweighting matrix H +H = np.zeros((ns, 2)) +H[:ns1, 0] = 1 / ns1 +H[ns1:, 1] = 1 / ns1 +# each columns sums to 1 and has weights only for samples form the +# corresponding source distribution + +M = ot.dist(xs, xt) + +############################################################################## +# Plot data +# --------- + +#%% plot the distributions + +pl.figure(1) +pl.scatter(xt[:, 0], xt[:, 1], label='Target $\mu^t$', alpha=0.5) +pl.scatter(xs1[:, 0], xs1[:, 1], label='Source $\mu^s_1$', alpha=0.5) +pl.scatter(xs2[:, 0], xs2[:, 1], label='Source $\mu^s_2$', alpha=0.5) +pl.title('Sources and Target distributions') +pl.legend() + + +############################################################################## +# Optimization of the model wrt the Wasserstein distance +# ------------------------------------------------------ + + +#%% Weights optimization with gradient descent + +# convert numpy arrays to torch tensors +H2 = torch.tensor(H) +M2 = torch.tensor(M) + +# weights for the source distributions +w = torch.tensor(ot.unif(2), requires_grad=True) + +# uniform weights for target +b = torch.tensor(ot.unif(nt)) + +lr = 2e-3 # learning rate +niter = 500 # number of iterations +losses = [] # loss along the iterations + +# loss for the minimal Wasserstein estimator + + +def get_loss(w): + a = torch.mv(H2, w) # distribution reweighting + return ot.emd2(a, b, M2) # squared Wasserstein 2 + + +for i in range(niter): + + loss = get_loss(w) + losses.append(float(loss)) + + loss.backward() + + with torch.no_grad(): + w -= lr * w.grad # gradient step + w[:] = ot.utils.proj_simplex(w) # projection on the simplex + + w.grad.zero_() + + +############################################################################## +# Estimated weights and convergence of the objective +# --------------------------------------------------- + +we = w.detach().numpy() +print('Estimated mixture:', we) + +pl.figure(2) +pl.semilogy(losses) +pl.grid() +pl.title('Wasserstein distance') +pl.xlabel("Iterations") + +############################################################################## +# Ploting the reweighted source distribution +# ------------------------------------------ + +pl.figure(3) + +# compute source weights +ws = H.dot(we) + +pl.scatter(xt[:, 0], xt[:, 1], label='Target $\mu^t$', alpha=0.5) +pl.scatter(xs[:, 0], xs[:, 1], color='C3', s=ws * 20 * ns, label='Weighted sources $\sum_{k} w_k\mu^s_k$', alpha=0.5) +pl.title('Target and reweighted source distributions') +pl.legend() diff --git a/examples/backends/plot_wass1d_torch.py b/examples/backends/plot_wass1d_torch.py new file mode 100644 index 0000000..0abdd6d --- /dev/null +++ b/examples/backends/plot_wass1d_torch.py @@ -0,0 +1,152 @@ +r""" +================================= +Wasserstein 1D with PyTorch +================================= + +In this small example, we consider the following minization problem: + +.. math:: + \mu^* = \min_\mu W(\mu,\nu) + +where :math:`\nu` is a reference 1D measure. The problem is handled +by a projected gradient descent method, where the gradient is computed +by pyTorch automatic differentiation. The projection on the simplex +ensures that the iterate will remain on the probability simplex. + +This example illustrates both `wasserstein_1d` function and backend use within +the POT framework. +""" +# Author: Nicolas Courty <ncourty@irisa.fr> +# Rémi Flamary <remi.flamary@polytechnique.edu> +# +# License: MIT License + +import numpy as np +import matplotlib.pylab as pl +import matplotlib as mpl +import torch + +from ot.lp import wasserstein_1d +from ot.datasets import make_1D_gauss as gauss +from ot.utils import proj_simplex + +red = np.array(mpl.colors.to_rgb('red')) +blue = np.array(mpl.colors.to_rgb('blue')) + + +n = 100 # nb bins + +# bin positions +x = np.arange(n, dtype=np.float64) + +# Gaussian distributions +a = gauss(n, m=20, s=5) # m= mean, s= std +b = gauss(n, m=60, s=10) + +# enforce sum to one on the support +a = a / a.sum() +b = b / b.sum() + + +device = "cuda" if torch.cuda.is_available() else "cpu" + +# use pyTorch for our data +x_torch = torch.tensor(x).to(device=device) +a_torch = torch.tensor(a).to(device=device).requires_grad_(True) +b_torch = torch.tensor(b).to(device=device) + +lr = 1e-6 +nb_iter_max = 800 + +loss_iter = [] + +pl.figure(1, figsize=(8, 4)) +pl.plot(x, a, 'b', label='Source distribution') +pl.plot(x, b, 'r', label='Target distribution') + +for i in range(nb_iter_max): + # Compute the Wasserstein 1D with torch backend + loss = wasserstein_1d(x_torch, x_torch, a_torch, b_torch, p=2) + # record the corresponding loss value + loss_iter.append(loss.clone().detach().cpu().numpy()) + loss.backward() + + # performs a step of projected gradient descent + with torch.no_grad(): + grad = a_torch.grad + a_torch -= a_torch.grad * lr # step + a_torch.grad.zero_() + a_torch.data = proj_simplex(a_torch) # projection onto the simplex + + # plot one curve every 10 iterations + if i % 10 == 0: + mix = float(i) / nb_iter_max + pl.plot(x, a_torch.clone().detach().cpu().numpy(), c=(1 - mix) * blue + mix * red) + +pl.legend() +pl.title('Distribution along the iterations of the projected gradient descent') +pl.show() + +pl.figure(2) +pl.plot(range(nb_iter_max), loss_iter, lw=3) +pl.title('Evolution of the loss along iterations', fontsize=16) +pl.show() + +# %% +# Wasserstein barycenter +# --------- +# In this example, we consider the following Wasserstein barycenter problem +# $$ \\eta^* = \\min_\\eta\;\;\; (1-t)W(\\mu,\\eta) + tW(\\eta,\\nu)$$ +# where :math:`\\mu` and :math:`\\nu` are reference 1D measures, and :math:`t` +# is a parameter :math:`\in [0,1]`. The problem is handled by a project gradient +# descent method, where the gradient is computed by pyTorch automatic differentiation. +# The projection on the simplex ensures that the iterate will remain on the +# probability simplex. +# +# This example illustrates both `wasserstein_1d` function and backend use within the +# POT framework. + + +device = "cuda" if torch.cuda.is_available() else "cpu" + +# use pyTorch for our data +x_torch = torch.tensor(x).to(device=device) +a_torch = torch.tensor(a).to(device=device) +b_torch = torch.tensor(b).to(device=device) +bary_torch = torch.tensor((a + b).copy() / 2).to(device=device).requires_grad_(True) + + +lr = 1e-6 +nb_iter_max = 2000 + +loss_iter = [] + +# instant of the interpolation +t = 0.5 + +for i in range(nb_iter_max): + # Compute the Wasserstein 1D with torch backend + loss = (1 - t) * wasserstein_1d(x_torch, x_torch, a_torch.detach(), bary_torch, p=2) + t * wasserstein_1d(x_torch, x_torch, b_torch, bary_torch, p=2) + # record the corresponding loss value + loss_iter.append(loss.clone().detach().cpu().numpy()) + loss.backward() + + # performs a step of projected gradient descent + with torch.no_grad(): + grad = bary_torch.grad + bary_torch -= bary_torch.grad * lr # step + bary_torch.grad.zero_() + bary_torch.data = proj_simplex(bary_torch) # projection onto the simplex + +pl.figure(3, figsize=(8, 4)) +pl.plot(x, a, 'b', label='Source distribution') +pl.plot(x, b, 'r', label='Target distribution') +pl.plot(x, bary_torch.clone().detach().cpu().numpy(), c='green', label='W barycenter') +pl.legend() +pl.title('Wasserstein barycenter computed by gradient descent') +pl.show() + +pl.figure(4) +pl.plot(range(nb_iter_max), loss_iter, lw=3) +pl.title('Evolution of the loss along iterations', fontsize=16) +pl.show() diff --git a/examples/backends/plot_wass2_gan_torch.py b/examples/backends/plot_wass2_gan_torch.py new file mode 100644 index 0000000..ca5b3c9 --- /dev/null +++ b/examples/backends/plot_wass2_gan_torch.py @@ -0,0 +1,227 @@ +# -*- coding: utf-8 -*- +r""" +======================================== +Wasserstein 2 Minibatch GAN with PyTorch +======================================== + +In this example we train a Wasserstein GAN using Wasserstein 2 on minibatches +as a distribution fitting term. + +We want to train a generator :math:`G_\theta` that generates realistic +data from random noise drawn form a Gaussian :math:`\mu_n` distribution so +that the data is indistinguishable from true data in the data distribution +:math:`\mu_d`. To this end Wasserstein GAN [Arjovsky2017] aim at optimizing +the parameters :math:`\theta` of the generator with the following +optimization problem: + +.. math:: + \min_{\theta} W(\mu_d,G_\theta\#\mu_n) + + +In practice we do not have access to the full distribution :math:`\mu_d` but +samples and we cannot compute the Wasserstein distance for lare dataset. +[Arjovsky2017] proposed to approximate the dual potential of Wasserstein 1 +with a neural network recovering an optimization problem similar to GAN. +In this example +we will optimize the expectation of the Wasserstein distance over minibatches +at each iterations as proposed in [Genevay2018]. Optimizing the Minibatches +of the Wasserstein distance has been studied in[Fatras2019]. + +[Arjovsky2017] Arjovsky, M., Chintala, S., & Bottou, L. (2017, July). +Wasserstein generative adversarial networks. In International conference +on machine learning (pp. 214-223). PMLR. + +[Genevay2018] Genevay, Aude, Gabriel Peyré, and Marco Cuturi. "Learning generative models +with sinkhorn divergences." International Conference on Artificial Intelligence +and Statistics. PMLR, 2018. + +[Fatras2019] Fatras, K., Zine, Y., Flamary, R., Gribonval, R., & Courty, N. +(2020, June). Learning with minibatch Wasserstein: asymptotic and gradient +properties. In the 23nd International Conference on Artificial Intelligence +and Statistics (Vol. 108). + +""" + +# Author: Remi Flamary <remi.flamary@polytechnique.edu> +# +# License: MIT License + +# sphinx_gallery_thumbnail_number = 3 + +import numpy as np +import matplotlib.pyplot as pl +import matplotlib.animation as animation +import torch +from torch import nn +import ot + + +# %% +# Data generation +# --------------- + +torch.manual_seed(1) +sigma = 0.1 +n_dims = 2 +n_features = 2 + + +def get_data(n_samples): + c = torch.rand(size=(n_samples, 1)) + angle = c * 2 * np.pi + x = torch.cat((torch.cos(angle), torch.sin(angle)), 1) + x += torch.randn(n_samples, 2) * sigma + return x + + +# %% +# Plot data +# --------- + +# plot the distributions +x = get_data(500) +pl.figure(1) +pl.scatter(x[:, 0], x[:, 1], label='Data samples from $\mu_d$', alpha=0.5) +pl.title('Data distribution') +pl.legend() + + +# %% +# Generator Model +# --------------- + +# define the MLP model +class Generator(torch.nn.Module): + def __init__(self): + super(Generator, self).__init__() + self.fc1 = nn.Linear(n_features, 200) + self.fc2 = nn.Linear(200, 500) + self.fc3 = nn.Linear(500, n_dims) + self.relu = torch.nn.ReLU() # instead of Heaviside step fn + + def forward(self, x): + output = self.fc1(x) + output = self.relu(output) # instead of Heaviside step fn + output = self.fc2(output) + output = self.relu(output) + output = self.fc3(output) + return output + +# %% +# Training the model +# ------------------ + + +G = Generator() +optimizer = torch.optim.RMSprop(G.parameters(), lr=0.00019, eps=1e-5) + +# number of iteration and size of the batches +n_iter = 200 # set to 200 for doc build but 1000 is better ;) +size_batch = 500 + +# generate statis samples to see their trajectory along training +n_visu = 100 +xnvisu = torch.randn(n_visu, n_features) +xvisu = torch.zeros(n_iter, n_visu, n_dims) + +ab = torch.ones(size_batch) / size_batch +losses = [] + + +for i in range(n_iter): + + # generate noise samples + xn = torch.randn(size_batch, n_features) + + # generate data samples + xd = get_data(size_batch) + + # generate sample along iterations + xvisu[i, :, :] = G(xnvisu).detach() + + # generate smaples and compte distance matrix + xg = G(xn) + M = ot.dist(xg, xd) + + loss = ot.emd2(ab, ab, M) + losses.append(float(loss.detach())) + + if i % 10 == 0: + print("Iter: {:3d}, loss={}".format(i, losses[-1])) + + loss.backward() + optimizer.step() + + del M + +pl.figure(2) +pl.semilogy(losses) +pl.grid() +pl.title('Wasserstein distance') +pl.xlabel("Iterations") + + +# %% +# Plot trajectories of generated samples along iterations +# ------------------------------------------------------- + + +pl.figure(3, (10, 10)) + +ivisu = [0, 10, 25, 50, 75, 125, 15, 175, 199] + +for i in range(9): + pl.subplot(3, 3, i + 1) + pl.scatter(xd[:, 0], xd[:, 1], label='Data samples from $\mu_d$', alpha=0.1) + pl.scatter(xvisu[ivisu[i], :, 0], xvisu[ivisu[i], :, 1], label='Data samples from $G\#\mu_n$', alpha=0.5) + pl.xticks(()) + pl.yticks(()) + pl.title('Iter. {}'.format(ivisu[i])) + if i == 0: + pl.legend() + +# %% +# Animate trajectories of generated samples along iteration +# ------------------------------------------------------- + +pl.figure(4, (8, 8)) + + +def _update_plot(i): + pl.clf() + pl.scatter(xd[:, 0], xd[:, 1], label='Data samples from $\mu_d$', alpha=0.1) + pl.scatter(xvisu[i, :, 0], xvisu[i, :, 1], label='Data samples from $G\#\mu_n$', alpha=0.5) + pl.xticks(()) + pl.yticks(()) + pl.xlim((-1.5, 1.5)) + pl.ylim((-1.5, 1.5)) + pl.title('Iter. {}'.format(i)) + return 1 + + +i = 0 +pl.scatter(xd[:, 0], xd[:, 1], label='Data samples from $\mu_d$', alpha=0.1) +pl.scatter(xvisu[i, :, 0], xvisu[i, :, 1], label='Data samples from $G\#\mu_n$', alpha=0.5) +pl.xticks(()) +pl.yticks(()) +pl.xlim((-1.5, 1.5)) +pl.ylim((-1.5, 1.5)) +pl.title('Iter. {}'.format(ivisu[i])) + + +ani = animation.FuncAnimation(pl.gcf(), _update_plot, n_iter, interval=100, repeat_delay=2000) + +# %% +# Generate and visualize data +# --------------------------- + +size_batch = 500 +xd = get_data(size_batch) +xn = torch.randn(size_batch, 2) +x = G(xn).detach().numpy() + +pl.figure(5) +pl.scatter(xd[:, 0], xd[:, 1], label='Data samples from $\mu_d$', alpha=0.5) +pl.scatter(x[:, 0], x[:, 1], label='Data samples from $G\#\mu_n$', alpha=0.5) +pl.title('Sources and Target distributions') +pl.legend() diff --git a/examples/barycenters/plot_barycenter_1D.py b/examples/barycenters/plot_barycenter_1D.py index 63dc460..2373e99 100644 --- a/examples/barycenters/plot_barycenter_1D.py +++ b/examples/barycenters/plot_barycenter_1D.py @@ -18,10 +18,10 @@ SIAM Journal on Scientific Computing, 37(2), A1111-A1138. # # License: MIT License -# sphinx_gallery_thumbnail_number = 4 +# sphinx_gallery_thumbnail_number = 1 import numpy as np -import matplotlib.pylab as pl +import matplotlib.pyplot as plt import ot # necessary for 3d plot even if not used from mpl_toolkits.mplot3d import Axes3D # noqa @@ -51,18 +51,6 @@ M = ot.utils.dist0(n) M /= M.max() ############################################################################## -# Plot data -# --------- - -#%% plot the distributions - -pl.figure(1, figsize=(6.4, 3)) -for i in range(n_distributions): - pl.plot(x, A[:, i]) -pl.title('Distributions') -pl.tight_layout() - -############################################################################## # Barycenter computation # ---------------------- @@ -78,24 +66,20 @@ bary_l2 = A.dot(weights) reg = 1e-3 bary_wass = ot.bregman.barycenter(A, M, reg, weights) -pl.figure(2) -pl.clf() -pl.subplot(2, 1, 1) -for i in range(n_distributions): - pl.plot(x, A[:, i]) -pl.title('Distributions') +f, (ax1, ax2) = plt.subplots(2, 1, tight_layout=True, num=1) +ax1.plot(x, A, color="black") +ax1.set_title('Distributions') -pl.subplot(2, 1, 2) -pl.plot(x, bary_l2, 'r', label='l2') -pl.plot(x, bary_wass, 'g', label='Wasserstein') -pl.legend() -pl.title('Barycenters') -pl.tight_layout() +ax2.plot(x, bary_l2, 'r', label='l2') +ax2.plot(x, bary_wass, 'g', label='Wasserstein') +ax2.set_title('Barycenters') + +plt.legend() +plt.show() ############################################################################## # Barycentric interpolation # ------------------------- - #%% barycenter interpolation n_alpha = 11 @@ -106,24 +90,23 @@ B_l2 = np.zeros((n, n_alpha)) B_wass = np.copy(B_l2) -for i in range(0, n_alpha): +for i in range(n_alpha): alpha = alpha_list[i] weights = np.array([1 - alpha, alpha]) B_l2[:, i] = A.dot(weights) B_wass[:, i] = ot.bregman.barycenter(A, M, reg, weights) #%% plot interpolation +plt.figure(2) -pl.figure(3) - -cmap = pl.cm.get_cmap('viridis') +cmap = plt.cm.get_cmap('viridis') verts = [] zs = alpha_list for i, z in enumerate(zs): ys = B_l2[:, i] verts.append(list(zip(x, ys))) -ax = pl.gcf().gca(projection='3d') +ax = plt.gcf().gca(projection='3d') poly = PolyCollection(verts, facecolors=[cmap(a) for a in alpha_list]) poly.set_alpha(0.7) @@ -134,18 +117,18 @@ ax.set_ylabel('$\\alpha$') ax.set_ylim3d(0, 1) ax.set_zlabel('') ax.set_zlim3d(0, B_l2.max() * 1.01) -pl.title('Barycenter interpolation with l2') -pl.tight_layout() +plt.title('Barycenter interpolation with l2') +plt.tight_layout() -pl.figure(4) -cmap = pl.cm.get_cmap('viridis') +plt.figure(3) +cmap = plt.cm.get_cmap('viridis') verts = [] zs = alpha_list for i, z in enumerate(zs): ys = B_wass[:, i] verts.append(list(zip(x, ys))) -ax = pl.gcf().gca(projection='3d') +ax = plt.gcf().gca(projection='3d') poly = PolyCollection(verts, facecolors=[cmap(a) for a in alpha_list]) poly.set_alpha(0.7) @@ -156,7 +139,7 @@ ax.set_ylabel('$\\alpha$') ax.set_ylim3d(0, 1) ax.set_zlabel('') ax.set_zlim3d(0, B_l2.max() * 1.01) -pl.title('Barycenter interpolation with Wasserstein') -pl.tight_layout() +plt.title('Barycenter interpolation with Wasserstein') +plt.tight_layout() -pl.show() +plt.show() diff --git a/examples/barycenters/plot_barycenter_lp_vs_entropic.py b/examples/barycenters/plot_barycenter_lp_vs_entropic.py index 57a6bac..6502f16 100644 --- a/examples/barycenters/plot_barycenter_lp_vs_entropic.py +++ b/examples/barycenters/plot_barycenter_lp_vs_entropic.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ ================================================================================= -1D Wasserstein barycenter comparison between exact LP and entropic regularization +1D Wasserstein barycenter: exact LP vs entropic regularization ================================================================================= This example illustrates the computation of regularized Wasserstein Barycenter diff --git a/examples/barycenters/plot_convolutional_barycenter.py b/examples/barycenters/plot_convolutional_barycenter.py index cbcd4a1..3721f31 100644 --- a/examples/barycenters/plot_convolutional_barycenter.py +++ b/examples/barycenters/plot_convolutional_barycenter.py @@ -6,17 +6,18 @@ Convolutional Wasserstein Barycenter example ============================================ -This example is designed to illustrate how the Convolutional Wasserstein Barycenter -function of POT works. +This example is designed to illustrate how the Convolutional Wasserstein +Barycenter function of POT works. """ # Author: Nicolas Courty <ncourty@irisa.fr> # # License: MIT License - +import os +from pathlib import Path import numpy as np -import pylab as pl +import matplotlib.pyplot as plt import ot ############################################################################## @@ -25,22 +26,19 @@ import ot # # The four distributions are constructed from 4 simple images +this_file = os.path.realpath('__file__') +data_path = os.path.join(Path(this_file).parent.parent.parent, 'data') -f1 = 1 - pl.imread('../../data/redcross.png')[:, :, 2] -f2 = 1 - pl.imread('../../data/duck.png')[:, :, 2] -f3 = 1 - pl.imread('../../data/heart.png')[:, :, 2] -f4 = 1 - pl.imread('../../data/tooth.png')[:, :, 2] +f1 = 1 - plt.imread(os.path.join(data_path, 'redcross.png'))[:, :, 2] +f2 = 1 - plt.imread(os.path.join(data_path, 'tooth.png'))[:, :, 2] +f3 = 1 - plt.imread(os.path.join(data_path, 'heart.png'))[:, :, 2] +f4 = 1 - plt.imread(os.path.join(data_path, 'duck.png'))[:, :, 2] -A = [] f1 = f1 / np.sum(f1) f2 = f2 / np.sum(f2) f3 = f3 / np.sum(f3) f4 = f4 / np.sum(f4) -A.append(f1) -A.append(f2) -A.append(f3) -A.append(f4) -A = np.array(A) +A = np.array([f1, f2, f3, f4]) nb_images = 5 @@ -57,14 +55,13 @@ v4 = np.array((0, 0, 0, 1)) # ---------------------------------------- # -pl.figure(figsize=(10, 10)) -pl.title('Convolutional Wasserstein Barycenters in POT') +fig, axes = plt.subplots(nb_images, nb_images, figsize=(7, 7)) +plt.suptitle('Convolutional Wasserstein Barycenters in POT') cm = 'Blues' # regularization parameter reg = 0.004 for i in range(nb_images): for j in range(nb_images): - pl.subplot(nb_images, nb_images, i * nb_images + j + 1) tx = float(i) / (nb_images - 1) ty = float(j) / (nb_images - 1) @@ -74,19 +71,19 @@ for i in range(nb_images): weights = (1 - ty) * tmp1 + ty * tmp2 if i == 0 and j == 0: - pl.imshow(f1, cmap=cm) - pl.axis('off') + axes[i, j].imshow(f1, cmap=cm) elif i == 0 and j == (nb_images - 1): - pl.imshow(f3, cmap=cm) - pl.axis('off') + axes[i, j].imshow(f3, cmap=cm) elif i == (nb_images - 1) and j == 0: - pl.imshow(f2, cmap=cm) - pl.axis('off') + axes[i, j].imshow(f2, cmap=cm) elif i == (nb_images - 1) and j == (nb_images - 1): - pl.imshow(f4, cmap=cm) - pl.axis('off') + axes[i, j].imshow(f4, cmap=cm) else: # call to barycenter computation - pl.imshow(ot.bregman.convolutional_barycenter2d(A, reg, weights), cmap=cm) - pl.axis('off') -pl.show() + axes[i, j].imshow( + ot.bregman.convolutional_barycenter2d(A, reg, weights), + cmap=cm + ) + axes[i, j].axis('off') +plt.tight_layout() +plt.show() diff --git a/examples/barycenters/plot_debiased_barycenter.py b/examples/barycenters/plot_debiased_barycenter.py new file mode 100644 index 0000000..2a603dd --- /dev/null +++ b/examples/barycenters/plot_debiased_barycenter.py @@ -0,0 +1,131 @@ +# -*- coding: utf-8 -*- +""" +================================= +Debiased Sinkhorn barycenter demo +================================= + +This example illustrates the computation of the debiased Sinkhorn barycenter +as proposed in [37]_. + + +.. [37] Janati, H., Cuturi, M., Gramfort, A. Proceedings of the 37th + International Conference on Machine Learning, PMLR 119:4692-4701, 2020 +""" + +# Author: Hicham Janati <hicham.janati100@gmail.com> +# +# License: MIT License +# sphinx_gallery_thumbnail_number = 3 + +import os +from pathlib import Path + +import numpy as np +import matplotlib.pyplot as plt + +import ot +from ot.bregman import (barycenter, barycenter_debiased, + convolutional_barycenter2d, + convolutional_barycenter2d_debiased) + +############################################################################## +# Debiased barycenter of 1D Gaussians +# ------------------------------------ + +#%% parameters + +n = 100 # nb bins + +# bin positions +x = np.arange(n, dtype=np.float64) + +# Gaussian distributions +a1 = ot.datasets.make_1D_gauss(n, m=20, s=5) # m= mean, s= std +a2 = ot.datasets.make_1D_gauss(n, m=60, s=8) + +# creating matrix A containing all distributions +A = np.vstack((a1, a2)).T +n_distributions = A.shape[1] + +# loss matrix + normalization +M = ot.utils.dist0(n) +M /= M.max() + +#%% barycenter computation + +alpha = 0.2 # 0<=alpha<=1 +weights = np.array([1 - alpha, alpha]) + +epsilons = [5e-3, 1e-2, 5e-2] + + +bars = [barycenter(A, M, reg, weights) for reg in epsilons] +bars_debiased = [barycenter_debiased(A, M, reg, weights) for reg in epsilons] +labels = ["Sinkhorn barycenter", "Debiased barycenter"] +colors = ["indianred", "gold"] + +f, axes = plt.subplots(1, len(epsilons), tight_layout=True, sharey=True, + figsize=(12, 4), num=1) +for ax, eps, bar, bar_debiased in zip(axes, epsilons, bars, bars_debiased): + ax.plot(A[:, 0], color="k", ls="--", label="Input data", alpha=0.3) + ax.plot(A[:, 1], color="k", ls="--", alpha=0.3) + for data, label, color in zip([bar, bar_debiased], labels, colors): + ax.plot(data, color=color, label=label, lw=2) + ax.set_title(r"$\varepsilon = %.3f$" % eps) +plt.legend() +plt.show() + + +############################################################################## +# Debiased barycenter of 2D images +# --------------------------------- +this_file = os.path.realpath('__file__') +data_path = os.path.join(Path(this_file).parent.parent.parent, 'data') +f1 = 1 - plt.imread(os.path.join(data_path, 'heart.png'))[:, :, 2] +f2 = 1 - plt.imread(os.path.join(data_path, 'duck.png'))[:, :, 2] + +A = np.asarray([f1, f2]) + 1e-2 +A /= A.sum(axis=(1, 2))[:, None, None] + +############################################################################## +# Display the input images + +fig, axes = plt.subplots(1, 2, figsize=(7, 4), num=2) +for ax, img in zip(axes, A): + ax.imshow(img, cmap="Greys") + ax.axis("off") +fig.tight_layout() +plt.show() + + +############################################################################## +# Barycenter computation and visualization +# ---------------------------------------- +# + +bars_sinkhorn, bars_debiased = [], [] +epsilons = [5e-3, 7e-3, 1e-2] +for eps in epsilons: + bar = convolutional_barycenter2d(A, eps) + bar_debiased, log = convolutional_barycenter2d_debiased(A, eps, log=True) + bars_sinkhorn.append(bar) + bars_debiased.append(bar_debiased) + +titles = ["Sinkhorn", "Debiased"] +all_bars = [bars_sinkhorn, bars_debiased] +fig, axes = plt.subplots(2, 3, figsize=(8, 6), num=3) +for jj, (method, ax_row, bars) in enumerate(zip(titles, axes, all_bars)): + for ii, (ax, img, eps) in enumerate(zip(ax_row, bars, epsilons)): + ax.imshow(img, cmap="Greys") + if jj == 0: + ax.set_title(r"$\varepsilon = %.3f$" % eps, fontsize=13) + ax.set_xticks([]) + ax.set_yticks([]) + ax.spines['top'].set_visible(False) + ax.spines['right'].set_visible(False) + ax.spines['bottom'].set_visible(False) + ax.spines['left'].set_visible(False) + if ii == 0: + ax.set_ylabel(method, fontsize=15) +fig.tight_layout() +plt.show() diff --git a/examples/barycenters/plot_free_support_barycenter.py b/examples/barycenters/plot_free_support_barycenter.py index 27ddc8e..2d68a39 100644 --- a/examples/barycenters/plot_free_support_barycenter.py +++ b/examples/barycenters/plot_free_support_barycenter.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- """ -==================================================== +======================================================== 2D free support Wasserstein barycenters of distributions -==================================================== +======================================================== Illustration of 2D Wasserstein barycenters if distributions are weighted sum of diracs. diff --git a/examples/domain-adaptation/plot_otda_color_images.py b/examples/domain-adaptation/plot_otda_color_images.py index 929365e..06dc8ab 100644 --- a/examples/domain-adaptation/plot_otda_color_images.py +++ b/examples/domain-adaptation/plot_otda_color_images.py @@ -19,17 +19,20 @@ SIAM Journal on Imaging Sciences, 7(3), 1853-1882. # sphinx_gallery_thumbnail_number = 2 +import os +from pathlib import Path + import numpy as np -import matplotlib.pylab as pl +from matplotlib import pyplot as plt import ot -r = np.random.RandomState(42) +rng = np.random.RandomState(42) -def im2mat(I): +def im2mat(img): """Converts an image to matrix (one pixel per line)""" - return I.reshape((I.shape[0] * I.shape[1], I.shape[2])) + return img.reshape((img.shape[0] * img.shape[1], img.shape[2])) def mat2im(X, shape): @@ -37,8 +40,8 @@ def mat2im(X, shape): return X.reshape(shape) -def minmax(I): - return np.clip(I, 0, 1) +def minmax(img): + return np.clip(img, 0, 1) ############################################################################## @@ -46,16 +49,19 @@ def minmax(I): # ------------- # Loading images -I1 = pl.imread('../../data/ocean_day.jpg').astype(np.float64) / 256 -I2 = pl.imread('../../data/ocean_sunset.jpg').astype(np.float64) / 256 +this_file = os.path.realpath('__file__') +data_path = os.path.join(Path(this_file).parent.parent.parent, 'data') + +I1 = plt.imread(os.path.join(data_path, 'ocean_day.jpg')).astype(np.float64) / 256 +I2 = plt.imread(os.path.join(data_path, 'ocean_sunset.jpg')).astype(np.float64) / 256 X1 = im2mat(I1) X2 = im2mat(I2) # training samples -nb = 1000 -idx1 = r.randint(X1.shape[0], size=(nb,)) -idx2 = r.randint(X2.shape[0], size=(nb,)) +nb = 500 +idx1 = rng.randint(X1.shape[0], size=(nb,)) +idx2 = rng.randint(X2.shape[0], size=(nb,)) Xs = X1[idx1, :] Xt = X2[idx2, :] @@ -65,39 +71,39 @@ Xt = X2[idx2, :] # Plot original image # ------------------- -pl.figure(1, figsize=(6.4, 3)) +plt.figure(1, figsize=(6.4, 3)) -pl.subplot(1, 2, 1) -pl.imshow(I1) -pl.axis('off') -pl.title('Image 1') +plt.subplot(1, 2, 1) +plt.imshow(I1) +plt.axis('off') +plt.title('Image 1') -pl.subplot(1, 2, 2) -pl.imshow(I2) -pl.axis('off') -pl.title('Image 2') +plt.subplot(1, 2, 2) +plt.imshow(I2) +plt.axis('off') +plt.title('Image 2') ############################################################################## # Scatter plot of colors # ---------------------- -pl.figure(2, figsize=(6.4, 3)) +plt.figure(2, figsize=(6.4, 3)) -pl.subplot(1, 2, 1) -pl.scatter(Xs[:, 0], Xs[:, 2], c=Xs) -pl.axis([0, 1, 0, 1]) -pl.xlabel('Red') -pl.ylabel('Blue') -pl.title('Image 1') +plt.subplot(1, 2, 1) +plt.scatter(Xs[:, 0], Xs[:, 2], c=Xs) +plt.axis([0, 1, 0, 1]) +plt.xlabel('Red') +plt.ylabel('Blue') +plt.title('Image 1') -pl.subplot(1, 2, 2) -pl.scatter(Xt[:, 0], Xt[:, 2], c=Xt) -pl.axis([0, 1, 0, 1]) -pl.xlabel('Red') -pl.ylabel('Blue') -pl.title('Image 2') -pl.tight_layout() +plt.subplot(1, 2, 2) +plt.scatter(Xt[:, 0], Xt[:, 2], c=Xt) +plt.axis([0, 1, 0, 1]) +plt.xlabel('Red') +plt.ylabel('Blue') +plt.title('Image 2') +plt.tight_layout() ############################################################################## @@ -130,37 +136,37 @@ I2te = minmax(mat2im(transp_Xt_sinkhorn, I2.shape)) # Plot new images # --------------- -pl.figure(3, figsize=(8, 4)) +plt.figure(3, figsize=(8, 4)) -pl.subplot(2, 3, 1) -pl.imshow(I1) -pl.axis('off') -pl.title('Image 1') +plt.subplot(2, 3, 1) +plt.imshow(I1) +plt.axis('off') +plt.title('Image 1') -pl.subplot(2, 3, 2) -pl.imshow(I1t) -pl.axis('off') -pl.title('Image 1 Adapt') +plt.subplot(2, 3, 2) +plt.imshow(I1t) +plt.axis('off') +plt.title('Image 1 Adapt') -pl.subplot(2, 3, 3) -pl.imshow(I1te) -pl.axis('off') -pl.title('Image 1 Adapt (reg)') +plt.subplot(2, 3, 3) +plt.imshow(I1te) +plt.axis('off') +plt.title('Image 1 Adapt (reg)') -pl.subplot(2, 3, 4) -pl.imshow(I2) -pl.axis('off') -pl.title('Image 2') +plt.subplot(2, 3, 4) +plt.imshow(I2) +plt.axis('off') +plt.title('Image 2') -pl.subplot(2, 3, 5) -pl.imshow(I2t) -pl.axis('off') -pl.title('Image 2 Adapt') +plt.subplot(2, 3, 5) +plt.imshow(I2t) +plt.axis('off') +plt.title('Image 2 Adapt') -pl.subplot(2, 3, 6) -pl.imshow(I2te) -pl.axis('off') -pl.title('Image 2 Adapt (reg)') -pl.tight_layout() +plt.subplot(2, 3, 6) +plt.imshow(I2te) +plt.axis('off') +plt.title('Image 2 Adapt (reg)') +plt.tight_layout() -pl.show() +plt.show() diff --git a/examples/domain-adaptation/plot_otda_jcpot.py b/examples/domain-adaptation/plot_otda_jcpot.py index c495690..0d974f4 100644 --- a/examples/domain-adaptation/plot_otda_jcpot.py +++ b/examples/domain-adaptation/plot_otda_jcpot.py @@ -1,8 +1,8 @@ # -*- coding: utf-8 -*- """ -======================== +================================ OT for multi-source target shift -======================== +================================ This example introduces a target shift problem with two 2D source and 1 target domain. diff --git a/examples/domain-adaptation/plot_otda_linear_mapping.py b/examples/domain-adaptation/plot_otda_linear_mapping.py index dbf16b8..a44096a 100644 --- a/examples/domain-adaptation/plot_otda_linear_mapping.py +++ b/examples/domain-adaptation/plot_otda_linear_mapping.py @@ -13,9 +13,11 @@ Linear OT mapping estimation # License: MIT License # sphinx_gallery_thumbnail_number = 2 +import os +from pathlib import Path import numpy as np -import pylab as pl +from matplotlib import pyplot as plt import ot ############################################################################## @@ -26,17 +28,19 @@ n = 1000 d = 2 sigma = .1 +rng = np.random.RandomState(42) + # source samples -angles = np.random.rand(n, 1) * 2 * np.pi +angles = rng.rand(n, 1) * 2 * np.pi xs = np.concatenate((np.sin(angles), np.cos(angles)), - axis=1) + sigma * np.random.randn(n, 2) + axis=1) + sigma * rng.randn(n, 2) xs[:n // 2, 1] += 2 # target samples -anglet = np.random.rand(n, 1) * 2 * np.pi +anglet = rng.rand(n, 1) * 2 * np.pi xt = np.concatenate((np.sin(anglet), np.cos(anglet)), - axis=1) + sigma * np.random.randn(n, 2) + axis=1) + sigma * rng.randn(n, 2) xt[:n // 2, 1] += 2 @@ -48,9 +52,9 @@ xt = xt.dot(A) + b # Plot data # --------- -pl.figure(1, (5, 5)) -pl.plot(xs[:, 0], xs[:, 1], '+') -pl.plot(xt[:, 0], xt[:, 1], 'o') +plt.figure(1, (5, 5)) +plt.plot(xs[:, 0], xs[:, 1], '+') +plt.plot(xt[:, 0], xt[:, 1], 'o') ############################################################################## @@ -66,22 +70,22 @@ xst = xs.dot(Ae) + be # Plot transported samples # ------------------------ -pl.figure(1, (5, 5)) -pl.clf() -pl.plot(xs[:, 0], xs[:, 1], '+') -pl.plot(xt[:, 0], xt[:, 1], 'o') -pl.plot(xst[:, 0], xst[:, 1], '+') +plt.figure(1, (5, 5)) +plt.clf() +plt.plot(xs[:, 0], xs[:, 1], '+') +plt.plot(xt[:, 0], xt[:, 1], 'o') +plt.plot(xst[:, 0], xst[:, 1], '+') -pl.show() +plt.show() ############################################################################## # Load image data # --------------- -def im2mat(I): +def im2mat(img): """Converts and image to matrix (one pixel per line)""" - return I.reshape((I.shape[0] * I.shape[1], I.shape[2])) + return img.reshape((img.shape[0] * img.shape[1], img.shape[2])) def mat2im(X, shape): @@ -89,13 +93,16 @@ def mat2im(X, shape): return X.reshape(shape) -def minmax(I): - return np.clip(I, 0, 1) +def minmax(img): + return np.clip(img, 0, 1) # Loading images -I1 = pl.imread('../../data/ocean_day.jpg').astype(np.float64) / 256 -I2 = pl.imread('../../data/ocean_sunset.jpg').astype(np.float64) / 256 +this_file = os.path.realpath('__file__') +data_path = os.path.join(Path(this_file).parent.parent.parent, 'data') + +I1 = plt.imread(os.path.join(data_path, 'ocean_day.jpg')).astype(np.float64) / 256 +I2 = plt.imread(os.path.join(data_path, 'ocean_sunset.jpg')).astype(np.float64) / 256 X1 = im2mat(I1) @@ -123,24 +130,24 @@ I2t = minmax(mat2im(xts, I2.shape)) # Plot transformed images # ----------------------- -pl.figure(2, figsize=(10, 7)) +plt.figure(2, figsize=(10, 7)) -pl.subplot(2, 2, 1) -pl.imshow(I1) -pl.axis('off') -pl.title('Im. 1') +plt.subplot(2, 2, 1) +plt.imshow(I1) +plt.axis('off') +plt.title('Im. 1') -pl.subplot(2, 2, 2) -pl.imshow(I2) -pl.axis('off') -pl.title('Im. 2') +plt.subplot(2, 2, 2) +plt.imshow(I2) +plt.axis('off') +plt.title('Im. 2') -pl.subplot(2, 2, 3) -pl.imshow(I1t) -pl.axis('off') -pl.title('Mapping Im. 1') +plt.subplot(2, 2, 3) +plt.imshow(I1t) +plt.axis('off') +plt.title('Mapping Im. 1') -pl.subplot(2, 2, 4) -pl.imshow(I2t) -pl.axis('off') -pl.title('Inverse mapping Im. 2') +plt.subplot(2, 2, 4) +plt.imshow(I2t) +plt.axis('off') +plt.title('Inverse mapping Im. 2') diff --git a/examples/domain-adaptation/plot_otda_mapping_colors_images.py b/examples/domain-adaptation/plot_otda_mapping_colors_images.py index ee5c8b0..dbece70 100644 --- a/examples/domain-adaptation/plot_otda_mapping_colors_images.py +++ b/examples/domain-adaptation/plot_otda_mapping_colors_images.py @@ -21,17 +21,19 @@ discrete optimal transport", Neural Information Processing Systems (NIPS), 2016. # License: MIT License # sphinx_gallery_thumbnail_number = 3 +import os +from pathlib import Path import numpy as np -import matplotlib.pylab as pl +from matplotlib import pyplot as plt import ot -r = np.random.RandomState(42) +rng = np.random.RandomState(42) -def im2mat(I): +def im2mat(img): """Converts and image to matrix (one pixel per line)""" - return I.reshape((I.shape[0] * I.shape[1], I.shape[2])) + return img.reshape((img.shape[0] * img.shape[1], img.shape[2])) def mat2im(X, shape): @@ -39,8 +41,8 @@ def mat2im(X, shape): return X.reshape(shape) -def minmax(I): - return np.clip(I, 0, 1) +def minmax(img): + return np.clip(img, 0, 1) ############################################################################## @@ -48,17 +50,19 @@ def minmax(I): # ------------- # Loading images -I1 = pl.imread('../../data/ocean_day.jpg').astype(np.float64) / 256 -I2 = pl.imread('../../data/ocean_sunset.jpg').astype(np.float64) / 256 +this_file = os.path.realpath('__file__') +data_path = os.path.join(Path(this_file).parent.parent.parent, 'data') +I1 = plt.imread(os.path.join(data_path, 'ocean_day.jpg')).astype(np.float64) / 256 +I2 = plt.imread(os.path.join(data_path, 'ocean_sunset.jpg')).astype(np.float64) / 256 X1 = im2mat(I1) X2 = im2mat(I2) # training samples -nb = 1000 -idx1 = r.randint(X1.shape[0], size=(nb,)) -idx2 = r.randint(X2.shape[0], size=(nb,)) +nb = 500 +idx1 = rng.randint(X1.shape[0], size=(nb,)) +idx2 = rng.randint(X2.shape[0], size=(nb,)) Xs = X1[idx1, :] Xt = X2[idx2, :] @@ -99,76 +103,76 @@ Image_mapping_gaussian = minmax(mat2im(X1tn, I1.shape)) # Plot original images # -------------------- -pl.figure(1, figsize=(6.4, 3)) -pl.subplot(1, 2, 1) -pl.imshow(I1) -pl.axis('off') -pl.title('Image 1') +plt.figure(1, figsize=(6.4, 3)) +plt.subplot(1, 2, 1) +plt.imshow(I1) +plt.axis('off') +plt.title('Image 1') -pl.subplot(1, 2, 2) -pl.imshow(I2) -pl.axis('off') -pl.title('Image 2') -pl.tight_layout() +plt.subplot(1, 2, 2) +plt.imshow(I2) +plt.axis('off') +plt.title('Image 2') +plt.tight_layout() ############################################################################## # Plot pixel values distribution # ------------------------------ -pl.figure(2, figsize=(6.4, 5)) +plt.figure(2, figsize=(6.4, 5)) -pl.subplot(1, 2, 1) -pl.scatter(Xs[:, 0], Xs[:, 2], c=Xs) -pl.axis([0, 1, 0, 1]) -pl.xlabel('Red') -pl.ylabel('Blue') -pl.title('Image 1') +plt.subplot(1, 2, 1) +plt.scatter(Xs[:, 0], Xs[:, 2], c=Xs) +plt.axis([0, 1, 0, 1]) +plt.xlabel('Red') +plt.ylabel('Blue') +plt.title('Image 1') -pl.subplot(1, 2, 2) -pl.scatter(Xt[:, 0], Xt[:, 2], c=Xt) -pl.axis([0, 1, 0, 1]) -pl.xlabel('Red') -pl.ylabel('Blue') -pl.title('Image 2') -pl.tight_layout() +plt.subplot(1, 2, 2) +plt.scatter(Xt[:, 0], Xt[:, 2], c=Xt) +plt.axis([0, 1, 0, 1]) +plt.xlabel('Red') +plt.ylabel('Blue') +plt.title('Image 2') +plt.tight_layout() ############################################################################## # Plot transformed images # ----------------------- -pl.figure(2, figsize=(10, 5)) +plt.figure(2, figsize=(10, 5)) -pl.subplot(2, 3, 1) -pl.imshow(I1) -pl.axis('off') -pl.title('Im. 1') +plt.subplot(2, 3, 1) +plt.imshow(I1) +plt.axis('off') +plt.title('Im. 1') -pl.subplot(2, 3, 4) -pl.imshow(I2) -pl.axis('off') -pl.title('Im. 2') +plt.subplot(2, 3, 4) +plt.imshow(I2) +plt.axis('off') +plt.title('Im. 2') -pl.subplot(2, 3, 2) -pl.imshow(Image_emd) -pl.axis('off') -pl.title('EmdTransport') +plt.subplot(2, 3, 2) +plt.imshow(Image_emd) +plt.axis('off') +plt.title('EmdTransport') -pl.subplot(2, 3, 5) -pl.imshow(Image_sinkhorn) -pl.axis('off') -pl.title('SinkhornTransport') +plt.subplot(2, 3, 5) +plt.imshow(Image_sinkhorn) +plt.axis('off') +plt.title('SinkhornTransport') -pl.subplot(2, 3, 3) -pl.imshow(Image_mapping_linear) -pl.axis('off') -pl.title('MappingTransport (linear)') +plt.subplot(2, 3, 3) +plt.imshow(Image_mapping_linear) +plt.axis('off') +plt.title('MappingTransport (linear)') -pl.subplot(2, 3, 6) -pl.imshow(Image_mapping_gaussian) -pl.axis('off') -pl.title('MappingTransport (gaussian)') -pl.tight_layout() +plt.subplot(2, 3, 6) +plt.imshow(Image_mapping_gaussian) +plt.axis('off') +plt.title('MappingTransport (gaussian)') +plt.tight_layout() -pl.show() +plt.show() diff --git a/examples/gromov/plot_barycenter_fgw.py b/examples/gromov/plot_barycenter_fgw.py index 3f81765..556e08f 100644 --- a/examples/gromov/plot_barycenter_fgw.py +++ b/examples/gromov/plot_barycenter_fgw.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ ================================= -Plot graphs' barycenter using FGW +Plot graphs barycenter using FGW ================================= This example illustrates the computation barycenter of labeled graphs using diff --git a/examples/gromov/plot_fgw.py b/examples/gromov/plot_fgw.py index 97fe619..5475fb3 100644 --- a/examples/gromov/plot_fgw.py +++ b/examples/gromov/plot_fgw.py @@ -26,7 +26,7 @@ from ot.gromov import gromov_wasserstein, fused_gromov_wasserstein ############################################################################## # Generate data -# --------- +# ------------- #%% parameters # We create two 1D random measures @@ -76,7 +76,7 @@ pl.show() ############################################################################## # Create structure matrices and across-feature distance matrix -# --------- +# ------------------------------------------------------------ #%% Structure matrices and across-features distance matrix C1 = ot.dist(xs) @@ -88,7 +88,7 @@ Got = ot.emd([], [], M) ############################################################################## # Plot matrices -# --------- +# ------------- #%% cmap = 'Reds' @@ -131,7 +131,7 @@ pl.show() ############################################################################## # Compute FGW/GW -# --------- +# -------------- #%% Computing FGW and GW alpha = 1e-3 @@ -145,7 +145,7 @@ Gg, log = gromov_wasserstein(C1, C2, p, q, loss_fun='square_loss', verbose=True, ############################################################################## # Visualize transport matrices -# --------- +# ---------------------------- #%% visu OT matrix cmap = 'Blues' diff --git a/examples/gromov/plot_gromov.py b/examples/gromov/plot_gromov.py index deb2f86..5a362cf 100644 --- a/examples/gromov/plot_gromov.py +++ b/examples/gromov/plot_gromov.py @@ -104,3 +104,37 @@ pl.imshow(gw, cmap='jet') pl.title('Entropic Gromov Wasserstein')
pl.show()
+
+#############################################################################
+#
+# Compute GW with a scalable stochastic method with any loss function
+# ----------------------------------------------------------------------
+
+
+def loss(x, y):
+ return np.abs(x - y)
+
+
+pgw, plog = ot.gromov.pointwise_gromov_wasserstein(C1, C2, p, q, loss, max_iter=100,
+ log=True)
+
+sgw, slog = ot.gromov.sampled_gromov_wasserstein(C1, C2, p, q, loss, epsilon=0.1, max_iter=100,
+ log=True)
+
+print('Pointwise Gromov-Wasserstein distance estimated: ' + str(plog['gw_dist_estimated']))
+print('Variance estimated: ' + str(plog['gw_dist_std']))
+print('Sampled Gromov-Wasserstein distance: ' + str(slog['gw_dist_estimated']))
+print('Variance estimated: ' + str(slog['gw_dist_std']))
+
+
+pl.figure(1, (10, 5))
+
+pl.subplot(1, 2, 1)
+pl.imshow(pgw.toarray(), cmap='jet')
+pl.title('Pointwise Gromov Wasserstein')
+
+pl.subplot(1, 2, 2)
+pl.imshow(sgw, cmap='jet')
+pl.title('Sampled Gromov Wasserstein')
+
+pl.show()
diff --git a/examples/gromov/plot_gromov_barycenter.py b/examples/gromov/plot_gromov_barycenter.py index f6f031a..7fe081f 100755 --- a/examples/gromov/plot_gromov_barycenter.py +++ b/examples/gromov/plot_gromov_barycenter.py @@ -13,11 +13,13 @@ computation in POT. #
# License: MIT License
+import os
+from pathlib import Path
import numpy as np
import scipy as sp
-import matplotlib.pylab as pl
+from matplotlib import pyplot as plt
from sklearn import manifold
from sklearn.decomposition import PCA
@@ -84,22 +86,24 @@ def smacof_mds(C, dim, max_iter=3000, eps=1e-9): # The four distributions are constructed from 4 simple images
-def im2mat(I):
+def im2mat(img):
"""Converts and image to matrix (one pixel per line)"""
- return I.reshape((I.shape[0] * I.shape[1], I.shape[2]))
+ return img.reshape((img.shape[0] * img.shape[1], img.shape[2]))
-square = pl.imread('../../data/square.png').astype(np.float64)[:, :, 2]
-cross = pl.imread('../../data/cross.png').astype(np.float64)[:, :, 2]
-triangle = pl.imread('../../data/triangle.png').astype(np.float64)[:, :, 2]
-star = pl.imread('../../data/star.png').astype(np.float64)[:, :, 2]
+this_file = os.path.realpath('__file__')
+data_path = os.path.join(Path(this_file).parent.parent.parent, 'data')
+
+square = plt.imread(os.path.join(data_path, 'square.png')).astype(np.float64)[:, :, 2]
+cross = plt.imread(os.path.join(data_path, 'cross.png')).astype(np.float64)[:, :, 2]
+triangle = plt.imread(os.path.join(data_path, 'triangle.png')).astype(np.float64)[:, :, 2]
+star = plt.imread(os.path.join(data_path, 'star.png')).astype(np.float64)[:, :, 2]
shapes = [square, cross, triangle, star]
S = 4
xs = [[] for i in range(S)]
-
for nb in range(4):
for i in range(8):
for j in range(8):
@@ -184,64 +188,64 @@ npost23 = [smacof_mds(Ct23[s], 2) for s in range(2)] npost23 = [clf.fit_transform(npost23[s]) for s in range(2)]
-fig = pl.figure(figsize=(10, 10))
+fig = plt.figure(figsize=(10, 10))
-ax1 = pl.subplot2grid((4, 4), (0, 0))
-pl.xlim((-1, 1))
-pl.ylim((-1, 1))
+ax1 = plt.subplot2grid((4, 4), (0, 0))
+plt.xlim((-1, 1))
+plt.ylim((-1, 1))
ax1.scatter(npos[0][:, 0], npos[0][:, 1], color='r')
-ax2 = pl.subplot2grid((4, 4), (0, 1))
-pl.xlim((-1, 1))
-pl.ylim((-1, 1))
+ax2 = plt.subplot2grid((4, 4), (0, 1))
+plt.xlim((-1, 1))
+plt.ylim((-1, 1))
ax2.scatter(npost01[1][:, 0], npost01[1][:, 1], color='b')
-ax3 = pl.subplot2grid((4, 4), (0, 2))
-pl.xlim((-1, 1))
-pl.ylim((-1, 1))
+ax3 = plt.subplot2grid((4, 4), (0, 2))
+plt.xlim((-1, 1))
+plt.ylim((-1, 1))
ax3.scatter(npost01[0][:, 0], npost01[0][:, 1], color='b')
-ax4 = pl.subplot2grid((4, 4), (0, 3))
-pl.xlim((-1, 1))
-pl.ylim((-1, 1))
+ax4 = plt.subplot2grid((4, 4), (0, 3))
+plt.xlim((-1, 1))
+plt.ylim((-1, 1))
ax4.scatter(npos[1][:, 0], npos[1][:, 1], color='r')
-ax5 = pl.subplot2grid((4, 4), (1, 0))
-pl.xlim((-1, 1))
-pl.ylim((-1, 1))
+ax5 = plt.subplot2grid((4, 4), (1, 0))
+plt.xlim((-1, 1))
+plt.ylim((-1, 1))
ax5.scatter(npost02[1][:, 0], npost02[1][:, 1], color='b')
-ax6 = pl.subplot2grid((4, 4), (1, 3))
-pl.xlim((-1, 1))
-pl.ylim((-1, 1))
+ax6 = plt.subplot2grid((4, 4), (1, 3))
+plt.xlim((-1, 1))
+plt.ylim((-1, 1))
ax6.scatter(npost13[1][:, 0], npost13[1][:, 1], color='b')
-ax7 = pl.subplot2grid((4, 4), (2, 0))
-pl.xlim((-1, 1))
-pl.ylim((-1, 1))
+ax7 = plt.subplot2grid((4, 4), (2, 0))
+plt.xlim((-1, 1))
+plt.ylim((-1, 1))
ax7.scatter(npost02[0][:, 0], npost02[0][:, 1], color='b')
-ax8 = pl.subplot2grid((4, 4), (2, 3))
-pl.xlim((-1, 1))
-pl.ylim((-1, 1))
+ax8 = plt.subplot2grid((4, 4), (2, 3))
+plt.xlim((-1, 1))
+plt.ylim((-1, 1))
ax8.scatter(npost13[0][:, 0], npost13[0][:, 1], color='b')
-ax9 = pl.subplot2grid((4, 4), (3, 0))
-pl.xlim((-1, 1))
-pl.ylim((-1, 1))
+ax9 = plt.subplot2grid((4, 4), (3, 0))
+plt.xlim((-1, 1))
+plt.ylim((-1, 1))
ax9.scatter(npos[2][:, 0], npos[2][:, 1], color='r')
-ax10 = pl.subplot2grid((4, 4), (3, 1))
-pl.xlim((-1, 1))
-pl.ylim((-1, 1))
+ax10 = plt.subplot2grid((4, 4), (3, 1))
+plt.xlim((-1, 1))
+plt.ylim((-1, 1))
ax10.scatter(npost23[1][:, 0], npost23[1][:, 1], color='b')
-ax11 = pl.subplot2grid((4, 4), (3, 2))
-pl.xlim((-1, 1))
-pl.ylim((-1, 1))
+ax11 = plt.subplot2grid((4, 4), (3, 2))
+plt.xlim((-1, 1))
+plt.ylim((-1, 1))
ax11.scatter(npost23[0][:, 0], npost23[0][:, 1], color='b')
-ax12 = pl.subplot2grid((4, 4), (3, 3))
-pl.xlim((-1, 1))
-pl.ylim((-1, 1))
+ax12 = plt.subplot2grid((4, 4), (3, 3))
+plt.xlim((-1, 1))
+plt.ylim((-1, 1))
ax12.scatter(npos[3][:, 0], npos[3][:, 1], color='r')
diff --git a/examples/plot_Intro_OT.py b/examples/plot_Intro_OT.py new file mode 100644 index 0000000..2e2c6fd --- /dev/null +++ b/examples/plot_Intro_OT.py @@ -0,0 +1,373 @@ +# coding: utf-8 +""" +============================================= +Introduction to Optimal Transport with Python +============================================= + +This example gives an introduction on how to use Optimal Transport in Python. + +""" + +# Author: Remi Flamary, Nicolas Courty, Aurelie Boisbunon +# +# License: MIT License +# sphinx_gallery_thumbnail_number = 1 + +############################################################################## +# POT Python Optimal Transport Toolbox +# ------------------------------------ +# +# POT installation +# ``````````````````` +# +# * Install with pip:: +# +# pip install pot +# * Install with conda:: +# +# conda install -c conda-forge pot +# +# Import the toolbox +# ``````````````````` +# + +import numpy as np # always need it +import pylab as pl # do the plots + +import ot # ot + +import time + +############################################################################## +# Getting help +# ````````````` +# +# Online documentation : `<https://pythonot.github.io/all.html>`_ +# +# Or inline help: +# + +help(ot.dist) + + +############################################################################## +# First OT Problem +# ---------------- +# +# We will solve the Bakery/Cafés problem of transporting croissants from a +# number of Bakeries to Cafés in a City (in this case Manhattan). We did a +# quick google map search in Manhattan for bakeries and Cafés: +# +# .. image:: images/bak.png +# :align: center +# :alt: bakery-cafe-manhattan +# :width: 600px +# :height: 280px +# +# We extracted from this search their positions and generated fictional +# production and sale number (that both sum to the same value). +# +# We have acess to the position of Bakeries ``bakery_pos`` and their +# respective production ``bakery_prod`` which describe the source +# distribution. The Cafés where the croissants are sold are defined also by +# their position ``cafe_pos`` and ``cafe_prod``, and describe the target +# distribution. For fun we also provide a +# map ``Imap`` that will illustrate the position of these shops in the city. +# +# +# Now we load the data +# +# + +data = np.load('../data/manhattan.npz') + +bakery_pos = data['bakery_pos'] +bakery_prod = data['bakery_prod'] +cafe_pos = data['cafe_pos'] +cafe_prod = data['cafe_prod'] +Imap = data['Imap'] + +print('Bakery production: {}'.format(bakery_prod)) +print('Cafe sale: {}'.format(cafe_prod)) +print('Total croissants : {}'.format(cafe_prod.sum())) + + +############################################################################## +# Plotting bakeries in the city +# ----------------------------- +# +# Next we plot the position of the bakeries and cafés on the map. The size of +# the circle is proportional to their production. +# + +pl.figure(1, (7, 6)) +pl.clf() +pl.imshow(Imap, interpolation='bilinear') # plot the map +pl.scatter(bakery_pos[:, 0], bakery_pos[:, 1], s=bakery_prod, c='r', ec='k', label='Bakeries') +pl.scatter(cafe_pos[:, 0], cafe_pos[:, 1], s=cafe_prod, c='b', ec='k', label='Cafés') +pl.legend() +pl.title('Manhattan Bakeries and Cafés') + + +############################################################################## +# Cost matrix +# ----------- +# +# +# We can now compute the cost matrix between the bakeries and the cafés, which +# will be the transport cost matrix. This can be done using the +# `ot.dist <https://pythonot.github.io/all.html#ot.dist>`_ function that +# defaults to squared Euclidean distance but can return other things such as +# cityblock (or Manhattan distance). +# + +C = ot.dist(bakery_pos, cafe_pos) + +labels = [str(i) for i in range(len(bakery_prod))] +f = pl.figure(2, (14, 7)) +pl.clf() +pl.subplot(121) +pl.imshow(Imap, interpolation='bilinear') # plot the map +for i in range(len(cafe_pos)): + pl.text(cafe_pos[i, 0], cafe_pos[i, 1], labels[i], color='b', + fontsize=14, fontweight='bold', ha='center', va='center') +for i in range(len(bakery_pos)): + pl.text(bakery_pos[i, 0], bakery_pos[i, 1], labels[i], color='r', + fontsize=14, fontweight='bold', ha='center', va='center') +pl.title('Manhattan Bakeries and Cafés') + +ax = pl.subplot(122) +im = pl.imshow(C, cmap="coolwarm") +pl.title('Cost matrix') +cbar = pl.colorbar(im, ax=ax, shrink=0.5, use_gridspec=True) +cbar.ax.set_ylabel("cost", rotation=-90, va="bottom") + +pl.xlabel('Cafés') +pl.ylabel('Bakeries') +pl.tight_layout() + + +############################################################################## +# The red cells in the matrix image show the bakeries and cafés that are +# further away, and thus more costly to transport from one to the other, while +# the blue ones show those that are very close to each other, with respect to +# the squared Euclidean distance. + + +############################################################################## +# Solving the OT problem with `ot.emd <https://pythonot.github.io/all.html#ot.emd>`_ +# ----------------------------------------------------------------------------------- + +start = time.time() +ot_emd = ot.emd(bakery_prod, cafe_prod, C) +time_emd = time.time() - start + +############################################################################## +# The function returns the transport matrix, which we can then visualize (next section). + +############################################################################## +# Transportation plan vizualization +# ````````````````````````````````` +# +# A good vizualization of the OT matrix in the 2D plane is to denote the +# transportation of mass between a Bakery and a Café by a line. This can easily +# be done with a double ``for`` loop. +# +# In order to make it more interpretable one can also use the ``alpha`` +# parameter of plot and set it to ``alpha=G[i,j]/G.max()``. + +# Plot the matrix and the map +f = pl.figure(3, (14, 7)) +pl.clf() +pl.subplot(121) +pl.imshow(Imap, interpolation='bilinear') # plot the map +for i in range(len(bakery_pos)): + for j in range(len(cafe_pos)): + pl.plot([bakery_pos[i, 0], cafe_pos[j, 0]], [bakery_pos[i, 1], cafe_pos[j, 1]], + '-k', lw=3. * ot_emd[i, j] / ot_emd.max()) +for i in range(len(cafe_pos)): + pl.text(cafe_pos[i, 0], cafe_pos[i, 1], labels[i], color='b', fontsize=14, + fontweight='bold', ha='center', va='center') +for i in range(len(bakery_pos)): + pl.text(bakery_pos[i, 0], bakery_pos[i, 1], labels[i], color='r', fontsize=14, + fontweight='bold', ha='center', va='center') +pl.title('Manhattan Bakeries and Cafés') + +ax = pl.subplot(122) +im = pl.imshow(ot_emd) +for i in range(len(bakery_prod)): + for j in range(len(cafe_prod)): + text = ax.text(j, i, '{0:g}'.format(ot_emd[i, j]), + ha="center", va="center", color="w") +pl.title('Transport matrix') + +pl.xlabel('Cafés') +pl.ylabel('Bakeries') +pl.tight_layout() + +############################################################################## +# The transport matrix gives the number of croissants that can be transported +# from each bakery to each café. We can see that the bakeries only need to +# transport croissants to one or two cafés, the transport matrix being very +# sparse. + +############################################################################## +# OT loss and dual variables +# -------------------------- +# +# The resulting wasserstein loss loss is of the form: +# +# .. math:: +# W=\sum_{i,j}\gamma_{i,j}C_{i,j} +# +# where :math:`\gamma` is the optimal transport matrix. +# + +W = np.sum(ot_emd * C) +print('Wasserstein loss (EMD) = {0:.2f}'.format(W)) + +############################################################################## +# Regularized OT with Sinkhorn +# ---------------------------- +# +# The Sinkhorn algorithm is very simple to code. You can implement it directly +# using the following pseudo-code +# +# .. image:: images/sinkhorn.png +# :align: center +# :alt: Sinkhorn algorithm +# :width: 440px +# :height: 240px +# +# In this algorithm, :math:`\oslash` corresponds to the element-wise division. +# +# An alternative is to use the POT toolbox with +# `ot.sinkhorn <https://pythonot.github.io/all.html#ot.sinkhorn>`_ +# +# Be careful of numerical problems. A good pre-processing for Sinkhorn is to +# divide the cost matrix ``C`` by its maximum value. + +############################################################################## +# Algorithm +# ````````` + +# Compute Sinkhorn transport matrix from algorithm +reg = 0.1 +K = np.exp(-C / C.max() / reg) +nit = 100 +u = np.ones((len(bakery_prod), )) +for i in range(1, nit): + v = cafe_prod / np.dot(K.T, u) + u = bakery_prod / (np.dot(K, v)) +ot_sink_algo = np.atleast_2d(u).T * (K * v.T) # Equivalent to np.dot(np.diag(u), np.dot(K, np.diag(v))) + +# Compute Sinkhorn transport matrix with POT +ot_sinkhorn = ot.sinkhorn(bakery_prod, cafe_prod, reg=reg, M=C / C.max()) + +# Difference between the 2 +print('Difference between algo and ot.sinkhorn = {0:.2g}'.format(np.sum(np.power(ot_sink_algo - ot_sinkhorn, 2)))) + +############################################################################## +# Plot the matrix and the map +# ``````````````````````````` + +print('Min. of Sinkhorn\'s transport matrix = {0:.2g}'.format(np.min(ot_sinkhorn))) + +f = pl.figure(4, (13, 6)) +pl.clf() +pl.subplot(121) +pl.imshow(Imap, interpolation='bilinear') # plot the map +for i in range(len(bakery_pos)): + for j in range(len(cafe_pos)): + pl.plot([bakery_pos[i, 0], cafe_pos[j, 0]], + [bakery_pos[i, 1], cafe_pos[j, 1]], + '-k', lw=3. * ot_sinkhorn[i, j] / ot_sinkhorn.max()) +for i in range(len(cafe_pos)): + pl.text(cafe_pos[i, 0], cafe_pos[i, 1], labels[i], color='b', + fontsize=14, fontweight='bold', ha='center', va='center') +for i in range(len(bakery_pos)): + pl.text(bakery_pos[i, 0], bakery_pos[i, 1], labels[i], color='r', + fontsize=14, fontweight='bold', ha='center', va='center') +pl.title('Manhattan Bakeries and Cafés') + +ax = pl.subplot(122) +im = pl.imshow(ot_sinkhorn) +for i in range(len(bakery_prod)): + for j in range(len(cafe_prod)): + text = ax.text(j, i, np.round(ot_sinkhorn[i, j], 1), + ha="center", va="center", color="w") +pl.title('Transport matrix') + +pl.xlabel('Cafés') +pl.ylabel('Bakeries') +pl.tight_layout() + + +############################################################################## +# We notice right away that the matrix is not sparse at all with Sinkhorn, +# each bakery delivering croissants to all 5 cafés with that solution. Also, +# this solution gives a transport with fractions, which does not make sense +# in the case of croissants. This was not the case with EMD. + +############################################################################## +# Varying the regularization parameter in Sinkhorn +# ```````````````````````````````````````````````` +# + +reg_parameter = np.logspace(-3, 0, 20) +W_sinkhorn_reg = np.zeros((len(reg_parameter), )) +time_sinkhorn_reg = np.zeros((len(reg_parameter), )) + +f = pl.figure(5, (14, 5)) +pl.clf() +max_ot = 100 # plot matrices with the same colorbar +for k in range(len(reg_parameter)): + start = time.time() + ot_sinkhorn = ot.sinkhorn(bakery_prod, cafe_prod, reg=reg_parameter[k], M=C / C.max()) + time_sinkhorn_reg[k] = time.time() - start + + if k % 4 == 0 and k > 0: # we only plot a few + ax = pl.subplot(1, 5, k / 4) + im = pl.imshow(ot_sinkhorn, vmin=0, vmax=max_ot) + pl.title('reg={0:.2g}'.format(reg_parameter[k])) + pl.xlabel('Cafés') + pl.ylabel('Bakeries') + + # Compute the Wasserstein loss for Sinkhorn, and compare with EMD + W_sinkhorn_reg[k] = np.sum(ot_sinkhorn * C) +pl.tight_layout() + + +############################################################################## +# This series of graph shows that the solution of Sinkhorn starts with something +# very similar to EMD (although not sparse) for very small values of the +# regularization parameter, and tends to a more uniform solution as the +# regularization parameter increases. +# + +############################################################################## +# Wasserstein loss and computational time +# ``````````````````````````````````````` +# + +# Plot the matrix and the map +f = pl.figure(6, (4, 4)) +pl.clf() +pl.title("Comparison between Sinkhorn and EMD") + +pl.plot(reg_parameter, W_sinkhorn_reg, 'o', label="Sinkhorn") +XLim = pl.xlim() +pl.plot(XLim, [W, W], '--k', label="EMD") +pl.legend() +pl.xlabel("reg") +pl.ylabel("Wasserstein loss") + +############################################################################## +# In this last graph, we show the impact of the regularization parameter on +# the Wasserstein loss. We can see that higher +# values of ``reg`` leads to a much higher Wasserstein loss. +# +# The Wasserstein loss of EMD is displayed for +# comparison. The Wasserstein loss of Sinkhorn can be a little lower than that +# of EMD for low values of ``reg``, but it quickly gets much higher. +# diff --git a/examples/plot_OT_1D_smooth.py b/examples/plot_OT_1D_smooth.py index 75cd295..b07f99f 100644 --- a/examples/plot_OT_1D_smooth.py +++ b/examples/plot_OT_1D_smooth.py @@ -87,7 +87,7 @@ pl.show() ############################################################################## # Solve Smooth OT -# -------------- +# --------------- #%% Smooth OT with KL regularization diff --git a/examples/plot_OT_2D_samples.py b/examples/plot_OT_2D_samples.py index 1544e82..af1bc12 100644 --- a/examples/plot_OT_2D_samples.py +++ b/examples/plot_OT_2D_samples.py @@ -107,7 +107,7 @@ pl.show() ############################################################################## # Emprirical Sinkhorn -# ---------------- +# ------------------- #%% sinkhorn diff --git a/examples/sliced-wasserstein/README.txt b/examples/sliced-wasserstein/README.txt new file mode 100644 index 0000000..a575345 --- /dev/null +++ b/examples/sliced-wasserstein/README.txt @@ -0,0 +1,4 @@ + + +Sliced Wasserstein Distance +---------------------------
\ No newline at end of file diff --git a/examples/sliced-wasserstein/plot_variance.py b/examples/sliced-wasserstein/plot_variance.py new file mode 100644 index 0000000..7d73907 --- /dev/null +++ b/examples/sliced-wasserstein/plot_variance.py @@ -0,0 +1,86 @@ +# -*- coding: utf-8 -*- +""" +============================== +2D Sliced Wasserstein Distance +============================== + +This example illustrates the computation of the sliced Wasserstein Distance as +proposed in [31]. + +[31] Bonneel, Nicolas, et al. "Sliced and radon wasserstein barycenters of +measures." Journal of Mathematical Imaging and Vision 51.1 (2015): 22-45 + +""" + +# Author: Adrien Corenflos <adrien.corenflos@aalto.fi> +# +# License: MIT License + +import matplotlib.pylab as pl +import numpy as np + +import ot + +############################################################################## +# Generate data +# ------------- + +# %% parameters and data generation + +n = 500 # nb samples + +mu_s = np.array([0, 0]) +cov_s = np.array([[1, 0], [0, 1]]) + +mu_t = np.array([4, 4]) +cov_t = np.array([[1, -.8], [-.8, 1]]) + +xs = ot.datasets.make_2D_samples_gauss(n, mu_s, cov_s) +xt = ot.datasets.make_2D_samples_gauss(n, mu_t, cov_t) + +a, b = np.ones((n,)) / n, np.ones((n,)) / n # uniform distribution on samples + +############################################################################## +# Plot data +# --------- + +# %% plot samples + +pl.figure(1) +pl.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples') +pl.plot(xt[:, 0], xt[:, 1], 'xr', label='Target samples') +pl.legend(loc=0) +pl.title('Source and target distributions') + +############################################################################### +# Sliced Wasserstein distance for different seeds and number of projections +# ------------------------------------------------------------------------- + +n_seed = 50 +n_projections_arr = np.logspace(0, 3, 25, dtype=int) +res = np.empty((n_seed, 25)) + +# %% Compute statistics +for seed in range(n_seed): + for i, n_projections in enumerate(n_projections_arr): + res[seed, i] = ot.sliced_wasserstein_distance(xs, xt, a, b, n_projections, seed=seed) + +res_mean = np.mean(res, axis=0) +res_std = np.std(res, axis=0) + +############################################################################### +# Plot Sliced Wasserstein Distance +# -------------------------------- + +pl.figure(2) +pl.plot(n_projections_arr, res_mean, label="SWD") +pl.fill_between(n_projections_arr, res_mean - 2 * res_std, res_mean + 2 * res_std, alpha=0.5) + +pl.legend() +pl.xscale('log') + +pl.xlabel("Number of projections") +pl.ylabel("Distance") +pl.title('Sliced Wasserstein Distance with 95% confidence inverval') + +pl.show() diff --git a/examples/unbalanced-partial/plot_UOT_1D.py b/examples/unbalanced-partial/plot_UOT_1D.py index 2ea8b05..183849c 100644 --- a/examples/unbalanced-partial/plot_UOT_1D.py +++ b/examples/unbalanced-partial/plot_UOT_1D.py @@ -61,8 +61,7 @@ ot.plot.plot1D_mat(a, b, M, 'Cost matrix M') ############################################################################## # Solve Unbalanced Sinkhorn -# -------------- - +# ------------------------- # Sinkhorn diff --git a/examples/unbalanced-partial/plot_partial_wass_and_gromov.py b/examples/unbalanced-partial/plot_partial_wass_and_gromov.py index 0c5cbf9..ac4194c 100755 --- a/examples/unbalanced-partial/plot_partial_wass_and_gromov.py +++ b/examples/unbalanced-partial/plot_partial_wass_and_gromov.py @@ -4,7 +4,7 @@ Partial Wasserstein and Gromov-Wasserstein example
==================================================
-This example is designed to show how to use the Partial (Gromov-)Wassertsein
+This example is designed to show how to use the Partial (Gromov-)Wasserstein
distance computation in POT.
"""
@@ -123,11 +123,12 @@ C1 = sp.spatial.distance.cdist(xs, xs) C2 = sp.spatial.distance.cdist(xt, xt)
# transport 100% of the mass
-print('-----m = 1')
+print('------m = 1')
m = 1
res0, log0 = ot.partial.partial_gromov_wasserstein(C1, C2, p, q, m=m, log=True)
res, log = ot.partial.entropic_partial_gromov_wasserstein(C1, C2, p, q, 10,
- m=m, log=True)
+ m=m, log=True,
+ verbose=True)
print('Wasserstein distance (m = 1): ' + str(log0['partial_gw_dist']))
print('Entropic Wasserstein distance (m = 1): ' + str(log['partial_gw_dist']))
@@ -136,18 +137,20 @@ pl.figure(1, (10, 5)) pl.title("mass to be transported m = 1")
pl.subplot(1, 2, 1)
pl.imshow(res0, cmap='jet')
-pl.title('Wasserstein')
+pl.title('Gromov-Wasserstein')
pl.subplot(1, 2, 2)
pl.imshow(res, cmap='jet')
-pl.title('Entropic Wasserstein')
+pl.title('Entropic Gromov-Wasserstein')
pl.show()
# transport 2/3 of the mass
-print('-----m = 2/3')
+print('------m = 2/3')
m = 2 / 3
-res0, log0 = ot.partial.partial_gromov_wasserstein(C1, C2, p, q, m=m, log=True)
+res0, log0 = ot.partial.partial_gromov_wasserstein(C1, C2, p, q, m=m, log=True,
+ verbose=True)
res, log = ot.partial.entropic_partial_gromov_wasserstein(C1, C2, p, q, 10,
- m=m, log=True)
+ m=m, log=True,
+ verbose=True)
print('Partial Wasserstein distance (m = 2/3): ' +
str(log0['partial_gw_dist']))
@@ -158,8 +161,8 @@ pl.figure(1, (10, 5)) pl.title("mass to be transported m = 2/3")
pl.subplot(1, 2, 1)
pl.imshow(res0, cmap='jet')
-pl.title('Partial Wasserstein')
+pl.title('Partial Gromov-Wasserstein')
pl.subplot(1, 2, 2)
pl.imshow(res, cmap='jet')
-pl.title('Entropic partial Wasserstein')
+pl.title('Entropic partial Gromov-Wasserstein')
pl.show()
diff --git a/examples/unbalanced-partial/plot_regpath.py b/examples/unbalanced-partial/plot_regpath.py new file mode 100644 index 0000000..4a51c2d --- /dev/null +++ b/examples/unbalanced-partial/plot_regpath.py @@ -0,0 +1,135 @@ +# -*- coding: utf-8 -*- +""" +================================================================ +Regularization path of l2-penalized unbalanced optimal transport +================================================================ +This example illustrate the regularization path for 2D unbalanced +optimal transport. We present here both the fully relaxed case +and the semi-relaxed case. + +[Chapel et al., 2021] Chapel, L., Flamary, R., Wu, H., Févotte, C., +and Gasso, G. (2021). Unbalanced optimal transport through non-negative +penalized linear regression. +""" + +# Author: Haoran Wu <haoran.wu@univ-ubs.fr> +# License: MIT License + + +import numpy as np +import matplotlib.pylab as pl +import ot + +############################################################################## +# Generate data +# ------------- + +#%% parameters and data generation + +n = 50 # nb samples + +mu_s = np.array([-1, -1]) +cov_s = np.array([[1, 0], [0, 1]]) + +mu_t = np.array([4, 4]) +cov_t = np.array([[1, -.8], [-.8, 1]]) + +np.random.seed(0) +xs = ot.datasets.make_2D_samples_gauss(n, mu_s, cov_s) +xt = ot.datasets.make_2D_samples_gauss(n, mu_t, cov_t) + +a, b = np.ones((n,)) / n, np.ones((n,)) / n # uniform distribution on samples + +# loss matrix +M = ot.dist(xs, xt) +M /= M.max() + +############################################################################## +# Plot data +# --------- + +#%% plot 2 distribution samples + +pl.figure(1) +pl.scatter(xs[:, 0], xs[:, 1], c='C0', label='Source') +pl.scatter(xt[:, 0], xt[:, 1], c='C1', label='Target') +pl.legend(loc=2) +pl.title('Source and target distributions') +pl.show() + +############################################################################## +# Compute semi-relaxed and fully relaxed regularization paths +# ----------- + +#%% +final_gamma = 1e-8 +t, t_list, g_list = ot.regpath.regularization_path(a, b, M, reg=final_gamma, + semi_relaxed=False) +t2, t_list2, g_list2 = ot.regpath.regularization_path(a, b, M, reg=final_gamma, + semi_relaxed=True) + + +############################################################################## +# Plot the regularization path +# ---------------- + +#%% fully relaxed l2-penalized UOT + +pl.figure(2) +selected_gamma = [2e-1, 1e-1, 5e-2, 1e-3] +for p in range(4): + tp = ot.regpath.compute_transport_plan(selected_gamma[p], g_list, + t_list) + P = tp.reshape((n, n)) + pl.subplot(2, 2, p + 1) + if P.sum() > 0: + P = P / P.max() + for i in range(n): + for j in range(n): + if P[i, j] > 0: + pl.plot([xs[i, 0], xt[j, 0]], [xs[i, 1], xt[j, 1]], color='C2', + alpha=P[i, j] * 0.3) + pl.scatter(xs[:, 0], xs[:, 1], c='C0', alpha=0.2) + pl.scatter(xt[:, 0], xt[:, 1], c='C1', alpha=0.2) + pl.scatter(xs[:, 0], xs[:, 1], c='C0', s=P.sum(1).ravel() * (1 + p) * 2, + label='Re-weighted source', alpha=1) + pl.scatter(xt[:, 0], xt[:, 1], c='C1', s=P.sum(0).ravel() * (1 + p) * 2, + label='Re-weighted target', alpha=1) + pl.plot([], [], color='C2', alpha=0.8, label='OT plan') + pl.title(r'$\ell_2$ UOT $\gamma$={}'.format(selected_gamma[p]), + fontsize=11) + if p < 2: + pl.xticks(()) +pl.show() + + +############################################################################## +# Plot the semi-relaxed regularization path +# ------------------- + +#%% semi-relaxed l2-penalized UOT + +pl.figure(3) +selected_gamma = [10, 1, 1e-1, 1e-2] +for p in range(4): + tp = ot.regpath.compute_transport_plan(selected_gamma[p], g_list2, + t_list2) + P = tp.reshape((n, n)) + pl.subplot(2, 2, p + 1) + if P.sum() > 0: + P = P / P.max() + for i in range(n): + for j in range(n): + if P[i, j] > 0: + pl.plot([xs[i, 0], xt[j, 0]], [xs[i, 1], xt[j, 1]], color='C2', + alpha=P[i, j] * 0.3) + pl.scatter(xs[:, 0], xs[:, 1], c='C0', alpha=0.2) + pl.scatter(xt[:, 0], xt[:, 1], c='C1', alpha=1, label='Target marginal') + pl.scatter(xs[:, 0], xs[:, 1], c='C0', s=P.sum(1).ravel() * 2 * (1 + p), + label='Source marginal', alpha=1) + pl.plot([], [], color='C2', alpha=0.8, label='OT plan') + pl.title(r'Semi-relaxed $l_2$ UOT $\gamma$={}'.format(selected_gamma[p]), + fontsize=11) + if p < 2: + pl.xticks(()) +pl.show() |