From 7ab9037f1e4a08439083d1bc5705be5ed2e9e10a Mon Sep 17 00:00:00 2001 From: Nicolas Courty Date: Mon, 28 Aug 2017 14:41:09 +0200 Subject: Gromov-Wasserstein distance --- README.md | 4 +- examples/plot_gromov.py | 98 ++++++++++ ot/__init__.py | 6 +- ot/gromov.py | 482 ++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 588 insertions(+), 2 deletions(-) create mode 100644 examples/plot_gromov.py create mode 100644 ot/gromov.py diff --git a/README.md b/README.md index 7a65106..a42f17b 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ It provides the following solvers: * Conditional gradient [6] and Generalized conditional gradient for regularized OT [7]. * Joint OT matrix and mapping estimation [8]. * Wasserstein Discriminant Analysis [11] (requires autograd + pymanopt). - +* Gromov-Wasserstein distances [12] Some demonstrations (both in Python and Jupyter Notebook format) are available in the examples folder. @@ -182,3 +182,5 @@ You can also post bug reports and feature requests in Github issues. Make sure t [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). [Scaling algorithms for unbalanced transport problems](https://arxiv.org/pdf/1607.05816.pdf). arXiv preprint arXiv:1607.05816. [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016). [Wasserstein Discriminant Analysis](https://arxiv.org/pdf/1608.08063.pdf). arXiv preprint arXiv:1608.08063. + +[12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon, [Gromov-Wasserstein averaging of kernel and distance matrices](http://proceedings.mlr.press/v48/peyre16.html) International Conference on Machine Learning (ICML). 2016. diff --git a/examples/plot_gromov.py b/examples/plot_gromov.py new file mode 100644 index 0000000..11e5336 --- /dev/null +++ b/examples/plot_gromov.py @@ -0,0 +1,98 @@ +# -*- coding: utf-8 -*- +""" +==================== +Gromov-Wasserstein example +==================== + +This example is designed to show how to use the Gromov-Wassertsein distance +computation in POT. + + +""" + +# Author: Erwan Vautier +# Nicolas Courty +# +# License: MIT License + +import scipy as sp +import numpy as np + +import ot +import matplotlib.pylab as pl +from mpl_toolkits.mplot3d import Axes3D + + + +""" +Sample two Gaussian distributions (2D and 3D) +==================== + +The Gromov-Wasserstein distance allows to compute distances with samples that do not belong to the same metric space. For +demonstration purpose, we sample two Gaussian distributions in 2- and 3-dimensional spaces. + +""" +n=30 # nb samples + +mu_s=np.array([0,0]) +cov_s=np.array([[1,0],[0,1]]) + +mu_t=np.array([4,4,4]) +cov_t=np.array([[1,0,0],[0,1,0],[0,0,1]]) + + + +xs=ot.datasets.get_2D_samples_gauss(n,mu_s,cov_s) +P=sp.linalg.sqrtm(cov_t) +xt= np.random.randn(n,3).dot(P)+mu_t + + + +""" +Plotting the distributions +==================== +""" +fig=pl.figure() +ax1=fig.add_subplot(121) +ax1.plot(xs[:,0],xs[:,1],'+b',label='Source samples') +ax2=fig.add_subplot(122,projection='3d') +ax2.scatter(xt[:,0],xt[:,1],xt[:,2],color='r') +pl.show() + + +""" +Compute distance kernels, normalize them and then display +==================== +""" + +C1=sp.spatial.distance.cdist(xs,xs) +C2=sp.spatial.distance.cdist(xt,xt) + +C1/=C1.max() +C2/=C2.max() + +pl.figure() +pl.subplot(121) +pl.imshow(C1) +pl.subplot(122) +pl.imshow(C2) +pl.show() + +""" +Compute Gromov-Wasserstein plans and distance +==================== +""" + +p=ot.unif(n) +q=ot.unif(n) + +gw=ot.gromov_wasserstein(C1,C2,p,q,'square_loss',epsilon=5e-4) +gw_dist=ot.gromov_wasserstein2(C1,C2,p,q,'square_loss',epsilon=5e-4) + +print('Gromov-Wasserstein distances between the distribution: '+str(gw_dist)) + +pl.figure() +pl.imshow(gw,cmap='jet') +pl.colorbar() +pl.show() + diff --git a/ot/__init__.py b/ot/__init__.py index 6d4c4c6..a295e1b 100644 --- a/ot/__init__.py +++ b/ot/__init__.py @@ -5,6 +5,7 @@ """ # Author: Remi Flamary +# Nicolas Courty # # License: MIT License @@ -17,11 +18,13 @@ from . import utils from . import datasets from . import plot from . import da +from . import gromov # OT functions from .lp import emd, emd2 from .bregman import sinkhorn, sinkhorn2, barycenter from .da import sinkhorn_lpl1_mm +from .gromov import gromov_wasserstein, gromov_wasserstein2 # utils functions from .utils import dist, unif, tic, toc, toq @@ -30,4 +33,5 @@ __version__ = "0.3.1" __all__ = ["emd", "emd2", "sinkhorn", "sinkhorn2", "utils", 'datasets', 'bregman', 'lp', 'plot', 'tic', 'toc', 'toq', - 'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim'] + 'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim', + 'gromov_wasserstein','gromov_wasserstein2'] diff --git a/ot/gromov.py b/ot/gromov.py new file mode 100644 index 0000000..f3c62c9 --- /dev/null +++ b/ot/gromov.py @@ -0,0 +1,482 @@ + +# -*- coding: utf-8 -*- +""" +Gromov-Wasserstein transport method +=================================== + + +""" + +# Author: Erwan Vautier +# Nicolas Courty +# +# License: MIT License + +import numpy as np + +from .bregman import sinkhorn +from .utils import dist + +def square_loss(a,b): + """ + Returns the value of L(a,b)=(1/2)*|a-b|^2 + """ + + return (1/2)*(a-b)**2 + +def kl_loss(a,b): + """ + Returns the value of L(a,b)=a*log(a/b)-a+b + """ + + return a*np.log(a/b)-a+b + +def tensor_square_loss(C1,C2,T): + """ + Returns the value of \mathcal{L}(C1,C2) \otimes T with the square loss + + function as the loss function of Gromow-Wasserstein discrepancy. + + Where : + + C1 : Metric cost matrix in the source space + C2 : Metric cost matrix in the target space + T : A coupling between those two spaces + + The square-loss function L(a,b)=(1/2)*|a-b|^2 is read as : + L(a,b) = f1(a)+f2(b)-h1(a)*h2(b) with : + f1(a)=(a^2)/2 + f2(b)=(b^2)/2 + h1(a)=a + h2(b)=b + + Parameters + ---------- + C1 : np.ndarray(ns,ns) + Metric cost matrix in the source space + C2 : np.ndarray(nt,nt) + Metric costfr matrix in the target space + T : np.ndarray(ns,nt) + Coupling between source and target spaces + + + Returns + ------- + tens : (ns*nt) ndarray + \mathcal{L}(C1,C2) \otimes T tensor-matrix multiplication result + + + """ + + + C1=np.asarray(C1,dtype=np.float64) + C2=np.asarray(C2,dtype=np.float64) + T=np.asarray(T,dtype=np.float64) + + + def f1(a): + return (a**2)/2 + + def f2(b): + return (b**2)/2 + + def h1(a): + return a + + def h2(b): + return b + + tens=-np.dot(h1(C1),T).dot(h2(C2).T) + tens=tens-tens.min() + + + return np.array(tens) + + +def tensor_kl_loss(C1,C2,T): + """ + Returns the value of \mathcal{L}(C1,C2) \otimes T with the square loss + + function as the loss function of Gromow-Wasserstein discrepancy. + + Where : + + C1 : Metric cost matrix in the source space + C2 : Metric cost matrix in the target space + T : A coupling between those two spaces + + The square-loss function L(a,b)=(1/2)*|a-b|^2 is read as : + L(a,b) = f1(a)+f2(b)-h1(a)*h2(b) with : + f1(a)=a*log(a)-a + f2(b)=b + h1(a)=a + h2(b)=log(b) + + Parameters + ---------- + C1 : np.ndarray(ns,ns) + Metric cost matrix in the source space + C2 : np.ndarray(nt,nt) + Metric costfr matrix in the target space + T : np.ndarray(ns,nt) + Coupling between source and target spaces + + + Returns + ------- + tens : (ns*nt) ndarray + \mathcal{L}(C1,C2) \otimes T tensor-matrix multiplication result + + References + ---------- + + .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon, "Gromov-Wasserstein averaging of kernel and distance matrices." International Conference on Machine Learning (ICML). 2016. + + """ + + + C1=np.asarray(C1,dtype=np.float64) + C2=np.asarray(C2,dtype=np.float64) + T=np.asarray(T,dtype=np.float64) + + + def f1(a): + return a*np.log(a+1e-15)-a + + def f2(b): + return b + + def h1(a): + return a + + def h2(b): + return np.log(b+1e-15) + + tens=-np.dot(h1(C1),T).dot(h2(C2).T) + tens=tens-tens.min() + + + + return np.array(tens) + +def update_square_loss(p,lambdas,T,Cs): + """ + Updates C according to the L2 Loss kernel with the S Ts couplings calculated at each iteration + + + Parameters + ---------- + p : np.ndarray(N,) + weights in the targeted barycenter + lambdas : list of the S spaces' weights + T : list of S np.ndarray(ns,N) + the S Ts couplings calculated at each iteration + Cs : Cs : list of S np.ndarray(ns,ns) + Metric cost matrices + + Returns + ---------- + C updated + + + """ + tmpsum=np.sum([lambdas[s]*np.dot(T[s].T,Cs[s]).dot(T[s]) for s in range(len(T))]) + ppt=np.dot(p,p.T) + + return(np.divide(tmpsum,ppt)) + +def update_kl_loss(p,lambdas,T,Cs): + """ + Updates C according to the KL Loss kernel with the S Ts couplings calculated at each iteration + + + Parameters + ---------- + p : np.ndarray(N,) + weights in the targeted barycenter + lambdas : list of the S spaces' weights + T : list of S np.ndarray(ns,N) + the S Ts couplings calculated at each iteration + Cs : Cs : list of S np.ndarray(ns,ns) + Metric cost matrices + + Returns + ---------- + C updated + + + """ + tmpsum=np.sum([lambdas[s]*np.dot(T[s].T,Cs[s]).dot(T[s]) for s in range(len(T))]) + ppt=np.dot(p,p.T) + + return(np.exp(np.divide(tmpsum,ppt))) + + +def gromov_wasserstein(C1,C2,p,q,loss_fun,epsilon,numItermax = 1000, stopThr=1e-9,verbose=False, log=False): + """ + Returns the gromov-wasserstein coupling between the two measured similarity matrices + + (C1,p) and (C2,q) + + The function solves the following optimization problem: + + .. math:: + \GW = arg\min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T)) + + s.t. \GW 1 = p + + \GW^T 1= q + + \GW\geq 0 + + Where : + + C1 : Metric cost matrix in the source space + C2 : Metric cost matrix in the target space + p : distribution in the source space + q : distribution in the target space + L : loss function to account for the misfit between the similarity matrices + H : entropy + + + Parameters + ---------- + C1 : np.ndarray(ns,ns) + Metric cost matrix in the source space + C2 : np.ndarray(nt,nt) + Metric costfr matrix in the target space + p : np.ndarray(ns,) + distribution in the source space + q : np.ndarray(nt) + distribution in the target space + loss_fun : loss function used for the solver either 'square_loss' or 'kl_loss' + epsilon : float + Regularization term >0 + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshold on error (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + forcing : np.ndarray(N,2) + list of forced couplings (where N is the number of forcing) + + Returns + ------- + T : coupling between the two spaces that minimizes : + \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T)) + + """ + + + C1=np.asarray(C1,dtype=np.float64) + C2=np.asarray(C2,dtype=np.float64) + + T=np.dot(p,q.T) #Initialization + + cpt = 0 + err=1 + + while (err>stopThr and cpt0 + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshold on error (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + forcing : np.ndarray(N,2) + list of forced couplings (where N is the number of forcing) + + Returns + ------- + T : coupling between the two spaces that minimizes : + \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T)) + + """ + + if log: + gw,logv=gromov_wasserstein(C1,C2,p,q,loss_fun,epsilon,numItermax,stopThr,verbose,log) + else: + gw=gromov_wasserstein(C1,C2,p,q,loss_fun,epsilon,numItermax,stopThr,verbose,log) + + if loss_fun=='square_loss': + gw_dist=np.sum(gw*tensor_square_loss(C1,C2,gw)) + + + elif loss_fun=='kl_loss': + gw_dist=np.sum(gw*tensor_kl_loss(C1,C2,gw)) + + if log: + return gw_dist, logv + else: + return gw_dist + + +def gromov_barycenters(N,Cs,ps,p,lambdas,loss_fun,epsilon,numItermax = 1000, stopThr=1e-9, verbose=False, log=False): + """ + Returns the gromov-wasserstein barycenters of S measured similarity matrices + + (Cs)_{s=1}^{s=S} + + The function solves the following optimization problem: + + .. math:: + C = argmin_C\in R^NxN \sum_s \lambda_s GW(C,Cs,p,ps) + + + Where : + + Cs : metric cost matrix + ps : distribution + + Parameters + ---------- + N : Integer + Size of the targeted barycenter + Cs : list of S np.ndarray(ns,ns) + Metric cost matrices + ps : list of S np.ndarray(ns,) + sample weights in the S spaces + p : np.ndarray(N,) + weights in the targeted barycenter + lambdas : list of the S spaces' weights + L : tensor-matrix multiplication function based on specific loss function + update : function(p,lambdas,T,Cs) that updates C according to a specific Kernel + with the S Ts couplings calculated at each iteration + epsilon : float + Regularization term >0 + numItermax : int, optional + Max number of iterations + stopThr : float, optional + Stop threshol on error (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + + Returns + ------- + C : Similarity matrix in the barycenter space (permutated arbitrarily) + + """ + + + S=len(Cs) + + Cs=[np.asarray(Cs[s],dtype=np.float64) for s in range(S)] + lambdas=np.asarray(lambdas,dtype=np.float64) + + T=[0 for s in range(S)] + + #Initialization of C : random SPD matrix + xalea=np.random.randn(N,2) + C=dist(xalea,xalea) + C/=C.max() + + cpt=0 + err=1 + + error=[] + + while(err>stopThr and cpt