author     Nicolas Courty <Nico@pc-mna-08.univ-ubs.fr>  2017-08-28 15:04:04 +0200
committer  Nicolas Courty <Nico@pc-mna-08.univ-ubs.fr>  2017-08-28 15:04:04 +0200
commit     0a68bf4e83ee9092c3f3878115fea894922d7d56 (patch)
tree       e01c94d682e2b02d4ae900c6e7d6bcd408490e98
parent     7ab9037f1e4a08439083d1bc5705be5ed2e9e10a (diff)
gromov: flake8 and other fixes
-rw-r--r--  examples/plot_gromov.py   63
-rw-r--r--  ot/gromov.py             382
2 files changed, 216 insertions, 229 deletions
diff --git a/examples/plot_gromov.py b/examples/plot_gromov.py
index 11e5336..a33fde1 100644
--- a/examples/plot_gromov.py
+++ b/examples/plot_gromov.py
@@ -3,11 +3,8 @@
====================
Gromov-Wasserstein example
====================
-
-This example is designed to show how to use the Gromov-Wassertsein distance
-computation in POT.
-
-
+This example is designed to show how to use the Gromov-Wasserstein distance
+computation in POT.
"""
# Author: Erwan Vautier <erwan.vautier@gmail.com>
@@ -20,43 +17,38 @@ import numpy as np
import ot
import matplotlib.pylab as pl
-from mpl_toolkits.mplot3d import Axes3D
-
"""
Sample two Gaussian distributions (2D and 3D)
====================
-
-The Gromov-Wasserstein distance allows to compute distances with samples that do not belong to the same metric space. For
-demonstration purpose, we sample two Gaussian distributions in 2- and 3-dimensional spaces.
-
+The Gromov-Wasserstein distance allows computing distances between samples that do not belong to the same metric space.
+For demonstration purposes, we sample two Gaussian distributions in 2- and 3-dimensional spaces.
"""
-n=30 # nb samples
-mu_s=np.array([0,0])
-cov_s=np.array([[1,0],[0,1]])
-mu_t=np.array([4,4,4])
-cov_t=np.array([[1,0,0],[0,1,0],[0,0,1]])
+n = 30  # nb samples
+mu_s = np.array([0, 0])
+cov_s = np.array([[1, 0], [0, 1]])
+mu_t = np.array([4, 4, 4])
+cov_t = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
-xs=ot.datasets.get_2D_samples_gauss(n,mu_s,cov_s)
-P=sp.linalg.sqrtm(cov_t)
-xt= np.random.randn(n,3).dot(P)+mu_t
-
+xs = ot.datasets.get_2D_samples_gauss(n, mu_s, cov_s)
+P = sp.linalg.sqrtm(cov_t)
+xt = np.random.randn(n, 3).dot(P) + mu_t
"""
Plotting the distributions
====================
"""
-fig=pl.figure()
-ax1=fig.add_subplot(121)
-ax1.plot(xs[:,0],xs[:,1],'+b',label='Source samples')
-ax2=fig.add_subplot(122,projection='3d')
-ax2.scatter(xt[:,0],xt[:,1],xt[:,2],color='r')
+fig = pl.figure()
+ax1 = fig.add_subplot(121)
+ax1.plot(xs[:, 0], xs[:, 1], '+b', label='Source samples')
+ax2 = fig.add_subplot(122, projection='3d')
+ax2.scatter(xt[:, 0], xt[:, 1], xt[:, 2], color='r')
pl.show()
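
Note: the hunk above drops the "from mpl_toolkits.mplot3d import Axes3D" import while still calling add_subplot(122, projection='3d'). In matplotlib releases contemporary with this commit, the '3d' projection is registered only as a side effect of importing mpl_toolkits.mplot3d, so removing the import can raise an unknown-projection error; newer matplotlib registers it automatically. A minimal flake8-clean sketch that keeps the side effect (assumption: a matplotlib version without auto-registration):

import matplotlib.pylab as pl
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401  (side effect: registers '3d')

fig = pl.figure()
ax = fig.add_subplot(111, projection='3d')  # resolves because '3d' is registered
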
@@ -65,11 +57,11 @@ Compute distance kernels, normalize them and then display
====================
"""
-C1=sp.spatial.distance.cdist(xs,xs)
-C2=sp.spatial.distance.cdist(xt,xt)
+C1 = sp.spatial.distance.cdist(xs, xs)
+C2 = sp.spatial.distance.cdist(xt, xt)
-C1/=C1.max()
-C2/=C2.max()
+C1 /= C1.max()
+C2 /= C2.max()
pl.figure()
pl.subplot(121)
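
Note: the two cdist calls build the full pairwise Euclidean distance matrix within each point cloud, and dividing each matrix by its maximum rescales both to [0, 1]. Gromov-Wasserstein compares absolute cost values, so this puts the 2D and 3D kernels on a common scale and makes a single entropic epsilon meaningful for both spaces. A plain-NumPy sketch of what the cdist call computes, shown only to make the kernel construction explicit:

import numpy as np
d = xs[:, None, :] - xs[None, :, :]        # (n, n, 2) pairwise differences
C1_manual = np.sqrt((d ** 2).sum(axis=2))  # Euclidean distances
# np.allclose(C1_manual, sp.spatial.distance.cdist(xs, xs)) should hold
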
@@ -83,16 +75,15 @@ Compute Gromov-Wasserstein plans and distance
====================
"""
-p=ot.unif(n)
-q=ot.unif(n)
+p = ot.unif(n)
+q = ot.unif(n)
-gw=ot.gromov_wasserstein(C1,C2,p,q,'square_loss',epsilon=5e-4)
-gw_dist=ot.gromov_wasserstein2(C1,C2,p,q,'square_loss',epsilon=5e-4)
+gw = ot.gromov_wasserstein(C1, C2, p, q, 'square_loss', epsilon=5e-4)
+gw_dist = ot.gromov_wasserstein2(C1, C2, p, q, 'square_loss', epsilon=5e-4)
-print('Gromov-Wasserstein distances between the distribution: '+str(gw_dist))
+print('Gromov-Wasserstein distance between the distributions: ' + str(gw_dist))
pl.figure()
-pl.imshow(gw,cmap='jet')
+pl.imshow(gw, cmap='jet')
pl.colorbar()
pl.show()
-
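
Note: since the solver runs Sinkhorn iterations internally, the returned coupling should satisfy the prescribed marginals up to the solver's tolerance. A quick sanity-check sketch one could append to the example (assumes gw, p, q and n from the example are in scope; the atol value is an arbitrary choice):

import numpy as np
assert gw.shape == (n, n)                                 # one row/column per sample
np.testing.assert_allclose(gw.sum(axis=1), p, atol=1e-6)  # rows sum to p
np.testing.assert_allclose(gw.sum(axis=0), q, atol=1e-6)  # columns sum to q
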
diff --git a/ot/gromov.py b/ot/gromov.py
index f3c62c9..7cf3b42 100644
--- a/ot/gromov.py
+++ b/ot/gromov.py
@@ -17,39 +17,41 @@ import numpy as np
from .bregman import sinkhorn
from .utils import dist
-def square_loss(a,b):
+
+def square_loss(a, b):
"""
Returns the value of L(a,b)=(1/2)*|a-b|^2
"""
-
- return (1/2)*(a-b)**2
-def kl_loss(a,b):
+ return (1 / 2) * (a - b)**2
+
+
+def kl_loss(a, b):
"""
Returns the value of L(a,b)=a*log(a/b)-a+b
"""
-
- return a*np.log(a/b)-a+b
-def tensor_square_loss(C1,C2,T):
+ return a * np.log(a / b) - a + b
+
+
+def tensor_square_loss(C1, C2, T):
"""
- Returns the value of \mathcal{L}(C1,C2) \otimes T with the square loss
-
+ Returns the value of \mathcal{L}(C1,C2) \otimes T with the square loss
function as the loss function of Gromov-Wasserstein discrepancy.
-
+
Where :
-
+
C1 : Metric cost matrix in the source space
C2 : Metric cost matrix in the target space
T : A coupling between those two spaces
-
+
The square-loss function L(a,b)=(1/2)*|a-b|^2 is read as :
L(a,b) = f1(a)+f2(b)-h1(a)*h2(b) with :
f1(a)=(a^2)/2
f2(b)=(b^2)/2
h1(a)=a
h2(b)=b
-
+
Parameters
----------
C1 : np.ndarray(ns,ns)
@@ -64,54 +66,50 @@ def tensor_square_loss(C1,C2,T):
-------
tens : (ns*nt) ndarray
\mathcal{L}(C1,C2) \otimes T tensor-matrix multiplication result
-
-
+
+
"""
-
- C1=np.asarray(C1,dtype=np.float64)
- C2=np.asarray(C2,dtype=np.float64)
- T=np.asarray(T,dtype=np.float64)
-
-
+ C1 = np.asarray(C1, dtype=np.float64)
+ C2 = np.asarray(C2, dtype=np.float64)
+ T = np.asarray(T, dtype=np.float64)
+
def f1(a):
- return (a**2)/2
-
+ return (a**2) / 2
+
def f2(b):
- return (b**2)/2
-
+ return (b**2) / 2
+
def h1(a):
return a
-
+
def h2(b):
return b
-
- tens=-np.dot(h1(C1),T).dot(h2(C2).T)
- tens=tens-tens.min()
-
+ tens = -np.dot(h1(C1), T).dot(h2(C2).T)
+ tens = tens - tens.min()
+
return np.array(tens)
-
-
-def tensor_kl_loss(C1,C2,T):
+
+
+def tensor_kl_loss(C1, C2, T):
"""
- Returns the value of \mathcal{L}(C1,C2) \otimes T with the square loss
-
+ Returns the value of \mathcal{L}(C1,C2) \otimes T with the kl loss
function as the loss function of Gromov-Wasserstein discrepancy.
-
+
Where :
-
+
C1 : Metric cost matrix in the source space
C2 : Metric cost matrix in the target space
T : A coupling between those two spaces
-
+
The kl-loss function L(a,b)=a*log(a/b)-a+b is read as :
L(a,b) = f1(a)+f2(b)-h1(a)*h2(b) with :
f1(a)=a*log(a)-a
f2(b)=b
h1(a)=a
h2(b)=log(b)
-
+
Parameters
----------
C1 : np.ndarray(ns,ns)
@@ -126,44 +124,41 @@ def tensor_kl_loss(C1,C2,T):
-------
tens : (ns*nt) ndarray
\mathcal{L}(C1,C2) \otimes T tensor-matrix multiplication result
-
+
References
----------
-
+
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon, "Gromov-Wasserstein averaging of kernel and distance matrices." International Conference on Machine Learning (ICML). 2016.
-
+
"""
-
- C1=np.asarray(C1,dtype=np.float64)
- C2=np.asarray(C2,dtype=np.float64)
- T=np.asarray(T,dtype=np.float64)
-
-
+ C1 = np.asarray(C1, dtype=np.float64)
+ C2 = np.asarray(C2, dtype=np.float64)
+ T = np.asarray(T, dtype=np.float64)
+
def f1(a):
- return a*np.log(a+1e-15)-a
-
+ return a * np.log(a + 1e-15) - a
+
def f2(b):
return b
-
+
def h1(a):
return a
-
+
def h2(b):
- return np.log(b+1e-15)
-
- tens=-np.dot(h1(C1),T).dot(h2(C2).T)
- tens=tens-tens.min()
-
-
+ return np.log(b + 1e-15)
+
+ tens = -np.dot(h1(C1), T).dot(h2(C2).T)
+ tens = tens - tens.min()
return np.array(tens)
-def update_square_loss(p,lambdas,T,Cs):
+
+def update_square_loss(p, lambdas, T, Cs):
"""
Updates C according to the L2 Loss kernel with the S Ts couplings calculated at each iteration
-
-
+
+
Parameters
----------
p : np.ndarray(N,)
@@ -173,23 +168,25 @@ def update_square_loss(p,lambdas,T,Cs):
the S Ts couplings calculated at each iteration
Cs : list of S np.ndarray(ns,ns)
Metric cost matrices
-
- Returns
+
+ Returns
----------
C updated
-
-
+
+
"""
- tmpsum=np.sum([lambdas[s]*np.dot(T[s].T,Cs[s]).dot(T[s]) for s in range(len(T))])
- ppt=np.dot(p,p.T)
-
- return(np.divide(tmpsum,ppt))
-def update_kl_loss(p,lambdas,T,Cs):
+ tmpsum = np.sum([lambdas[s] * np.dot(T[s].T, Cs[s]).dot(T[s])
+ for s in range(len(T))])
+ ppt = np.dot(p, p.T)
+
+ return(np.divide(tmpsum, ppt))
+
+
+def update_kl_loss(p, lambdas, T, Cs):
"""
Updates C according to the KL Loss kernel with the S Ts couplings calculated at each iteration
-
-
+
+
Parameters
----------
p : np.ndarray(N,)
@@ -199,25 +196,26 @@ def update_kl_loss(p,lambdas,T,Cs):
the S Ts couplings calculated at each iteration
Cs : list of S np.ndarray(ns,ns)
Metric cost matrices
-
- Returns
+
+ Returns
----------
C updated
-
-
+
+
"""
- tmpsum=np.sum([lambdas[s]*np.dot(T[s].T,Cs[s]).dot(T[s]) for s in range(len(T))])
- ppt=np.dot(p,p.T)
-
- return(np.exp(np.divide(tmpsum,ppt)))
+ tmpsum = np.sum([lambdas[s] * np.dot(T[s].T, Cs[s]).dot(T[s])
+ for s in range(len(T))])
+ ppt = np.dot(p, p.T)
+
+ return(np.exp(np.divide(tmpsum, ppt)))
-def gromov_wasserstein(C1,C2,p,q,loss_fun,epsilon,numItermax = 1000, stopThr=1e-9,verbose=False, log=False):
+def gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon, numItermax=1000, stopThr=1e-9, verbose=False, log=False):
"""
Returns the gromov-wasserstein coupling between the two measured similarity matrices
-
+
(C1,p) and (C2,q)
-
+
The function solves the following optimization problem:
.. math::
@@ -228,17 +226,17 @@ def gromov_wasserstein(C1,C2,p,q,loss_fun,epsilon,numItermax = 1000, stopThr=1e-
\GW^T 1= q
\GW\geq 0
-
+
Where :
-
+
C1 : Metric cost matrix in the source space
C2 : Metric cost matrix in the target space
p : distribution in the source space
- q : distribution in the target space
+ q : distribution in the target space
L : loss function to account for the misfit between the similarity matrices
H : entropy
-
-
+
+
Parameters
----------
C1 : np.ndarray(ns,ns)
@@ -259,80 +257,80 @@ def gromov_wasserstein(C1,C2,p,q,loss_fun,epsilon,numItermax = 1000, stopThr=1e-
verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
+ record log if True
forcing : np.ndarray(N,2)
list of forced couplings (where N is the number of forcing)
-
+
Returns
-------
T : coupling between the two spaces that minimizes :
\sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
-
+
"""
-
- C1=np.asarray(C1,dtype=np.float64)
- C2=np.asarray(C2,dtype=np.float64)
- T=np.dot(p,q.T) #Initialization
-
+ C1 = np.asarray(C1, dtype=np.float64)
+ C2 = np.asarray(C2, dtype=np.float64)
+
+ T = np.dot(p, q.T) # Initialization
cpt = 0
- err=1
-
- while (err>stopThr and cpt<numItermax):
-
- Tprev=T
-
- if loss_fun=='square_loss':
- tens=tensor_square_loss(C1,C2,T)
-
- elif loss_fun=='kl_loss':
- tens=tensor_kl_loss(C1,C2,T)
-
- T=sinkhorn(p,q,tens,epsilon)
-
- if cpt%10==0:
+ err = 1
+
+ while (err > stopThr and cpt < numItermax):
+
+ Tprev = T
+
+ if loss_fun == 'square_loss':
+ tens = tensor_square_loss(C1, C2, T)
+
+ elif loss_fun == 'kl_loss':
+ tens = tensor_kl_loss(C1, C2, T)
+
+ T = sinkhorn(p, q, tens, epsilon)
+
+ if cpt % 10 == 0:
# we can speed up the process by checking for the error only every 10th iteration
- err=np.linalg.norm(T-Tprev)
-
+ err = np.linalg.norm(T - Tprev)
+
if log:
log['err'].append(err)
if verbose:
- if cpt%200 ==0:
- print('{:5s}|{:12s}'.format('It.','Err')+'\n'+'-'*19)
- print('{:5d}|{:8e}|'.format(cpt,err))
-
- cpt=cpt+1
+ if cpt % 200 == 0:
+ print('{:5s}|{:12s}'.format(
+ 'It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err))
+
+ cpt = cpt + 1
if log:
- return T,log
+ return T, log
else:
return T
-def gromov_wasserstein2(C1,C2,p,q,loss_fun,epsilon,numItermax = 1000, stopThr=1e-9,verbose=False, log=False):
+def gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon, numItermax=1000, stopThr=1e-9, verbose=False, log=False):
"""
Returns the gromov-wasserstein discrepancy between the two measured similarity matrices
-
+
(C1,p) and (C2,q)
-
+
The function solves the following optimization problem:
.. math::
\GW_Dist = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
-
+
Where :
-
+
C1 : Metric cost matrix in the source space
C2 : Metric cost matrix in the target space
p : distribution in the source space
- q : distribution in the target space
+ q : distribution in the target space
L : loss function to account for the misfit between the similarity matrices
H : entropy
-
-
+
+
Parameters
----------
C1 : np.ndarray(ns,ns)
@@ -353,55 +351,56 @@ def gromov_wasserstein2(C1,C2,p,q,loss_fun,epsilon,numItermax = 1000, stopThr=1e
verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
+ record log if True
forcing : np.ndarray(N,2)
list of forced couplings (where N is the number of forcing)
-
+
Returns
-------
gw_dist : the Gromov-Wasserstein discrepancy :
\min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
-
+
"""
-
+
if log:
- gw,logv=gromov_wasserstein(C1,C2,p,q,loss_fun,epsilon,numItermax,stopThr,verbose,log)
+ gw, logv = gromov_wasserstein(
+ C1, C2, p, q, loss_fun, epsilon, numItermax, stopThr, verbose, log)
else:
- gw=gromov_wasserstein(C1,C2,p,q,loss_fun,epsilon,numItermax,stopThr,verbose,log)
+ gw = gromov_wasserstein(C1, C2, p, q, loss_fun,
+ epsilon, numItermax, stopThr, verbose, log)
+
- if loss_fun=='square_loss':
- gw_dist=np.sum(gw*tensor_square_loss(C1,C2,gw))
-
-
- elif loss_fun=='kl_loss':
- gw_dist=np.sum(gw*tensor_kl_loss(C1,C2,gw))
+ if loss_fun == 'square_loss':
+ gw_dist = np.sum(gw * tensor_square_loss(C1, C2, gw))
+ elif loss_fun == 'kl_loss':
+ gw_dist = np.sum(gw * tensor_kl_loss(C1, C2, gw))
if log:
return gw_dist, logv
- else:
+ else:
return gw_dist
-def gromov_barycenters(N,Cs,ps,p,lambdas,loss_fun,epsilon,numItermax = 1000, stopThr=1e-9, verbose=False, log=False):
+def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon, numItermax=1000, stopThr=1e-9, verbose=False, log=False):
"""
Returns the gromov-wasserstein barycenters of S measured similarity matrices
-
+
(Cs)_{s=1}^{s=S}
-
+
The function solves the following optimization problem:
.. math::
C = argmin_C\in R^NxN \sum_s \lambda_s GW(C,Cs,p,ps)
-
+
Where :
-
+
Cs : metric cost matrix
ps : distribution
-
+
Parameters
----------
- N : Integer
+ N : Integer
Size of the targeted barycenter
Cs : list of S np.ndarray(ns,ns)
Metric cost matrices
@@ -422,61 +421,58 @@ def gromov_barycenters(N,Cs,ps,p,lambdas,loss_fun,epsilon,numItermax = 1000, sto
verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
-
+ record log if True
+
Returns
-------
C : Similarity matrix in the barycenter space (permuted arbitrarily)
-
+
"""
-
-
- S=len(Cs)
-
- Cs=[np.asarray(Cs[s],dtype=np.float64) for s in range(S)]
- lambdas=np.asarray(lambdas,dtype=np.float64)
-
- T=[0 for s in range(S)]
-
- #Initialization of C : random SPD matrix
- xalea=np.random.randn(N,2)
- C=dist(xalea,xalea)
- C/=C.max()
-
- cpt=0
- err=1
-
- error=[]
-
- while(err>stopThr and cpt<numItermax):
-
- Cprev=C
-
- T=[gromov_wasserstein(Cs[s],C,ps[s],p,loss_fun,epsilon,numItermax,1e-5,verbose,log) for s in range(S)]
-
- if loss_fun=='square_loss':
- C=update_square_loss(p,lambdas,T,Cs)
-
- elif loss_fun=='kl_loss':
- C=update_kl_loss(p,lambdas,T,Cs)
-
- if cpt%10==0:
+
+ S = len(Cs)
+
+ Cs = [np.asarray(Cs[s], dtype=np.float64) for s in range(S)]
+ lambdas = np.asarray(lambdas, dtype=np.float64)
+
+ T = [0 for s in range(S)]
+
+ # Initialization of C : random SPD matrix
+ xalea = np.random.randn(N, 2)
+ C = dist(xalea, xalea)
+ C /= C.max()
+
+ cpt = 0
+ err = 1
+
+ error = []
+
+ while(err > stopThr and cpt < numItermax):
+
+ Cprev = C
+
+ T = [gromov_wasserstein(Cs[s], C, ps[s], p, loss_fun, epsilon,
+ numItermax, 1e-5, verbose, log) for s in range(S)]
+
+ if loss_fun == 'square_loss':
+ C = update_square_loss(p, lambdas, T, Cs)
+
+ elif loss_fun == 'kl_loss':
+ C = update_kl_loss(p, lambdas, T, Cs)
+
+ if cpt % 10 == 0:
# we can speed up the process by checking for the error only every 10th iteration
- err=np.linalg.norm(C-Cprev)
+ err = np.linalg.norm(C - Cprev)
error.append(err)
-
+
if log:
log['err'].append(err)
if verbose:
- if cpt%200 ==0:
- print('{:5s}|{:12s}'.format('It.','Err')+'\n'+'-'*19)
- print('{:5d}|{:8e}|'.format(cpt,err))
-
- cpt=cpt+1
-
- return C
-
\ No newline at end of file
+ if cpt % 200 == 0:
+ print('{:5s}|{:12s}'.format(
+ 'It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err))
+ cpt = cpt + 1
+ return C
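
Note: the tensor_square_loss and tensor_kl_loss helpers changed above rely on the factorization of Peyré et al. [12]. For any loss of the form L(a,b) = f1(a) + f2(b) - h1(a)*h2(b), the contraction tens[i,j] = \sum_{k,l} L(C1_{i,k}, C2_{j,l}) T_{k,l} splits into a term that varies only with the row index i, a term that varies only with the column index j (both are absorbed by Sinkhorn's scaling vectors, which is why the code can drop them), plus the cross term -h1(C1).dot(T).dot(h2(C2).T) that the code computes; the subsequent tens - tens.min() shift is likewise a constant offset that leaves the Sinkhorn solution unchanged. A brute-force verification sketch for the square loss (standalone, small random inputs):

import numpy as np

rng = np.random.RandomState(0)
ns, nt = 4, 5
C1, C2, T = rng.rand(ns, ns), rng.rand(nt, nt), rng.rand(ns, nt)

# Direct contraction: direct[i, j] = sum_{k, l} 0.5 * (C1[i, k] - C2[j, l])**2 * T[k, l]
big = 0.5 * (C1[:, :, None, None] - C2[None, None, :, :]) ** 2  # indexed [i, k, j, l]
direct = np.einsum('ikjl,kl->ij', big, T)

# Factored form: the f1/f2 terms see T only through its row/column sums.
constA = ((C1 ** 2) / 2).dot(T.sum(axis=1))[:, None]  # f1 term, one value per row i
constB = ((C2 ** 2) / 2).dot(T.sum(axis=0))[None, :]  # f2 term, one value per column j
cross = -C1.dot(T).dot(C2.T)                          # the part tensor_square_loss keeps
assert np.allclose(direct, constA + constB + cross)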