From 549b95b5736b42f3fe74daf9805303a08b1ae01d Mon Sep 17 00:00:00 2001 From: tvayer Date: Tue, 28 May 2019 16:08:41 +0200 Subject: FGW+gromov changes --- examples/plot_fgw.py | 152 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 152 insertions(+) create mode 100644 examples/plot_fgw.py (limited to 'examples') diff --git a/examples/plot_fgw.py b/examples/plot_fgw.py new file mode 100644 index 0000000..5c2d0e1 --- /dev/null +++ b/examples/plot_fgw.py @@ -0,0 +1,152 @@ +#!/usr/bin/env python3 +# -*- coding: utf-8 -*- +""" +============================== +Plot Fused-gromov-Wasserstein +============================== + +This example illustrates the computation of FGW for 1D measures[18]. + +.. [18] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain + and Courty Nicolas + "Optimal Transport for structured data with application on graphs" + International Conference on Machine Learning (ICML). 2019. + +""" + +# Author: Titouan Vayer +# +# License: MIT License + +import matplotlib.pyplot as pl +import numpy as np +import ot +from ot.gromov import gromov_wasserstein,fused_gromov_wasserstein + +#%% parameters +# We create two 1D random measures +n=20 +n2=30 +sig=1 +sig2=0.1 + +np.random.seed(0) + +phi=np.arange(n)[:,None] +xs=phi+sig*np.random.randn(n,1) +ys=np.vstack((np.ones((n//2,1)),0*np.ones((n//2,1))))+sig2*np.random.randn(n,1) + +phi2=np.arange(n2)[:,None] +xt=phi2+sig*np.random.randn(n2,1) +yt=np.vstack((np.ones((n2//2,1)),0*np.ones((n2//2,1))))+sig2*np.random.randn(n2,1) +yt= yt[::-1,:] + +p=ot.unif(n) +q=ot.unif(n2) + +#%% plot the distributions + +pl.close(10) +pl.figure(10,(7,7)) + +pl.subplot(2,1,1) + +pl.scatter(ys,xs,c=phi,s=70) +pl.ylabel('Feature value a',fontsize=20) +pl.title('$\mu=\sum_i \delta_{x_i,a_i}$',fontsize=25, usetex=True, y=1) +pl.xticks(()) +pl.yticks(()) +pl.subplot(2,1,2) +pl.scatter(yt,xt,c=phi2,s=70) +pl.xlabel('coordinates x/y',fontsize=25) +pl.ylabel('Feature value b',fontsize=20) +pl.title('$\\nu=\sum_j \delta_{y_j,b_j}$',fontsize=25, usetex=True, y=1) +pl.yticks(()) +pl.tight_layout() +pl.show() + + +#%% Structure matrices and across-features distance matrix +C1=ot.dist(xs) +C2=ot.dist(xt).T +M=ot.dist(ys,yt) +w1=ot.unif(C1.shape[0]) +w2=ot.unif(C2.shape[0]) +Got=ot.emd([],[],M) + +#%% +cmap='Reds' +pl.close(10) +pl.figure(10,(5,5)) +fs=15 +l_x=[0,5,10,15] +l_y=[0,5,10,15,20,25] +gs = pl.GridSpec(5, 5) + +ax1=pl.subplot(gs[3:,:2]) + +pl.imshow(C1,cmap=cmap,interpolation='nearest') +pl.title("$C_1$",fontsize=fs) +pl.xlabel("$k$",fontsize=fs) +pl.ylabel("$i$",fontsize=fs) +pl.xticks(l_x) +pl.yticks(l_x) + +ax2=pl.subplot(gs[:3,2:]) + +pl.imshow(C2,cmap=cmap,interpolation='nearest') +pl.title("$C_2$",fontsize=fs) +pl.ylabel("$l$",fontsize=fs) +#pl.ylabel("$l$",fontsize=fs) +pl.xticks(()) +pl.yticks(l_y) +ax2.set_aspect('auto') + +ax3=pl.subplot(gs[3:,2:],sharex=ax2,sharey=ax1) +pl.imshow(M,cmap=cmap,interpolation='nearest') +pl.yticks(l_x) +pl.xticks(l_y) +pl.ylabel("$i$",fontsize=fs) +pl.title("$M_{AB}$",fontsize=fs) +pl.xlabel("$j$",fontsize=fs) +pl.tight_layout() +ax3.set_aspect('auto') +pl.show() + + +#%% Computing FGW and GW +alpha=1e-3 + +ot.tic() +Gwg,logw=fused_gromov_wasserstein(M,C1,C2,p,q,loss_fun='square_loss',alpha=alpha,verbose=True,log=True) +ot.toc() + +#%reload_ext WGW +Gg,log=gromov_wasserstein(C1,C2,p,q,loss_fun='square_loss',verbose=True,log=True) + +#%% visu OT matrix +cmap='Blues' +fs=15 +pl.figure(2,(13,5)) +pl.clf() +pl.subplot(1,3,1) +pl.imshow(Got,cmap=cmap,interpolation='nearest') +#pl.xlabel("$y$",fontsize=fs) +pl.ylabel("$i$",fontsize=fs) +pl.xticks(()) + +pl.title('Wasserstein ($M$ only)') + +pl.subplot(1,3,2) +pl.imshow(Gg,cmap=cmap,interpolation='nearest') +pl.title('Gromov ($C_1,C_2$ only)') +pl.xticks(()) +pl.subplot(1,3,3) +pl.imshow(Gwg,cmap=cmap,interpolation='nearest') +pl.title('FGW ($M+C_1,C_2$)') + +pl.xlabel("$j$",fontsize=fs) +pl.ylabel("$i$",fontsize=fs) + +pl.tight_layout() +pl.show() \ No newline at end of file -- cgit v1.2.3 From b1b514f5d9de009e63bd407dfd9c0a0cf6128876 Mon Sep 17 00:00:00 2001 From: tvayer Date: Tue, 28 May 2019 16:50:00 +0200 Subject: bary fgw --- examples/plot_barycenter_fgw.py | 172 ++++++++++++++++++++++++++++++++++++++++ examples/plot_fgw.py | 1 - ot/gromov.py | 15 ++-- 3 files changed, 180 insertions(+), 8 deletions(-) create mode 100644 examples/plot_barycenter_fgw.py (limited to 'examples') diff --git a/examples/plot_barycenter_fgw.py b/examples/plot_barycenter_fgw.py new file mode 100644 index 0000000..f416629 --- /dev/null +++ b/examples/plot_barycenter_fgw.py @@ -0,0 +1,172 @@ +# -*- coding: utf-8 -*- +""" +================================= +Plot graphs' barycenter using FGW +================================= + +This example illustrates the computation barycenter of labeled graphs using FGW + +Requires networkx >=2 + +.. [18] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain + and Courty Nicolas + "Optimal Transport for structured data with application on graphs" + International Conference on Machine Learning (ICML). 2019. + +""" + +# Author: Titouan Vayer +# +# License: MIT License + +#%% load libraries +import numpy as np +import matplotlib.pyplot as plt +import networkx as nx +import math +from scipy.sparse.csgraph import shortest_path +import matplotlib.colors as mcol +from matplotlib import cm +from ot.gromov import fgw_barycenters +#%% Graph functions + +def find_thresh(C,inf=0.5,sup=3,step=10): + """ Trick to find the adequate thresholds from where value of the C matrix are considered close enough to say that nodes are connected + Tthe threshold is found by a linesearch between values "inf" and "sup" with "step" thresholds tested. + The optimal threshold is the one which minimizes the reconstruction error between the shortest_path matrix coming from the thresholded adjency matrix + and the original matrix. + Parameters + ---------- + C : ndarray, shape (n_nodes,n_nodes) + The structure matrix to threshold + inf : float + The beginning of the linesearch + sup : float + The end of the linesearch + step : integer + Number of thresholds tested + """ + dist=[] + search=np.linspace(inf,sup,step) + for thresh in search: + Cprime=sp_to_adjency(C,0,thresh) + SC=shortest_path(Cprime,method='D') + SC[SC==float('inf')]=100 + dist.append(np.linalg.norm(SC-C)) + return search[np.argmin(dist)],dist + +def sp_to_adjency(C,threshinf=0.2,threshsup=1.8): + """ Thresholds the structure matrix in order to compute an adjency matrix. + All values between threshinf and threshsup are considered representing connected nodes and set to 1. Else are set to 0 + Parameters + ---------- + C : ndarray, shape (n_nodes,n_nodes) + The structure matrix to threshold + threshinf : float + The minimum value of distance from which the new value is set to 1 + threshsup : float + The maximum value of distance from which the new value is set to 1 + Returns + ------- + C : ndarray, shape (n_nodes,n_nodes) + The threshold matrix. Each element is in {0,1} + """ + H=np.zeros_like(C) + np.fill_diagonal(H,np.diagonal(C)) + C=C-H + C=np.minimum(np.maximum(C,threshinf),threshsup) + C[C==threshsup]=0 + C[C!=0]=1 + + return C + +def build_noisy_circular_graph(N=20,mu=0,sigma=0.3,with_noise=False,structure_noise=False,p=None): + """ Create a noisy circular graph + """ + g=nx.Graph() + g.add_nodes_from(list(range(N))) + for i in range(N): + noise=float(np.random.normal(mu,sigma,1)) + if with_noise: + g.add_node(i,attr_name=math.sin((2*i*math.pi/N))+noise) + else: + g.add_node(i,attr_name=math.sin(2*i*math.pi/N)) + g.add_edge(i,i+1) + if structure_noise: + randomint=np.random.randint(0,p) + if randomint==0: + if i<=N-3: + g.add_edge(i,i+2) + if i==N-2: + g.add_edge(i,0) + if i==N-1: + g.add_edge(i,1) + g.add_edge(N,0) + noise=float(np.random.normal(mu,sigma,1)) + if with_noise: + g.add_node(N,attr_name=math.sin((2*N*math.pi/N))+noise) + else: + g.add_node(N,attr_name=math.sin(2*N*math.pi/N)) + return g + +def graph_colors(nx_graph,vmin=0,vmax=7): + cnorm = mcol.Normalize(vmin=vmin,vmax=vmax) + cpick = cm.ScalarMappable(norm=cnorm,cmap='viridis') + cpick.set_array([]) + val_map = {} + for k,v in nx.get_node_attributes(nx_graph,'attr_name').items(): + val_map[k]=cpick.to_rgba(v) + colors=[] + for node in nx_graph.nodes(): + colors.append(val_map[node]) + return colors + +#%% create dataset +# We build a dataset of noisy circular graphs. +# Noise is added on the structures by random connections and on the features by gaussian noise. + +np.random.seed(30) +X0=[] +for k in range(9): + X0.append(build_noisy_circular_graph(np.random.randint(15,25),with_noise=True,structure_noise=True,p=3)) + +#%% Plot dataset + +plt.figure(figsize=(8,10)) +for i in range(len(X0)): + plt.subplot(3,3,i+1) + g=X0[i] + pos=nx.kamada_kawai_layout(g) + nx.draw(g,pos=pos,node_color = graph_colors(g,vmin=-1,vmax=1),with_labels=False,node_size=100) +plt.suptitle('Dataset of noisy graphs. Color indicates the label',fontsize=20) +plt.show() + + + +#%% +# We compute the barycenter using FGW. Structure matrices are computed using the shortest_path distance in the graph +# Features distances are the euclidean distances +Cs=[shortest_path(nx.adjacency_matrix(x)) for x in X0] +ps=[np.ones(len(x.nodes()))/len(x.nodes()) for x in X0] +Ys=[np.array([v for (k,v) in nx.get_node_attributes(x,'attr_name').items()]).reshape(-1,1) for x in X0] +lambdas=np.array([np.ones(len(Ys))/len(Ys)]).ravel() +sizebary=15 # we choose a barycenter with 15 nodes + +#%% + +A,C,log=fgw_barycenters(sizebary,Ys,Cs,ps,lambdas,alpha=0.95) + +#%% +bary=nx.from_numpy_matrix(sp_to_adjency(C,threshinf=0,threshsup=find_thresh(C,sup=100,step=100)[0])) +for i in range(len(A.ravel())): + bary.add_node(i,attr_name=float(A.ravel()[i])) + +#%% +pos = nx.kamada_kawai_layout(bary) +nx.draw(bary,pos=pos,node_color = graph_colors(bary,vmin=-1,vmax=1),with_labels=False) +plt.suptitle('Barycenter',fontsize=20) +plt.show() + + + + diff --git a/examples/plot_fgw.py b/examples/plot_fgw.py index 5c2d0e1..bfa7fb4 100644 --- a/examples/plot_fgw.py +++ b/examples/plot_fgw.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ ============================== diff --git a/ot/gromov.py b/ot/gromov.py index 7491664..31bd657 100644 --- a/ot/gromov.py +++ b/ot/gromov.py @@ -883,8 +883,9 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, return C -def fgw_barycenters(N,Ys,Cs,ps,lambdas,alpha,fixed_structure=False,fixed_features=False,p=None,loss_fun='square_loss', - max_iter=100, tol=1e-9,verbose=False,log=True,init_C=None,init_X=None): +def fgw_barycenters(N,Ys,Cs,ps,lambdas,alpha,fixed_structure=False,fixed_features=False, + p=None,loss_fun='square_loss',max_iter=100, tol=1e-9, + verbose=False,log=True,init_C=None,init_X=None): """ Compute the fgw barycenter as presented eq (5) in [3]. @@ -957,7 +958,7 @@ def fgw_barycenters(N,Ys,Cs,ps,lambdas,alpha,fixed_structure=False,fixed_feature X=np.zeros((N,d)) else: X = init_X - + T=[np.outer(p,q) for q in ps] # X is N,d @@ -981,7 +982,7 @@ def fgw_barycenters(N,Ys,Cs,ps,lambdas,alpha,fixed_structure=False,fixed_feature if not fixed_features: Ys_temp=[y.T for y in Ys] - X=update_feature_matrix(lambdas,Ys_temp,T,p) + X=update_feature_matrix(lambdas,Ys_temp,T,p).T # X must be N,d # Ys must be ns,d @@ -1024,11 +1025,11 @@ def fgw_barycenters(N,Ys,Cs,ps,lambdas,alpha,fixed_structure=False,fixed_feature print('{:5d}|{:8e}|'.format(cpt, err_feature)) cpt += 1 - log_['T']=T # ce sont les matrices du barycentre de la target vers les Ys + log_['T']=T # from target to Ys log_['p']=p - log_['Ms']=Ms #Ms sont de tailles N,ns + log_['Ms']=Ms #Ms are N,ns - return X.T,C,log_ + return X,C,log_ def update_sructure_matrix(p, lambdas, T, Cs): -- cgit v1.2.3 From fa989062c17f87bd96aa58ad764fd3791ea11e22 Mon Sep 17 00:00:00 2001 From: tvayer Date: Wed, 29 May 2019 15:00:50 +0200 Subject: Reame +pep8 --- README.md | 14 ++++ examples/plot_barycenter_fgw.py | 150 ++++++++++++++++++++-------------------- examples/plot_fgw.py | 138 ++++++++++++++++++------------------ test/test_gromov.py | 53 +++++++------- test/test_optim.py | 9 +-- 5 files changed, 190 insertions(+), 174 deletions(-) (limited to 'examples') diff --git a/README.md b/README.md index fd27f9d..b6b215c 100644 --- a/README.md +++ b/README.md @@ -222,3 +222,17 @@ You can also post bug reports and feature requests in Github issues. Make sure t [16] Agueh, M., & Carlier, G. (2011). [Barycenters in the Wasserstein space](https://hal.archives-ouvertes.fr/hal-00637399/document). SIAM Journal on Mathematical Analysis, 43(2), 904-924. [17] Blondel, M., Seguy, V., & Rolet, A. (2018). [Smooth and Sparse Optimal Transport](https://arxiv.org/abs/1710.06276). Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics (AISTATS). + +[18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. (2016) [Stochastic Optimization for Large-scale Optimal Transport](https://arxiv.org/abs/1605.08527). Advances in Neural Information Processing Systems (2016). + +[19] Seguy, V., Bhushan Damodaran, B., Flamary, R., Courty, N., Rolet, A.& Blondel, M. [Large-scale Optimal Transport and Mapping Estimation](https://arxiv.org/pdf/1711.02283.pdf). International Conference on Learning Representation (2018) + +[20] Cuturi, M. and Doucet, A. (2014) [Fast Computation of Wasserstein Barycenters](http://proceedings.mlr.press/v32/cuturi14.html). International Conference in Machine Learning + +[21] Solomon, J., De Goes, F., Peyré, G., Cuturi, M., Butscher, A., Nguyen, A. & Guibas, L. (2015). [Convolutional wasserstein distances: Efficient optimal transportation on geometric domains](https://dl.acm.org/citation.cfm?id=2766963). ACM Transactions on Graphics (TOG), 34(4), 66. + +[22] J. Altschuler, J.Weed, P. Rigollet, (2017) [Near-linear time approximation algorithms for optimal transport via Sinkhorn iteration](https://papers.nips.cc/paper/6792-near-linear-time-approximation-algorithms-for-optimal-transport-via-sinkhorn-iteration.pdf), Advances in Neural Information Processing Systems (NIPS) 31 + +[23] Aude, G., Peyré, G., Cuturi, M., [Learning Generative Models with Sinkhorn Divergences](https://arxiv.org/abs/1706.00292), Proceedings of the Twenty-First International Conference on Artficial Intelligence and Statistics, (AISTATS) 21, 2018 + +[24] Vayer, T., Chapel, L., Flamary, R., Tavenard, R. and Courty, N. (2019). [Optimal Transport for structured data with application on graphs](http://proceedings.mlr.press/v97/titouan19a.html) Proceedings of the 36th International Conference on Machine Learning (ICML). diff --git a/examples/plot_barycenter_fgw.py b/examples/plot_barycenter_fgw.py index f416629..9eea036 100644 --- a/examples/plot_barycenter_fgw.py +++ b/examples/plot_barycenter_fgw.py @@ -30,10 +30,11 @@ from matplotlib import cm from ot.gromov import fgw_barycenters #%% Graph functions -def find_thresh(C,inf=0.5,sup=3,step=10): + +def find_thresh(C, inf=0.5, sup=3, step=10): """ Trick to find the adequate thresholds from where value of the C matrix are considered close enough to say that nodes are connected - Tthe threshold is found by a linesearch between values "inf" and "sup" with "step" thresholds tested. - The optimal threshold is the one which minimizes the reconstruction error between the shortest_path matrix coming from the thresholded adjency matrix + Tthe threshold is found by a linesearch between values "inf" and "sup" with "step" thresholds tested. + The optimal threshold is the one which minimizes the reconstruction error between the shortest_path matrix coming from the thresholded adjency matrix and the original matrix. Parameters ---------- @@ -43,21 +44,22 @@ def find_thresh(C,inf=0.5,sup=3,step=10): The beginning of the linesearch sup : float The end of the linesearch - step : integer - Number of thresholds tested + step : integer + Number of thresholds tested """ - dist=[] - search=np.linspace(inf,sup,step) + dist = [] + search = np.linspace(inf, sup, step) for thresh in search: - Cprime=sp_to_adjency(C,0,thresh) - SC=shortest_path(Cprime,method='D') - SC[SC==float('inf')]=100 - dist.append(np.linalg.norm(SC-C)) - return search[np.argmin(dist)],dist - -def sp_to_adjency(C,threshinf=0.2,threshsup=1.8): - """ Thresholds the structure matrix in order to compute an adjency matrix. - All values between threshinf and threshsup are considered representing connected nodes and set to 1. Else are set to 0 + Cprime = sp_to_adjency(C, 0, thresh) + SC = shortest_path(Cprime, method='D') + SC[SC == float('inf')] = 100 + dist.append(np.linalg.norm(SC - C)) + return search[np.argmin(dist)], dist + + +def sp_to_adjency(C, threshinf=0.2, threshsup=1.8): + """ Thresholds the structure matrix in order to compute an adjency matrix. + All values between threshinf and threshsup are considered representing connected nodes and set to 1. Else are set to 0 Parameters ---------- C : ndarray, shape (n_nodes,n_nodes) @@ -71,102 +73,100 @@ def sp_to_adjency(C,threshinf=0.2,threshsup=1.8): C : ndarray, shape (n_nodes,n_nodes) The threshold matrix. Each element is in {0,1} """ - H=np.zeros_like(C) - np.fill_diagonal(H,np.diagonal(C)) - C=C-H - C=np.minimum(np.maximum(C,threshinf),threshsup) - C[C==threshsup]=0 - C[C!=0]=1 - - return C - -def build_noisy_circular_graph(N=20,mu=0,sigma=0.3,with_noise=False,structure_noise=False,p=None): + H = np.zeros_like(C) + np.fill_diagonal(H, np.diagonal(C)) + C = C - H + C = np.minimum(np.maximum(C, threshinf), threshsup) + C[C == threshsup] = 0 + C[C != 0] = 1 + + return C + + +def build_noisy_circular_graph(N=20, mu=0, sigma=0.3, with_noise=False, structure_noise=False, p=None): """ Create a noisy circular graph """ - g=nx.Graph() + g = nx.Graph() g.add_nodes_from(list(range(N))) for i in range(N): - noise=float(np.random.normal(mu,sigma,1)) + noise = float(np.random.normal(mu, sigma, 1)) if with_noise: - g.add_node(i,attr_name=math.sin((2*i*math.pi/N))+noise) + g.add_node(i, attr_name=math.sin((2 * i * math.pi / N)) + noise) else: - g.add_node(i,attr_name=math.sin(2*i*math.pi/N)) - g.add_edge(i,i+1) + g.add_node(i, attr_name=math.sin(2 * i * math.pi / N)) + g.add_edge(i, i + 1) if structure_noise: - randomint=np.random.randint(0,p) - if randomint==0: - if i<=N-3: - g.add_edge(i,i+2) - if i==N-2: - g.add_edge(i,0) - if i==N-1: - g.add_edge(i,1) - g.add_edge(N,0) - noise=float(np.random.normal(mu,sigma,1)) + randomint = np.random.randint(0, p) + if randomint == 0: + if i <= N - 3: + g.add_edge(i, i + 2) + if i == N - 2: + g.add_edge(i, 0) + if i == N - 1: + g.add_edge(i, 1) + g.add_edge(N, 0) + noise = float(np.random.normal(mu, sigma, 1)) if with_noise: - g.add_node(N,attr_name=math.sin((2*N*math.pi/N))+noise) + g.add_node(N, attr_name=math.sin((2 * N * math.pi / N)) + noise) else: - g.add_node(N,attr_name=math.sin(2*N*math.pi/N)) + g.add_node(N, attr_name=math.sin(2 * N * math.pi / N)) return g -def graph_colors(nx_graph,vmin=0,vmax=7): - cnorm = mcol.Normalize(vmin=vmin,vmax=vmax) - cpick = cm.ScalarMappable(norm=cnorm,cmap='viridis') + +def graph_colors(nx_graph, vmin=0, vmax=7): + cnorm = mcol.Normalize(vmin=vmin, vmax=vmax) + cpick = cm.ScalarMappable(norm=cnorm, cmap='viridis') cpick.set_array([]) val_map = {} - for k,v in nx.get_node_attributes(nx_graph,'attr_name').items(): - val_map[k]=cpick.to_rgba(v) - colors=[] + for k, v in nx.get_node_attributes(nx_graph, 'attr_name').items(): + val_map[k] = cpick.to_rgba(v) + colors = [] for node in nx_graph.nodes(): colors.append(val_map[node]) return colors - + #%% create dataset # We build a dataset of noisy circular graphs. # Noise is added on the structures by random connections and on the features by gaussian noise. + np.random.seed(30) -X0=[] +X0 = [] for k in range(9): - X0.append(build_noisy_circular_graph(np.random.randint(15,25),with_noise=True,structure_noise=True,p=3)) - + X0.append(build_noisy_circular_graph(np.random.randint(15, 25), with_noise=True, structure_noise=True, p=3)) + #%% Plot dataset -plt.figure(figsize=(8,10)) +plt.figure(figsize=(8, 10)) for i in range(len(X0)): - plt.subplot(3,3,i+1) - g=X0[i] - pos=nx.kamada_kawai_layout(g) - nx.draw(g,pos=pos,node_color = graph_colors(g,vmin=-1,vmax=1),with_labels=False,node_size=100) -plt.suptitle('Dataset of noisy graphs. Color indicates the label',fontsize=20) + plt.subplot(3, 3, i + 1) + g = X0[i] + pos = nx.kamada_kawai_layout(g) + nx.draw(g, pos=pos, node_color=graph_colors(g, vmin=-1, vmax=1), with_labels=False, node_size=100) +plt.suptitle('Dataset of noisy graphs. Color indicates the label', fontsize=20) plt.show() - #%% # We compute the barycenter using FGW. Structure matrices are computed using the shortest_path distance in the graph # Features distances are the euclidean distances -Cs=[shortest_path(nx.adjacency_matrix(x)) for x in X0] -ps=[np.ones(len(x.nodes()))/len(x.nodes()) for x in X0] -Ys=[np.array([v for (k,v) in nx.get_node_attributes(x,'attr_name').items()]).reshape(-1,1) for x in X0] -lambdas=np.array([np.ones(len(Ys))/len(Ys)]).ravel() -sizebary=15 # we choose a barycenter with 15 nodes +Cs = [shortest_path(nx.adjacency_matrix(x)) for x in X0] +ps = [np.ones(len(x.nodes())) / len(x.nodes()) for x in X0] +Ys = [np.array([v for (k, v) in nx.get_node_attributes(x, 'attr_name').items()]).reshape(-1, 1) for x in X0] +lambdas = np.array([np.ones(len(Ys)) / len(Ys)]).ravel() +sizebary = 15 # we choose a barycenter with 15 nodes #%% -A,C,log=fgw_barycenters(sizebary,Ys,Cs,ps,lambdas,alpha=0.95) +A, C, log = fgw_barycenters(sizebary, Ys, Cs, ps, lambdas, alpha=0.95) #%% -bary=nx.from_numpy_matrix(sp_to_adjency(C,threshinf=0,threshsup=find_thresh(C,sup=100,step=100)[0])) +bary = nx.from_numpy_matrix(sp_to_adjency(C, threshinf=0, threshsup=find_thresh(C, sup=100, step=100)[0])) for i in range(len(A.ravel())): - bary.add_node(i,attr_name=float(A.ravel()[i])) - + bary.add_node(i, attr_name=float(A.ravel()[i])) + #%% pos = nx.kamada_kawai_layout(bary) -nx.draw(bary,pos=pos,node_color = graph_colors(bary,vmin=-1,vmax=1),with_labels=False) -plt.suptitle('Barycenter',fontsize=20) +nx.draw(bary, pos=pos, node_color=graph_colors(bary, vmin=-1, vmax=1), with_labels=False) +plt.suptitle('Barycenter', fontsize=20) plt.show() - - - - diff --git a/examples/plot_fgw.py b/examples/plot_fgw.py index bfa7fb4..ae3c487 100644 --- a/examples/plot_fgw.py +++ b/examples/plot_fgw.py @@ -20,132 +20,132 @@ This example illustrates the computation of FGW for 1D measures[18]. import matplotlib.pyplot as pl import numpy as np import ot -from ot.gromov import gromov_wasserstein,fused_gromov_wasserstein +from ot.gromov import gromov_wasserstein, fused_gromov_wasserstein #%% parameters -# We create two 1D random measures -n=20 -n2=30 -sig=1 -sig2=0.1 +# We create two 1D random measures +n = 20 +n2 = 30 +sig = 1 +sig2 = 0.1 np.random.seed(0) -phi=np.arange(n)[:,None] -xs=phi+sig*np.random.randn(n,1) -ys=np.vstack((np.ones((n//2,1)),0*np.ones((n//2,1))))+sig2*np.random.randn(n,1) +phi = np.arange(n)[:, None] +xs = phi + sig * np.random.randn(n, 1) +ys = np.vstack((np.ones((n // 2, 1)), 0 * np.ones((n // 2, 1)))) + sig2 * np.random.randn(n, 1) -phi2=np.arange(n2)[:,None] -xt=phi2+sig*np.random.randn(n2,1) -yt=np.vstack((np.ones((n2//2,1)),0*np.ones((n2//2,1))))+sig2*np.random.randn(n2,1) -yt= yt[::-1,:] +phi2 = np.arange(n2)[:, None] +xt = phi2 + sig * np.random.randn(n2, 1) +yt = np.vstack((np.ones((n2 // 2, 1)), 0 * np.ones((n2 // 2, 1)))) + sig2 * np.random.randn(n2, 1) +yt = yt[::-1, :] -p=ot.unif(n) -q=ot.unif(n2) +p = ot.unif(n) +q = ot.unif(n2) #%% plot the distributions pl.close(10) -pl.figure(10,(7,7)) +pl.figure(10, (7, 7)) -pl.subplot(2,1,1) +pl.subplot(2, 1, 1) -pl.scatter(ys,xs,c=phi,s=70) -pl.ylabel('Feature value a',fontsize=20) -pl.title('$\mu=\sum_i \delta_{x_i,a_i}$',fontsize=25, usetex=True, y=1) +pl.scatter(ys, xs, c=phi, s=70) +pl.ylabel('Feature value a', fontsize=20) +pl.title('$\mu=\sum_i \delta_{x_i,a_i}$', fontsize=25, usetex=True, y=1) pl.xticks(()) pl.yticks(()) -pl.subplot(2,1,2) -pl.scatter(yt,xt,c=phi2,s=70) -pl.xlabel('coordinates x/y',fontsize=25) -pl.ylabel('Feature value b',fontsize=20) -pl.title('$\\nu=\sum_j \delta_{y_j,b_j}$',fontsize=25, usetex=True, y=1) +pl.subplot(2, 1, 2) +pl.scatter(yt, xt, c=phi2, s=70) +pl.xlabel('coordinates x/y', fontsize=25) +pl.ylabel('Feature value b', fontsize=20) +pl.title('$\\nu=\sum_j \delta_{y_j,b_j}$', fontsize=25, usetex=True, y=1) pl.yticks(()) pl.tight_layout() pl.show() #%% Structure matrices and across-features distance matrix -C1=ot.dist(xs) -C2=ot.dist(xt).T -M=ot.dist(ys,yt) -w1=ot.unif(C1.shape[0]) -w2=ot.unif(C2.shape[0]) -Got=ot.emd([],[],M) +C1 = ot.dist(xs) +C2 = ot.dist(xt).T +M = ot.dist(ys, yt) +w1 = ot.unif(C1.shape[0]) +w2 = ot.unif(C2.shape[0]) +Got = ot.emd([], [], M) #%% -cmap='Reds' +cmap = 'Reds' pl.close(10) -pl.figure(10,(5,5)) -fs=15 -l_x=[0,5,10,15] -l_y=[0,5,10,15,20,25] +pl.figure(10, (5, 5)) +fs = 15 +l_x = [0, 5, 10, 15] +l_y = [0, 5, 10, 15, 20, 25] gs = pl.GridSpec(5, 5) -ax1=pl.subplot(gs[3:,:2]) +ax1 = pl.subplot(gs[3:, :2]) -pl.imshow(C1,cmap=cmap,interpolation='nearest') -pl.title("$C_1$",fontsize=fs) -pl.xlabel("$k$",fontsize=fs) -pl.ylabel("$i$",fontsize=fs) +pl.imshow(C1, cmap=cmap, interpolation='nearest') +pl.title("$C_1$", fontsize=fs) +pl.xlabel("$k$", fontsize=fs) +pl.ylabel("$i$", fontsize=fs) pl.xticks(l_x) pl.yticks(l_x) -ax2=pl.subplot(gs[:3,2:]) +ax2 = pl.subplot(gs[:3, 2:]) -pl.imshow(C2,cmap=cmap,interpolation='nearest') -pl.title("$C_2$",fontsize=fs) -pl.ylabel("$l$",fontsize=fs) +pl.imshow(C2, cmap=cmap, interpolation='nearest') +pl.title("$C_2$", fontsize=fs) +pl.ylabel("$l$", fontsize=fs) #pl.ylabel("$l$",fontsize=fs) pl.xticks(()) pl.yticks(l_y) ax2.set_aspect('auto') -ax3=pl.subplot(gs[3:,2:],sharex=ax2,sharey=ax1) -pl.imshow(M,cmap=cmap,interpolation='nearest') +ax3 = pl.subplot(gs[3:, 2:], sharex=ax2, sharey=ax1) +pl.imshow(M, cmap=cmap, interpolation='nearest') pl.yticks(l_x) pl.xticks(l_y) -pl.ylabel("$i$",fontsize=fs) -pl.title("$M_{AB}$",fontsize=fs) -pl.xlabel("$j$",fontsize=fs) +pl.ylabel("$i$", fontsize=fs) +pl.title("$M_{AB}$", fontsize=fs) +pl.xlabel("$j$", fontsize=fs) pl.tight_layout() ax3.set_aspect('auto') pl.show() #%% Computing FGW and GW -alpha=1e-3 - +alpha = 1e-3 + ot.tic() -Gwg,logw=fused_gromov_wasserstein(M,C1,C2,p,q,loss_fun='square_loss',alpha=alpha,verbose=True,log=True) +Gwg, logw = fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=alpha, verbose=True, log=True) ot.toc() -#%reload_ext WGW -Gg,log=gromov_wasserstein(C1,C2,p,q,loss_fun='square_loss',verbose=True,log=True) - +#%reload_ext WGW +Gg, log = gromov_wasserstein(C1, C2, p, q, loss_fun='square_loss', verbose=True, log=True) + #%% visu OT matrix -cmap='Blues' -fs=15 -pl.figure(2,(13,5)) +cmap = 'Blues' +fs = 15 +pl.figure(2, (13, 5)) pl.clf() -pl.subplot(1,3,1) -pl.imshow(Got,cmap=cmap,interpolation='nearest') +pl.subplot(1, 3, 1) +pl.imshow(Got, cmap=cmap, interpolation='nearest') #pl.xlabel("$y$",fontsize=fs) -pl.ylabel("$i$",fontsize=fs) +pl.ylabel("$i$", fontsize=fs) pl.xticks(()) pl.title('Wasserstein ($M$ only)') -pl.subplot(1,3,2) -pl.imshow(Gg,cmap=cmap,interpolation='nearest') +pl.subplot(1, 3, 2) +pl.imshow(Gg, cmap=cmap, interpolation='nearest') pl.title('Gromov ($C_1,C_2$ only)') pl.xticks(()) -pl.subplot(1,3,3) -pl.imshow(Gwg,cmap=cmap,interpolation='nearest') +pl.subplot(1, 3, 3) +pl.imshow(Gwg, cmap=cmap, interpolation='nearest') pl.title('FGW ($M+C_1,C_2$)') -pl.xlabel("$j$",fontsize=fs) -pl.ylabel("$i$",fontsize=fs) +pl.xlabel("$j$", fontsize=fs) +pl.ylabel("$i$", fontsize=fs) pl.tight_layout() -pl.show() \ No newline at end of file +pl.show() diff --git a/test/test_gromov.py b/test/test_gromov.py index 43b63e1..cd180d4 100644 --- a/test/test_gromov.py +++ b/test/test_gromov.py @@ -145,7 +145,8 @@ def test_gromov_entropic_barycenter(): 'kl_loss', 2e-3, max_iter=100, tol=1e-3) np.testing.assert_allclose(Cb2.shape, (n_samples, n_samples)) - + + def test_fgw(): n_samples = 50 # nb samples @@ -155,9 +156,9 @@ def test_fgw(): xs = ot.datasets.make_2D_samples_gauss(n_samples, mu_s, cov_s) xt = xs[::-1].copy() - - ys = np.random.randn(xs.shape[0],2) - yt= ys[::-1].copy() + + ys = np.random.randn(xs.shape[0], 2) + yt = ys[::-1].copy() p = ot.unif(n_samples) q = ot.unif(n_samples) @@ -167,11 +168,11 @@ def test_fgw(): C1 /= C1.max() C2 /= C2.max() - - M=ot.dist(ys,yt) - M/=M.max() - G = ot.gromov.fused_gromov_wasserstein(M,C1, C2, p, q, 'square_loss',alpha=0.5) + M = ot.dist(ys, yt) + M /= M.max() + + G = ot.gromov.fused_gromov_wasserstein(M, C1, C2, p, q, 'square_loss', alpha=0.5) # check constratints np.testing.assert_allclose( @@ -187,36 +188,36 @@ def test_fgw_barycenter(): Xs, ys = ot.datasets.make_data_classif('3gauss', ns) Xt, yt = ot.datasets.make_data_classif('3gauss2', nt) - - ys = np.random.randn(Xs.shape[0],2) - yt= np.random.randn(Xt.shape[0],2) + + ys = np.random.randn(Xs.shape[0], 2) + yt = np.random.randn(Xt.shape[0], 2) C1 = ot.dist(Xs) C2 = ot.dist(Xt) n_samples = 3 - X,C,log = ot.gromov.fgw_barycenters(n_samples,[ys,yt] ,[C1, C2],[ot.unif(ns), ot.unif(nt)],[.5, .5],0.5, - fixed_structure=False,fixed_features=False, - p=ot.unif(n_samples),loss_fun='square_loss', - max_iter=100, tol=1e-3) + X, C, log = ot.gromov.fgw_barycenters(n_samples, [ys, yt], [C1, C2], [ot.unif(ns), ot.unif(nt)], [.5, .5], 0.5, + fixed_structure=False, fixed_features=False, + p=ot.unif(n_samples), loss_fun='square_loss', + max_iter=100, tol=1e-3) np.testing.assert_allclose(C.shape, (n_samples, n_samples)) np.testing.assert_allclose(X.shape, (n_samples, ys.shape[1])) xalea = np.random.randn(n_samples, 2) init_C = ot.dist(xalea, xalea) - - X,C,log = ot.gromov.fgw_barycenters(n_samples,[ys,yt] ,[C1, C2],ps=[ot.unif(ns), ot.unif(nt)],lambdas=[.5, .5],alpha=0.5, - fixed_structure=True,init_C=init_C,fixed_features=False, - p=ot.unif(n_samples),loss_fun='square_loss', - max_iter=100, tol=1e-3) + + X, C, log = ot.gromov.fgw_barycenters(n_samples, [ys, yt], [C1, C2], ps=[ot.unif(ns), ot.unif(nt)], lambdas=[.5, .5], alpha=0.5, + fixed_structure=True, init_C=init_C, fixed_features=False, + p=ot.unif(n_samples), loss_fun='square_loss', + max_iter=100, tol=1e-3) np.testing.assert_allclose(C.shape, (n_samples, n_samples)) np.testing.assert_allclose(X.shape, (n_samples, ys.shape[1])) - - init_X=np.random.randn(n_samples,ys.shape[1]) - X,C,log = ot.gromov.fgw_barycenters(n_samples,[ys,yt] ,[C1, C2],[ot.unif(ns), ot.unif(nt)],[.5, .5],0.5, - fixed_structure=False,fixed_features=True, init_X=init_X, - p=ot.unif(n_samples),loss_fun='square_loss', - max_iter=100, tol=1e-3) + init_X = np.random.randn(n_samples, ys.shape[1]) + + X, C, log = ot.gromov.fgw_barycenters(n_samples, [ys, yt], [C1, C2], [ot.unif(ns), ot.unif(nt)], [.5, .5], 0.5, + fixed_structure=False, fixed_features=True, init_X=init_X, + p=ot.unif(n_samples), loss_fun='square_loss', + max_iter=100, tol=1e-3) np.testing.assert_allclose(C.shape, (n_samples, n_samples)) np.testing.assert_allclose(X.shape, (n_samples, ys.shape[1])) diff --git a/test/test_optim.py b/test/test_optim.py index 1188ef6..e7ba32a 100644 --- a/test/test_optim.py +++ b/test/test_optim.py @@ -65,8 +65,9 @@ def test_generalized_conditional_gradient(): np.testing.assert_allclose(a, G.sum(1), atol=1e-05) np.testing.assert_allclose(b, G.sum(0), atol=1e-05) - + + def test_solve_1d_linesearch_quad_funct(): - np.testing.assert_allclose(ot.optim.solve_1d_linesearch_quad_funct(1,-1,0),0.5) - np.testing.assert_allclose(ot.optim.solve_1d_linesearch_quad_funct(-1,5,0),0) - np.testing.assert_allclose(ot.optim.solve_1d_linesearch_quad_funct(-1,0.5,0),1) + np.testing.assert_allclose(ot.optim.solve_1d_linesearch_quad_funct(1, -1, 0), 0.5) + np.testing.assert_allclose(ot.optim.solve_1d_linesearch_quad_funct(-1, 5, 0), 0) + np.testing.assert_allclose(ot.optim.solve_1d_linesearch_quad_funct(-1, 0.5, 0), 1) -- cgit v1.2.3 From e1bd94bb7e85a0d2fd0fcd7642b06da12c1db6db Mon Sep 17 00:00:00 2001 From: tvayer Date: Wed, 29 May 2019 17:05:38 +0200 Subject: code review1 --- examples/plot_barycenter_fgw.py | 30 +++++++---- examples/plot_fgw.py | 32 ++++++++++-- ot/gromov.py | 108 +++++++++++++++++++++++++++++++++++----- ot/optim.py | 31 ++++++------ test/test_gromov.py | 57 ++++++++++++++++----- 5 files changed, 204 insertions(+), 54 deletions(-) (limited to 'examples') diff --git a/examples/plot_barycenter_fgw.py b/examples/plot_barycenter_fgw.py index 9eea036..e4be447 100644 --- a/examples/plot_barycenter_fgw.py +++ b/examples/plot_barycenter_fgw.py @@ -125,7 +125,11 @@ def graph_colors(nx_graph, vmin=0, vmax=7): colors.append(val_map[node]) return colors -#%% create dataset +############################################################################## +# Generate data +# ------------- + +#%% circular dataset # We build a dataset of noisy circular graphs. # Noise is added on the structures by random connections and on the features by gaussian noise. @@ -135,7 +139,11 @@ X0 = [] for k in range(9): X0.append(build_noisy_circular_graph(np.random.randint(15, 25), with_noise=True, structure_noise=True, p=3)) -#%% Plot dataset +############################################################################## +# Plot data +# --------- + +#%% Plot graphs plt.figure(figsize=(8, 10)) for i in range(len(X0)): @@ -146,9 +154,11 @@ for i in range(len(X0)): plt.suptitle('Dataset of noisy graphs. Color indicates the label', fontsize=20) plt.show() +############################################################################## +# Barycenter computation +# ---------------------- -#%% -# We compute the barycenter using FGW. Structure matrices are computed using the shortest_path distance in the graph +#%% We compute the barycenter using FGW. Structure matrices are computed using the shortest_path distance in the graph # Features distances are the euclidean distances Cs = [shortest_path(nx.adjacency_matrix(x)) for x in X0] ps = [np.ones(len(x.nodes())) / len(x.nodes()) for x in X0] @@ -156,14 +166,16 @@ Ys = [np.array([v for (k, v) in nx.get_node_attributes(x, 'attr_name').items()]) lambdas = np.array([np.ones(len(Ys)) / len(Ys)]).ravel() sizebary = 15 # we choose a barycenter with 15 nodes -#%% - A, C, log = fgw_barycenters(sizebary, Ys, Cs, ps, lambdas, alpha=0.95) -#%% +############################################################################## +# Plot Barycenter +# ------------------------- + +#%% Create the barycenter bary = nx.from_numpy_matrix(sp_to_adjency(C, threshinf=0, threshsup=find_thresh(C, sup=100, step=100)[0])) -for i in range(len(A.ravel())): - bary.add_node(i, attr_name=float(A.ravel()[i])) +for i, v in enumerate(A.ravel()): + bary.add_node(i, attr_name=v) #%% pos = nx.kamada_kawai_layout(bary) diff --git a/examples/plot_fgw.py b/examples/plot_fgw.py index ae3c487..43efc94 100644 --- a/examples/plot_fgw.py +++ b/examples/plot_fgw.py @@ -22,12 +22,16 @@ import numpy as np import ot from ot.gromov import gromov_wasserstein, fused_gromov_wasserstein +############################################################################## +# Generate data +# --------- + #%% parameters # We create two 1D random measures -n = 20 -n2 = 30 -sig = 1 -sig2 = 0.1 +n = 20 # number of points in the first distribution +n2 = 30 # number of points in the second distribution +sig = 1 # std of first distribution +sig2 = 0.1 # std of second distribution np.random.seed(0) @@ -43,6 +47,10 @@ yt = yt[::-1, :] p = ot.unif(n) q = ot.unif(n2) +############################################################################## +# Plot data +# --------- + #%% plot the distributions pl.close(10) @@ -64,15 +72,22 @@ pl.yticks(()) pl.tight_layout() pl.show() +############################################################################## +# Create structure matrices and across-feature distance matrix +# --------- #%% Structure matrices and across-features distance matrix C1 = ot.dist(xs) -C2 = ot.dist(xt).T +C2 = ot.dist(xt) M = ot.dist(ys, yt) w1 = ot.unif(C1.shape[0]) w2 = ot.unif(C2.shape[0]) Got = ot.emd([], [], M) +############################################################################## +# Plot matrices +# --------- + #%% cmap = 'Reds' pl.close(10) @@ -112,6 +127,9 @@ pl.tight_layout() ax3.set_aspect('auto') pl.show() +############################################################################## +# Compute FGW/GW +# --------- #%% Computing FGW and GW alpha = 1e-3 @@ -123,6 +141,10 @@ ot.toc() #%reload_ext WGW Gg, log = gromov_wasserstein(C1, C2, p, q, loss_fun='square_loss', verbose=True, log=True) +############################################################################## +# Visualize transport matrices +# --------- + #%% visu OT matrix cmap = 'Blues' fs = 15 diff --git a/ot/gromov.py b/ot/gromov.py index 5a57dc8..53349b7 100644 --- a/ot/gromov.py +++ b/ot/gromov.py @@ -10,6 +10,7 @@ Gromov-Wasserstein transport method # Nicolas Courty # Rémi Flamary # Titouan Vayer +# # License: MIT License import numpy as np @@ -351,9 +352,9 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs return cg(p, q, 0, 1, f, df, G0, armijo=armijo, C1=C1, C2=C2, constC=constC, **kwargs) -def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, armijo=False, **kwargs): +def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, armijo=False, log=False, **kwargs): """ - Computes the FGW distance between two graphs see [3] + Computes the FGW transport between two graphs see [24] .. math:: \gamma = arg\min_\gamma (1-\alpha)*<\gamma,M>_F + alpha* \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l} s.t. \gamma 1 = p @@ -377,7 +378,7 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, distribution in the source space q : ndarray, shape (nt,) distribution in the target space - loss_fun : string,optionnal + loss_fun : string,optional loss function used for the solver max_iter : int, optional Max number of iterations @@ -416,7 +417,86 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, def df(G): return gwggrad(constC, hC1, hC2, G) - return cg(p, q, M, alpha, f, df, G0, armijo=armijo, C1=C1, C2=C2, constC=constC, **kwargs) + if log: + res, log = cg(p, q, M, alpha, f, df, G0, armijo=armijo, C1=C1, C2=C2, constC=constC, log=True, **kwargs) + log['fgw_dist'] = log['loss'][::-1][0] + return res, log + else: + return cg(p, q, M, alpha, f, df, G0, armijo=armijo, C1=C1, C2=C2, constC=constC, **kwargs) + + +def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, armijo=False, log=False, **kwargs): + """ + Computes the FGW distance between two graphs see [24] + .. math:: + \gamma = arg\min_\gamma (1-\alpha)*<\gamma,M>_F + alpha* \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l} + s.t. \gamma 1 = p + \gamma^T 1= q + \gamma\geq 0 + where : + - M is the (ns,nt) metric cost matrix + - :math:`f` is the regularization term ( and df is its gradient) + - a and b are source and target weights (sum to 1) + - L is a loss function to account for the misfit between the similarity matrices + The algorithm used for solving the problem is conditional gradient as discussed in [1]_ + Parameters + ---------- + M : ndarray, shape (ns, nt) + Metric cost matrix between features across domains + C1 : ndarray, shape (ns, ns) + Metric cost matrix respresentative of the structure in the source space + C2 : ndarray, shape (nt, nt) + Metric cost matrix espresentative of the structure in the target space + p : ndarray, shape (ns,) + distribution in the source space + q : ndarray, shape (nt,) + distribution in the target space + loss_fun : string,optional + loss function used for the solver + max_iter : int, optional + Max number of iterations + tol : float, optional + Stop threshold on error (>0) + verbose : bool, optional + Print information along iterations + log : bool, optional + record log if True + armijo : bool, optional + If True the steps of the line-search is found via an armijo research. Else closed form is used. + If there is convergence issues use False. + **kwargs : dict + parameters can be directly pased to the ot.optim.cg solver + Returns + ------- + gamma : (ns x nt) ndarray + Optimal transportation matrix for the given parameters + log : dict + log dictionary return only if log==True in parameters + References + ---------- + .. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain + and Courty Nicolas + "Optimal Transport for structured data with application on graphs" + International Conference on Machine Learning (ICML). 2019. + """ + + constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun) + + G0 = p[:, None] * q[None, :] + + def f(G): + return gwloss(constC, hC1, hC2, G) + + def df(G): + return gwggrad(constC, hC1, hC2, G) + + res, log = cg(p, q, M, alpha, f, df, G0, armijo=armijo, C1=C1, C2=C2, constC=constC, log=True, **kwargs) + if log: + log['fgw_dist'] = log['loss'][::-1][0] + log['T'] = res + return log['fgw_dist'], log + else: + return log['fgw_dist'] def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs): @@ -889,7 +969,7 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_features=False, p=None, loss_fun='square_loss', max_iter=100, tol=1e-9, - verbose=False, log=True, init_C=None, init_X=None): + verbose=False, log=False, init_C=None, init_X=None): """ Compute the fgw barycenter as presented eq (5) in [24]. ---------- @@ -919,7 +999,8 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_ Barycenters' features C : ndarray, shape (N,N) Barycenters' structure matrix - log_: + log_: dictionary + Only returned when log=True T : list of (N,ns) transport matrices Ms : all distance matrices between the feature of the barycenter and the other features dist(X,Ys) shape (N,ns) References @@ -1015,14 +1096,13 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_ T = [fused_gromov_wasserstein((1 - alpha) * Ms[s], C, Cs[s], p, ps[s], loss_fun, alpha, numItermax=max_iter, stopThr=1e-5, verbose=verbose) for s in range(S)] # T is N,ns - - log_['Ts_iter'].append(T) err_feature = np.linalg.norm(X - Xprev.reshape(N, d)) err_structure = np.linalg.norm(C - Cprev) if log: log_['err_feature'].append(err_feature) log_['err_structure'].append(err_structure) + log_['Ts_iter'].append(T) if verbose: if cpt % 200 == 0: @@ -1032,11 +1112,15 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_ print('{:5d}|{:8e}|'.format(cpt, err_feature)) cpt += 1 - log_['T'] = T # from target to Ys - log_['p'] = p - log_['Ms'] = Ms # Ms are N,ns + if log: + log_['T'] = T # from target to Ys + log_['p'] = p + log_['Ms'] = Ms # Ms are N,ns - return X, C, log_ + if log: + return X, C, log_ + else: + return X, C def update_sructure_matrix(p, lambdas, T, Cs): diff --git a/ot/optim.py b/ot/optim.py index 7d103e2..4d428d9 100644 --- a/ot/optim.py +++ b/ot/optim.py @@ -5,6 +5,7 @@ Optimization algorithms for OT # Author: Remi Flamary # Titouan Vayer +# # License: MIT License import numpy as np @@ -88,20 +89,20 @@ def do_linesearch(cost, G, deltaG, Mi, f_val, Cost matrix of the linearized transport problem. Corresponds to the gradient of the cost f_val : float Value of the cost at G - armijo : bool, optionnal + armijo : bool, optional If True the steps of the line-search is found via an armijo research. Else closed form is used. If there is convergence issues use False. - C1 : ndarray (ns,ns), optionnal + C1 : ndarray (ns,ns), optional Structure matrix in the source domain. Only used when armijo=False - C2 : ndarray (nt,nt), optionnal + C2 : ndarray (nt,nt), optional Structure matrix in the target domain. Only used when armijo=False - reg : float, optionnal + reg : float, optional Regularization parameter. Only used when armijo=False Gc : ndarray (ns,nt) Optimal map found by linearization in the FW algorithm. Only used when armijo=False constC : ndarray (ns,nt) Constant for the gromov cost. See [24]. Only used when armijo=False - M : ndarray (ns,nt), optionnal + M : ndarray (ns,nt), optional Cost matrix between the features. Only used when armijo=False Returns ------- @@ -223,9 +224,9 @@ def cg(a, b, M, reg, f, df, G0=None, numItermax=200, it = 0 if verbose: - print('{:5s}|{:12s}|{:8s}'.format( - 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32) - print('{:5d}|{:8e}|{:8e}'.format(it, f_val, 0)) + print('{:5s}|{:12s}|{:8s}|{:8s}'.format( + 'It.', 'Loss', 'Relative loss', 'Absolute loss') + '\n' + '-' * 48) + print('{:5d}|{:8e}|{:8e}|{:8e}'.format(it, f_val, 0, 0)) while loop: @@ -261,8 +262,8 @@ def cg(a, b, M, reg, f, df, G0=None, numItermax=200, if verbose: if it % 20 == 0: - print('{:5s}|{:12s}|{:8s}'.format( - 'It.', 'Loss', 'Relative variation loss', 'Absolute variation loss') + '\n' + '-' * 32) + print('{:5s}|{:12s}|{:8s}|{:8s}'.format( + 'It.', 'Loss', 'Relative loss', 'Absolute loss') + '\n' + '-' * 48) print('{:5d}|{:8e}|{:8e}|{:8e}'.format(it, f_val, relative_delta_fval, abs_delta_fval)) if log: @@ -363,9 +364,9 @@ def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10, it = 0 if verbose: - print('{:5s}|{:12s}|{:8s}'.format( - 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32) - print('{:5d}|{:8e}|{:8e}'.format(it, f_val, 0)) + print('{:5s}|{:12s}|{:8s}|{:8s}'.format( + 'It.', 'Loss', 'Relative loss', 'Absolute loss') + '\n' + '-' * 48) + print('{:5d}|{:8e}|{:8e}|{:8e}'.format(it, f_val, 0, 0)) while loop: @@ -402,8 +403,8 @@ def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10, if verbose: if it % 20 == 0: - print('{:5s}|{:12s}|{:8s}'.format( - 'It.', 'Loss', 'Relative variation loss', 'Absolute variation loss') + '\n' + '-' * 32) + print('{:5s}|{:12s}|{:8s}|{:8s}'.format( + 'It.', 'Loss', 'Relative loss', 'Absolute loss') + '\n' + '-' * 48) print('{:5d}|{:8e}|{:8e}|{:8e}'.format(it, f_val, relative_delta_fval, abs_delta_fval)) if log: diff --git a/test/test_gromov.py b/test/test_gromov.py index cd180d4..ec85abf 100644 --- a/test/test_gromov.py +++ b/test/test_gromov.py @@ -2,6 +2,7 @@ # Author: Erwan Vautier # Nicolas Courty +# Titouan Vayer # # License: MIT License @@ -10,6 +11,8 @@ import ot def test_gromov(): + np.random.seed(42) + n_samples = 50 # nb samples mu_s = np.array([0, 0]) @@ -36,6 +39,11 @@ def test_gromov(): np.testing.assert_allclose( q, G.sum(0), atol=1e-04) # cf convergence gromov + Id = (1 / n_samples) * np.eye(n_samples, n_samples) + + np.testing.assert_allclose( + G, np.flipud(Id), atol=1e-04) + gw, log = ot.gromov.gromov_wasserstein2(C1, C2, p, q, 'kl_loss', log=True) G = log['T'] @@ -50,6 +58,8 @@ def test_gromov(): def test_entropic_gromov(): + np.random.seed(42) + n_samples = 50 # nb samples mu_s = np.array([0, 0]) @@ -92,6 +102,7 @@ def test_entropic_gromov(): def test_gromov_barycenter(): + np.random.seed(42) ns = 50 nt = 60 @@ -120,7 +131,7 @@ def test_gromov_barycenter(): def test_gromov_entropic_barycenter(): - + np.random.seed(42) ns = 50 nt = 60 @@ -148,6 +159,8 @@ def test_gromov_entropic_barycenter(): def test_fgw(): + np.random.seed(42) + n_samples = 50 # nb samples mu_s = np.array([0, 0]) @@ -180,8 +193,26 @@ def test_fgw(): np.testing.assert_allclose( q, G.sum(0), atol=1e-04) # cf convergence fgw + Id = (1 / n_samples) * np.eye(n_samples, n_samples) + + np.testing.assert_allclose( + G, np.flipud(Id), atol=1e-04) # cf convergence gromov + + fgw, log = ot.gromov.fused_gromov_wasserstein2(M, C1, C2, p, q, 'square_loss', alpha=0.5, log=True) + + G = log['T'] + + np.testing.assert_allclose(fgw, 0, atol=1e-1, rtol=1e-1) + + # check constratints + np.testing.assert_allclose( + p, G.sum(1), atol=1e-04) # cf convergence gromov + np.testing.assert_allclose( + q, G.sum(0), atol=1e-04) # cf convergence gromov + def test_fgw_barycenter(): + np.random.seed(42) ns = 50 nt = 60 @@ -196,28 +227,28 @@ def test_fgw_barycenter(): C2 = ot.dist(Xt) n_samples = 3 - X, C, log = ot.gromov.fgw_barycenters(n_samples, [ys, yt], [C1, C2], [ot.unif(ns), ot.unif(nt)], [.5, .5], 0.5, - fixed_structure=False, fixed_features=False, - p=ot.unif(n_samples), loss_fun='square_loss', - max_iter=100, tol=1e-3) + X, C = ot.gromov.fgw_barycenters(n_samples, [ys, yt], [C1, C2], [ot.unif(ns), ot.unif(nt)], [.5, .5], 0.5, + fixed_structure=False, fixed_features=False, + p=ot.unif(n_samples), loss_fun='square_loss', + max_iter=100, tol=1e-3) np.testing.assert_allclose(C.shape, (n_samples, n_samples)) np.testing.assert_allclose(X.shape, (n_samples, ys.shape[1])) xalea = np.random.randn(n_samples, 2) init_C = ot.dist(xalea, xalea) - X, C, log = ot.gromov.fgw_barycenters(n_samples, [ys, yt], [C1, C2], ps=[ot.unif(ns), ot.unif(nt)], lambdas=[.5, .5], alpha=0.5, - fixed_structure=True, init_C=init_C, fixed_features=False, - p=ot.unif(n_samples), loss_fun='square_loss', - max_iter=100, tol=1e-3) + X, C = ot.gromov.fgw_barycenters(n_samples, [ys, yt], [C1, C2], ps=[ot.unif(ns), ot.unif(nt)], lambdas=[.5, .5], alpha=0.5, + fixed_structure=True, init_C=init_C, fixed_features=False, + p=ot.unif(n_samples), loss_fun='square_loss', + max_iter=100, tol=1e-3) np.testing.assert_allclose(C.shape, (n_samples, n_samples)) np.testing.assert_allclose(X.shape, (n_samples, ys.shape[1])) init_X = np.random.randn(n_samples, ys.shape[1]) - X, C, log = ot.gromov.fgw_barycenters(n_samples, [ys, yt], [C1, C2], [ot.unif(ns), ot.unif(nt)], [.5, .5], 0.5, - fixed_structure=False, fixed_features=True, init_X=init_X, - p=ot.unif(n_samples), loss_fun='square_loss', - max_iter=100, tol=1e-3) + X, C = ot.gromov.fgw_barycenters(n_samples, [ys, yt], [C1, C2], [ot.unif(ns), ot.unif(nt)], [.5, .5], 0.5, + fixed_structure=False, fixed_features=True, init_X=init_X, + p=ot.unif(n_samples), loss_fun='square_loss', + max_iter=100, tol=1e-3) np.testing.assert_allclose(C.shape, (n_samples, n_samples)) np.testing.assert_allclose(X.shape, (n_samples, ys.shape[1])) -- cgit v1.2.3