Diffstat (limited to 'ot/gromov.py'):
 ot/gromov.py | 514 ++++++++++++++++++++++++++++----------------------------
 1 file changed, 266 insertions(+), 248 deletions(-)
diff --git a/ot/gromov.py b/ot/gromov.py
index ca96b31..699ae4c 100644
--- a/ot/gromov.py
+++ b/ot/gromov.py
@@ -1,9 +1,6 @@
-
# -*- coding: utf-8 -*-
"""
Gromov-Wasserstein transport method
-
-
"""
# Author: Erwan Vautier <erwan.vautier@gmail.com>
@@ -22,7 +19,7 @@ from .optim import cg
def init_matrix(C1, C2, p, q, loss_fun='square_loss'):
- """ Return loss matrices and tensors for Gromov-Wasserstein fast computation
+ """Return loss matrices and tensors for Gromov-Wasserstein fast computation
Returns the value of \mathcal{L}(C1,C2) \otimes T with the selected loss
function as the loss function of Gromov-Wasserstein discrepancy.
@@ -51,29 +48,27 @@ def init_matrix(C1, C2, p, q, loss_fun='square_loss'):
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
+ Metric cost matrix in the target space
T : ndarray, shape (ns, nt)
- Coupling between source and target spaces
+ Coupling between source and target spaces
p : ndarray, shape (ns,)
-
Returns
-------
-
constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
+ Constant C matrix in Eq. (6)
hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
+ h1(C1) matrix in Eq. (6)
hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
+ h2(C2) matrix in Eq. (6)
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
@@ -114,31 +109,29 @@ def init_matrix(C1, C2, p, q, loss_fun='square_loss'):
def tensor_product(constC, hC1, hC2, T):
- """ Return the tensor for Gromov-Wasserstein fast computation
+ """Return the tensor for Gromov-Wasserstein fast computation
The tensor is computed as described in Proposition 1 Eq. (6) in [12].
Parameters
----------
constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
+ Constant C matrix in Eq. (6)
hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
+ h1(C1) matrix in Eq. (6)
hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
-
+ h2(C2) matrix in Eq. (6)
Returns
-------
-
tens : ndarray, shape (ns, nt)
- \mathcal{L}(C1,C2) \otimes T tensor-matrix multiplication result
+ \mathcal{L}(C1,C2) \otimes T tensor-matrix multiplication result
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
A = -np.dot(hC1, T).dot(hC2.T)
@@ -148,32 +141,31 @@ def tensor_product(constC, hC1, hC2, T):
def gwloss(constC, hC1, hC2, T):
- """ Return the Loss for Gromov-Wasserstein
+ """Return the Loss for Gromov-Wasserstein
The loss is computed as described in Proposition 1 Eq. (6) in [12].
Parameters
----------
constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
+ Constant C matrix in Eq. (6)
hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
+ h1(C1) matrix in Eq. (6)
hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
+ h2(C2) matrix in Eq. (6)
T : ndarray, shape (ns, nt)
- Current value of transport matrix T
+ Current value of transport matrix T
Returns
-------
-
loss : float
- Gromov Wasserstein loss
+ Gromov Wasserstein loss
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
@@ -183,32 +175,31 @@ def gwloss(constC, hC1, hC2, T):
def gwggrad(constC, hC1, hC2, T):
- """ Return the gradient for Gromov-Wasserstein
+ """Return the gradient for Gromov-Wasserstein
The gradient is computed as described in Proposition 2 in [12].
Parameters
----------
constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
+ Constant C matrix in Eq. (6)
hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
+ h1(C1) matrix in Eq. (6)
hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
+ h2(C2) matrix in Eq. (6)
T : ndarray, shape (ns, nt)
- Current value of transport matrix T
+ Current value of transport matrix T
Returns
-------
-
grad : ndarray, shape (ns, nt)
Gromov Wasserstein gradient
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
return 2 * tensor_product(constC, hC1, hC2,
@@ -222,19 +213,19 @@ def update_square_loss(p, lambdas, T, Cs):
Parameters
----------
- p : ndarray, shape (N,)
- masses in the targeted barycenter
+ p : ndarray, shape (N,)
+ Masses in the targeted barycenter.
lambdas : list of float
- list of the S spaces' weights
- T : list of S np.ndarray(ns,N)
- the S Ts couplings calculated at each iteration
+ List of the S spaces' weights.
+ T : list of S np.ndarray of shape (ns,N)
+ The S Ts couplings calculated at each iteration.
Cs : list of S ndarray, shape(ns,ns)
- Metric cost matrices
+ Metric cost matrices.
Returns
----------
- C : ndarray, shape (nt,nt)
- updated C matrix
+ C : ndarray, shape (nt, nt)
+ Updated C matrix.
"""
tmpsum = sum([lambdas[s] * np.dot(T[s].T, Cs[s]).dot(T[s])
for s in range(len(T))])
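The remainder of the function (elided by the hunk) divides tmpsum elementwise by ppt = np.outer(p, p), exactly as in update_sructure_matrix further down, so the closed-form square-loss update being computed is

.. math::
    C = \frac{\sum_s \lambda_s T_s^T C_s T_s}{p p^T}

with the division taken elementwise.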
@@ -251,12 +242,12 @@ def update_kl_loss(p, lambdas, T, Cs):
Parameters
----------
p : ndarray, shape (N,)
- weights in the targeted barycenter
+ Weights in the targeted barycenter.
lambdas : list of float
List of the S spaces' weights.
- T : list of S np.ndarray(ns,N)
- the S Ts couplings calculated at each iteration
+ T : list of S np.ndarray of shape (ns,N)
+ The S Ts couplings calculated at each iteration.
Cs : list of S ndarray, shape(ns,ns)
- Metric cost matrices
+ Metric cost matrices.
Returns
----------
@@ -277,27 +268,27 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs
The function solves the following optimization problem:
.. math::
- \GW_Dist = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- p : distribution in the source space
- q : distribution in the target space
- L : loss function to account for the misfit between the similarity matrices
- H : entropy
+ - C1 : Metric cost matrix in the source space
+ - C2 : Metric cost matrix in the target space
+ - p : distribution in the source space
+ - q : distribution in the target space
+ - L : loss function to account for the misfit between the similarity matrices
+ - H : entropy
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
- p : ndarray, shape (ns,)
- distribution in the source space
- q : ndarray, shape (nt,)
- distribution in the target space
- loss_fun : string
+ Metric cost matrix in the target space
+ p : ndarray, shape (ns,)
+ Distribution in the source space
+ q : ndarray, shape (nt,)
+ Distribution in the target space
+ loss_fun : str
loss function used for the solver either 'square_loss' or 'kl_loss'
max_iter : int, optional
@@ -312,15 +303,15 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs
If True, the step of the line search is found via an Armijo search. Else closed form is used.
If there are convergence issues, use False.
**kwargs : dict
- parameters can be directly pased to the ot.optim.cg solver
+ Parameters can be directly passed to the ot.optim.cg solver.
Returns
-------
T : ndarray, shape (ns, nt)
- coupling between the two spaces that minimizes :
+ Coupling between the two spaces that minimizes:
\sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
log : dict
- convergence information and loss
+ Convergence information and loss.
References
----------
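For reference, a minimal usage sketch of gromov_wasserstein as documented above (the normalized random point clouds are an assumption for illustration):

    import numpy as np
    import ot

    xs, xt = np.random.randn(30, 3), np.random.randn(20, 2)
    C1, C2 = ot.dist(xs, xs), ot.dist(xt, xt)
    C1, C2 = C1 / C1.max(), C2 / C2.max()
    p, q = ot.unif(30), ot.unif(20)

    T = ot.gromov.gromov_wasserstein(C1, C2, p, q, 'square_loss')
    # T has shape (30, 20) and its marginals match p and q
    assert np.allclose(T.sum(axis=1), p)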
@@ -355,31 +346,37 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs
def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, armijo=False, log=False, **kwargs):
"""
Computes the FGW transport between two graphs see [24]
+
.. math::
- \gamma = arg\min_\gamma (1-\alpha)*<\gamma,M>_F + alpha* \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ \gamma = arg\min_\gamma (1-\\alpha)*<\gamma,M>_F + \\alpha* \sum_{i,j,k,l}
+ L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+
s.t. \gamma 1 = p
\gamma^T 1= q
\gamma\geq 0
+
where :
- M is the (ns,nt) metric cost matrix
- :math:`f` is the regularization term ( and df is its gradient)
- a and b are source and target weights (sum to 1)
- L is a loss function to account for the misfit between the similarity matrices
- The algorithm used for solving the problem is conditional gradient as discussed in [1]_
+
+ The algorithm used for solving the problem is conditional gradient as discussed in [24]_
+
Parameters
----------
- M : ndarray, shape (ns, nt)
- Metric cost matrix between features across domains
+ M : ndarray, shape (ns, nt)
+ Metric cost matrix between features across domains
C1 : ndarray, shape (ns, ns)
- Metric cost matrix respresentative of the structure in the source space
+ Metric cost matrix representative of the structure in the source space
C2 : ndarray, shape (nt, nt)
- Metric cost matrix espresentative of the structure in the target space
- p : ndarray, shape (ns,)
- distribution in the source space
- q : ndarray, shape (nt,)
- distribution in the target space
- loss_fun : string,optional
- loss function used for the solver
+ Metric cost matrix representative of the structure in the target space
+ p : ndarray, shape (ns,)
+ Distribution in the source space
+ q : ndarray, shape (nt,)
+ Distribution in the target space
+ loss_fun : str, optional
+ Loss function used for the solver
max_iter : int, optional
Max number of iterations
tol : float, optional
@@ -392,19 +389,21 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5,
If True, the step of the line search is found via an Armijo search. Else closed form is used.
If there are convergence issues, use False.
**kwargs : dict
- parameters can be directly pased to the ot.optim.cg solver
+ Parameters can be directly passed to the ot.optim.cg solver.
+
Returns
-------
- gamma : (ns x nt) ndarray
- Optimal transportation matrix for the given parameters
+ gamma : ndarray, shape (ns, nt)
+ Optimal transportation matrix for the given parameters.
log : dict
- log dictionary return only if log==True in parameters
+ Log dictionary return only if log==True in parameters.
+
References
----------
.. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain
- and Courty Nicolas
- "Optimal Transport for structured data with application on graphs"
- International Conference on Machine Learning (ICML). 2019.
+ and Courty Nicolas "Optimal Transport for structured data with
+ application on graphs", International Conference on Machine Learning
+ (ICML). 2019.
"""
constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun)
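A usage sketch for the solver above (random features and structures, and alpha=0.5, are illustrative assumptions; M compares features across domains while C1/C2 encode intra-domain structure):

    import numpy as np
    import ot

    ns, nt = 30, 20
    ys, yt = np.random.randn(ns, 5), np.random.randn(nt, 5)  # shared feature space
    M = ot.dist(ys, yt)
    xs, xt = np.random.randn(ns, 2), np.random.randn(nt, 2)
    C1, C2 = ot.dist(xs, xs), ot.dist(xt, xt)
    p, q = ot.unif(ns), ot.unif(nt)

    gamma = ot.gromov.fused_gromov_wasserstein(
        M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5)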
@@ -428,31 +427,37 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5,
def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, armijo=False, log=False, **kwargs):
"""
Computes the FGW distance between two graphs see [24]
+
.. math::
- \gamma = arg\min_\gamma (1-\alpha)*<\gamma,M>_F + alpha* \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ \min_\gamma (1-\\alpha)*<\gamma,M>_F + \\alpha* \sum_{i,j,k,l}
+ L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+
s.t. \gamma 1 = p
\gamma^T 1= q
\gamma\geq 0
+
where :
- M is the (ns,nt) metric cost matrix
- :math:`f` is the regularization term ( and df is its gradient)
- a and b are source and target weights (sum to 1)
- L is a loss function to account for the misfit between the similarity matrices
The algorithm used for solving the problem is conditional gradient as discussed in [24]_
+
Parameters
----------
- M : ndarray, shape (ns, nt)
- Metric cost matrix between features across domains
+ M : ndarray, shape (ns, nt)
+ Metric cost matrix between features across domains
C1 : ndarray, shape (ns, ns)
- Metric cost matrix respresentative of the structure in the source space
+ Metric cost matrix representative of the structure in the source space.
C2 : ndarray, shape (nt, nt)
- Metric cost matrix espresentative of the structure in the target space
+ Metric cost matrix representative of the structure in the target space.
p : ndarray, shape (ns,)
- distribution in the source space
+ Distribution in the source space.
q : ndarray, shape (nt,)
- distribution in the target space
- loss_fun : string,optional
- loss function used for the solver
+ Distribution in the target space.
+ loss_fun : str, optional
+ Loss function used for the solver.
max_iter : int, optional
Max number of iterations
tol : float, optional
@@ -460,22 +465,24 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5
verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
+ Record log if True.
armijo : bool, optional
- If True the steps of the line-search is found via an armijo research. Else closed form is used.
- If there is convergence issues use False.
+ If True, the step of the line search is found via an Armijo search.
+ Else closed form is used. If there are convergence issues, use False.
**kwargs : dict
- parameters can be directly pased to the ot.optim.cg solver
+ Parameters can be directly passed to the ot.optim.cg solver.
+
Returns
-------
- gamma : (ns x nt) ndarray
- Optimal transportation matrix for the given parameters
+ gamma : ndarray, shape (ns, nt)
+ Optimal transportation matrix for the given parameters.
log : dict
- log dictionary return only if log==True in parameters
+ Log dictionary return only if log==True in parameters.
+
References
----------
.. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain
- and Courty Nicolas
+ and Courty Nicolas
"Optimal Transport for structured data with application on graphs"
International Conference on Machine Learning (ICML). 2019.
"""
@@ -506,29 +513,28 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwarg
The function solves the following optimization problem:
.. math::
- \GW_Dist = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- p : distribution in the source space
- q : distribution in the target space
- L : loss function to account for the misfit between the similarity matrices
- H : entropy
+ - C1 : Metric cost matrix in the source space
+ - C2 : Metric cost matrix in the target space
+ - p : distribution in the source space
+ - q : distribution in the target space
+ - L : loss function to account for the misfit between the similarity matrices
+ - H : entropy
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
- p : ndarray, shape (ns,)
- distribution in the source space
+ Metric cost matrix in the target space
+ p : ndarray, shape (ns,)
+ Distribution in the source space.
q : ndarray, shape (nt,)
- distribution in the target space
- loss_fun : string
+ Distribution in the target space.
+ loss_fun : str
loss function used for the solver either 'square_loss' or 'kl_loss'
-
max_iter : int, optional
Max number of iterations
tol : float, optional
@@ -540,6 +546,7 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwarg
armijo : bool, optional
If True, the step of the line search is found via an Armijo search. Else closed form is used.
If there are convergence issues, use False.
+
Returns
-------
gw_dist : float
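The 2-suffixed variant returns the scalar loss instead of the coupling; reusing C1, C2, p, q from the earlier sketch (an assumption), it reads:

    gw_dist = ot.gromov.gromov_wasserstein2(C1, C2, p, q, 'square_loss')
    # scalar value of \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}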
@@ -587,56 +594,55 @@ def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
The function solves the following optimization problem:
.. math::
- \GW = arg\min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
- s.t. \GW 1 = p
- \GW^T 1= q
- \GW\geq 0
+ GW = arg\min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
+ s.t. T 1 = p
+ T^T 1= q
+ T\geq 0
Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- p : distribution in the source space
- q : distribution in the target space
- L : loss function to account for the misfit between the similarity matrices
- H : entropy
+ - C1 : Metric cost matrix in the source space
+ - C2 : Metric cost matrix in the target space
+ - p : distribution in the source space
+ - q : distribution in the target space
+ - L : loss function to account for the misfit between the similarity matrices
+ - H : entropy
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
+ Metric cost matrix in the target space
p : ndarray, shape (ns,)
- distribution in the source space
+ Distribution in the source space
q : ndarray, shape (nt,)
- distribution in the target space
+ Distribution in the target space
loss_fun : string
- loss function used for the solver either 'square_loss' or 'kl_loss'
+ Loss function used for the solver, either 'square_loss' or 'kl_loss'
epsilon : float
Regularization term >0
max_iter : int, optional
- Max number of iterations
+ Max number of iterations
tol : float, optional
Stop threshold on error (>0)
verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
+ Record log if True.
Returns
-------
T : ndarray, shape (ns, nt)
- coupling between the two spaces that minimizes :
- \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
+ Optimal coupling between the two spaces
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
@@ -695,28 +701,28 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
The function solves the following optimization problem:
.. math::
- \GW_Dist = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
+ GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- p : distribution in the source space
- q : distribution in the target space
- L : loss function to account for the misfit between the similarity matrices
- H : entropy
+ - C1 : Metric cost matrix in the source space
+ - C2 : Metric cost matrix in the target space
+ - p : distribution in the source space
+ - q : distribution in the target space
+ - L : loss function to account for the misfit between the similarity matrices
+ - H : entropy
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
+ Metric cost matrix in the target space
p : ndarray, shape (ns,)
- distribution in the source space
+ Distribution in the source space
q : ndarray, shape (nt,)
- distribution in the target space
- loss_fun : string
- loss function used for the solver either 'square_loss' or 'kl_loss'
+ Distribution in the target space
+ loss_fun : str
+ Loss function used for the solver, either 'square_loss' or 'kl_loss'
epsilon : float
Regularization term >0
max_iter : int, optional
@@ -726,7 +732,7 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
+ Record log if True.
Returns
-------
@@ -736,11 +742,10 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
-
gw, logv = entropic_gromov_wasserstein(
C1, C2, p, q, loss_fun, epsilon, max_iter, tol, verbose, log=True)
@@ -762,29 +767,31 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
The function solves the following optimization problem:
.. math::
- C = argmin_C\in R^{NxN} \sum_s \lambda_s GW(C,Cs,p,ps)
+ C = argmin_{C\in R^{NxN}} \sum_s \lambda_s GW(C,C_s,p,p_s)
Where :
- Cs : metric cost matrix
- ps : distribution
+ - :math:`C_s` : metric cost matrix
+ - :math:`p_s` : distribution
Parameters
----------
- N : Integer
- Size of the targeted barycenter
- Cs : list of S np.ndarray(ns,ns)
- Metric cost matrices
- ps : list of S np.ndarray(ns,)
- sample weights in the S spaces
- p : ndarray, shape(N,)
- weights in the targeted barycenter
+ N : int
+ Size of the targeted barycenter
+ Cs : list of S np.ndarray of shape (ns,ns)
+ Metric cost matrices
+ ps : list of S np.ndarray of shape (ns,)
+ Sample weights in the S spaces
+ p : ndarray, shape(N,)
+ Weights in the targeted barycenter
lambdas : list of float
- list of the S spaces' weights
- loss_fun : tensor-matrix multiplication function based on specific loss function
- update : function(p,lambdas,T,Cs) that updates C according to a specific Kernel
- with the S Ts couplings calculated at each iteration
+ List of the S spaces' weights.
+ loss_fun : callable
+ Tensor-matrix multiplication function based on specific loss function.
+ update : callable
+ function(p,lambdas,T,Cs) that updates C according to a specific kernel
+ with the S Ts couplings calculated at each iteration.
epsilon : float
Regularization term >0
max_iter : int, optional
@@ -792,11 +799,11 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
tol : float, optional
Stop threshold on error (>0)
verbose : bool, optional
- Print information along iterations
+ Print information along iterations.
log : bool, optional
- record log if True
- init_C : bool, ndarray, shape(N,N)
- random initial value for the C matrix provided by user
+ Record log if True.
+ init_C : bool | ndarray, shape (N, N)
+ Random initial value for the C matrix provided by user.
Returns
-------
@@ -806,9 +813,8 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
-
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
S = len(Cs)
@@ -818,6 +824,7 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
# Initialization of C : random SPD matrix (if not provided by user)
if init_C is None:
+ # XXX use random state
xalea = np.random.randn(N, 2)
C = dist(xalea, xalea)
C /= C.max()
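The XXX note above flags a real reproducibility issue: np.random.randn draws from the global RNG. A possible fix, assuming a hypothetical random_state parameter were added to the signature, could look like:

    # hypothetical random_state parameter, not part of this patch
    rng = np.random.RandomState(random_state)
    xalea = rng.randn(N, 2)
    C = dist(xalea, xalea)
    C /= C.max()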
@@ -829,7 +836,7 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
error = []
- while(err > tol and cpt < max_iter):
+ while (err > tol) and (cpt < max_iter):
Cprev = C
T = [entropic_gromov_wasserstein(Cs[s], C, ps[s], p, loss_fun, epsilon,
@@ -873,37 +880,36 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
.. math::
C = argmin_{C\in R^{NxN}} \sum_s \lambda_s GW(C,C_s,p,p_s)
-
Where :
- Cs : metric cost matrix
- ps : distribution
+ - Cs : metric cost matrix
+ - ps : distribution
Parameters
----------
- N : Integer
- Size of the targeted barycenter
- Cs : list of S np.ndarray(ns,ns)
- Metric cost matrices
- ps : list of S np.ndarray(ns,)
- sample weights in the S spaces
- p : ndarray, shape(N,)
- weights in the targeted barycenter
+ N : int
+ Size of the targeted barycenter
+ Cs : list of S np.ndarray of shape (ns, ns)
+ Metric cost matrices
+ ps : list of S np.ndarray of shape (ns,)
+ Sample weights in the S spaces
+ p : ndarray, shape (N,)
+ Weights in the targeted barycenter
lambdas : list of float
- list of the S spaces' weights
+ List of the S spaces' weights
loss_fun : tensor-matrix multiplication function based on specific loss function
update : function(p,lambdas,T,Cs) that updates C according to a specific Kernel
with the S Ts couplings calculated at each iteration
max_iter : int, optional
Max number of iterations
tol : float, optional
- Stop threshol on error (>0)
+ Stop threshold on error (>0).
verbose : bool, optional
- Print information along iterations
+ Print information along iterations.
log : bool, optional
- record log if True
- init_C : bool, ndarray, shape(N,N)
- random initial value for the C matrix provided by user
+ Record log if True.
+ init_C : bool | ndarray, shape(N,N)
+ Random initial value for the C matrix provided by user.
Returns
-------
@@ -913,11 +919,10 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
-
S = len(Cs)
Cs = [np.asarray(Cs[s], dtype=np.float64) for s in range(S)]
@@ -925,6 +930,7 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
# Initialization of C : random SPD matrix (if not provided by user)
if init_C is None:
+ # XXX : should use a random state and not use the global seed
xalea = np.random.randn(N, 2)
C = dist(xalea, xalea)
C /= C.max()
@@ -970,47 +976,52 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_features=False,
p=None, loss_fun='square_loss', max_iter=100, tol=1e-9,
verbose=False, log=False, init_C=None, init_X=None):
- """
- Compute the fgw barycenter as presented eq (5) in [24].
+ """Compute the fgw barycenter as presented eq (5) in [24].
+
+ Parameters
----------
N : int
Desired number of samples of the target barycenter
Ys : list of ndarray, each element has shape (ns,d)
Features of all samples
Cs : list of ndarray, each element has shape (ns,ns)
- Structure matrices of all samples
+ Structure matrices of all samples
ps : list of ndarray, each element has shape (ns,)
- masses of all samples
+ Masses of all samples.
lambdas : list of float
- list of the S spaces' weights
+ List of the S spaces' weights
alpha : float
- Alpha parameter for the fgw distance
- fixed_structure : bool
- Wether to fix the structure of the barycenter during the updates
- fixed_features : bool
- Wether to fix the feature of the barycenter during the updates
- init_C : ndarray, shape (N,N), optional
- initialization for the barycenters' structure matrix. If not set random init
- init_X : ndarray, shape (N,d), optional
- initialization for the barycenters' features. If not set random init
+ Alpha parameter for the fgw distance
+ fixed_structure : bool
+ Whether to fix the structure of the barycenter during the updates
+ fixed_features : bool
+ Whether to fix the feature of the barycenter during the updates
+ init_C : ndarray, shape (N,N), optional
+ Initialization for the barycenters' structure matrix. If not set
+ a random init is used.
+ init_X : ndarray, shape (N,d), optional
+ Initialization for the barycenters' features. If not set a
+ random init is used.
+
Returns
- ----------
- X : ndarray, shape (N,d)
+ -------
+ X : ndarray, shape (N, d)
Barycenters' features
- C : ndarray, shape (N,N)
+ C : ndarray, shape (N, N)
Barycenters' structure matrix
- log_: dictionary
- Only returned when log=True
+ log_: dict
+ Only returned when log=True. It contains the keys:
T : list of (N,ns) transport matrices
- Ms : all distance matrices between the feature of the barycenter and the other features dist(X,Ys) shape (N,ns)
+ Ms : all distance matrices between the features of the barycenter and
+ the other features dist(X, Ys), each of shape (N, ns)
+
References
----------
.. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain
- and Courty Nicolas
+ and Courty Nicolas
"Optimal Transport for structured data with application on graphs"
International Conference on Machine Learning (ICML). 2019.
"""
-
S = len(Cs)
d = Ys[0].shape[1] # dimension on the node features
if p is None:
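A usage sketch for fgw_barycenters (the sizes, uniform weights and alpha=0.5 are illustrative assumptions; with log=False the function returns the pair (X, C) documented above):

    import numpy as np
    import ot

    S, n, d, N = 3, 10, 2, 8
    Ys = [np.random.randn(n, d) for _ in range(S)]
    Cs = [ot.dist(Y, Y) for Y in Ys]   # structure built from the features here
    ps = [ot.unif(n) for _ in range(S)]
    lambdas = [1. / S] * S

    X, C = ot.gromov.fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha=0.5)
    # X: (N, d) barycenter features, C: (N, N) barycenter structure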
@@ -1073,7 +1084,8 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_
T_temp = [t.T for t in T]
C = update_sructure_matrix(p, lambdas, T_temp, Cs)
- T = [fused_gromov_wasserstein((1 - alpha) * Ms[s], C, Cs[s], p, ps[s], loss_fun, alpha, numItermax=max_iter, stopThr=1e-5, verbose=verbose) for s in range(S)]
+ T = [fused_gromov_wasserstein((1 - alpha) * Ms[s], C, Cs[s], p, ps[s], loss_fun, alpha,
+ numItermax=max_iter, stopThr=1e-5, verbose=verbose) for s in range(S)]
# T is N,ns
err_feature = np.linalg.norm(X - Xprev.reshape(N, d))
@@ -1092,6 +1104,7 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_
print('{:5d}|{:8e}|'.format(cpt, err_feature))
cpt += 1
+
if log:
log_['T'] = T # from target to Ys
log_['p'] = p
@@ -1104,23 +1117,25 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_
def update_sructure_matrix(p, lambdas, T, Cs):
- """
- Updates C according to the L2 Loss kernel with the S Ts couplings
- calculated at each iteration
+ """Updates C according to the L2 Loss kernel with the S Ts couplings.
+
+ It is calculated at each iteration
+
Parameters
----------
- p : ndarray, shape (N,)
- masses in the targeted barycenter
+ p : ndarray, shape (N,)
+ Masses in the targeted barycenter.
lambdas : list of float
- list of the S spaces' weights
- T : list of S np.ndarray(ns,N)
- the S Ts couplings calculated at each iteration
- Cs : list of S ndarray, shape(ns,ns)
- Metric cost matrices
+ List of the S spaces' weights.
+ T : list of S ndarray of shape (ns, N)
+ The S Ts couplings calculated at each iteration.
+ Cs : list of S ndarray, shape (ns, ns)
+ Metric cost matrices.
+
Returns
- ----------
- C : ndarray, shape (nt,nt)
- updated C matrix
+ -------
+ C : ndarray, shape (nt, nt)
+ Updated C matrix.
"""
tmpsum = sum([lambdas[s] * np.dot(T[s].T, Cs[s]).dot(T[s]) for s in range(len(T))])
ppt = np.outer(p, p)
@@ -1129,22 +1144,26 @@ def update_sructure_matrix(p, lambdas, T, Cs):
def update_feature_matrix(lambdas, Ys, Ts, p):
- """
- Updates the feature with respect to the S Ts couplings. See "Solving the barycenter problem with Block Coordinate Descent (BCD)" in [24]
- calculated at each iteration
+ """Updates the feature with respect to the S Ts couplings.
+
+
+ See "Solving the barycenter problem with Block Coordinate Descent (BCD)"
+ in [24] calculated at each iteration
+
Parameters
----------
- p : ndarray, shape (N,)
- masses in the targeted barycenter
+ p : ndarray, shape (N,)
+ Masses in the targeted barycenter.
lambdas : list of float
- list of the S spaces' weights
+ List of the S spaces' weights
Ts : list of S np.ndarray of shape (N, ns)
The S Ts couplings calculated at each iteration.
Ys : list of S ndarray, shape(d,ns)
- The features
+ The features.
+
Returns
- ----------
- X : ndarray, shape (d,N)
+ -------
+ X : ndarray, shape (d, N)
References
----------
@@ -1153,9 +1172,8 @@ def update_feature_matrix(lambdas, Ys, Ts, p):
"Optimal Transport for structured data with application on graphs"
International Conference on Machine Learning (ICML). 2019.
"""
- p = np.diag(np.array(1 / p).reshape(-1,))
-
- tmpsum = sum([lambdas[s] * np.dot(Ys[s], Ts[s].T).dot(p) for s in range(len(Ts))])
+ p = np.array(1. / p).reshape(-1,)
+ tmpsum = sum([lambdas[s] * np.dot(Ys[s], Ts[s].T) * p[None, :] for s in range(len(Ts))])
return tmpsum
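The change above replaces the dense (N, N) diagonal-matrix product with a column-wise broadcast. A quick equivalence check, with shapes following the `# T is N,ns` convention noted earlier (the concrete sizes are arbitrary):

    import numpy as np

    d, ns, N = 3, 6, 4
    Y = np.random.randn(d, ns)      # features, one column per sample
    T = np.random.rand(N, ns)       # coupling, barycenter points in rows
    p = np.random.rand(N) + 0.1     # strictly positive masses

    old = np.dot(Y, T.T).dot(np.diag(1. / p))   # previous implementation
    new = np.dot(Y, T.T) * (1. / p)[None, :]    # patched implementation
    assert np.allclose(old, new)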