author    ncassereau-idris <84033440+ncassereau-idris@users.noreply.github.com>  2021-11-02 13:42:02 +0100
committer GitHub <noreply@github.com>  2021-11-02 13:42:02 +0100
commit    a335324d008e8982be61d7ace937815a2bfa98f9 (patch)
tree      83c7f637597f10f6f3d20b15532e53fc65b51f22
parent    0cb2b2efe901ed74c614046d250518769f870313 (diff)
[MRG] Backend for gromov (#294)
* bregman: small correction
* gromov backend first draft
* Removing decorators
* Reworked casting method
* Bug solve
* Removing casting
* Bug solve
* toarray renamed todense ; expand_dims removed
* Warning (jax not supporting sparse matrix) moved
* Mistake corrected
* test backend
* Sparsity test for older versions of pytorch
* Trying pytorch/1.10
* Attempt to correct torch sparse bug
* Backend version of gromov tests
* Random state introduced for remaining gromov functions
* review changes
* code coverage
* Docs (first draft, to be continued)
* Gromov docs
* Prettified docs
* mistake corrected in the docs
* little change

Co-authored-by: Rémi Flamary <remi.flamary@gmail.com>
-rw-r--r--  ot/backend.py          214
-rw-r--r--  ot/bregman.py          184
-rw-r--r--  ot/gromov.py          1220
-rw-r--r--  ot/lp/__init__.py       58
-rw-r--r--  ot/optim.py             22
-rw-r--r--  test/test_backend.py    56
-rw-r--r--  test/test_bregman.py     4
-rw-r--r--  test/test_gromov.py    297
8 files changed, 1289 insertions, 766 deletions
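
For orientation, a minimal sketch of how the sparse helpers introduced below in ot/backend.py are meant to be used. It is not part of the patch and only assumes the NumpyBackend class can be instantiated directly, following the method signatures added in this diff:

    # Illustrative only: exercising the new sparse helpers through the NumPy backend
    import numpy as np
    from ot.backend import NumpyBackend

    nx = NumpyBackend()
    data = np.array([1.0, 5e-7, 2.0])
    rows = np.array([0, 1, 2])
    cols = np.array([2, 0, 1])

    S = nx.coo_matrix(data, rows, cols, shape=(3, 3))   # scipy.sparse COO matrix
    assert nx.issparse(S)
    S = nx.eliminate_zeros(S, threshold=1e-5)           # drop near-zero entries
    D = nx.todense(S)                                    # back to a dense ndarray
    assert nx.allclose(D.sum(), 3.0)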
diff --git a/ot/backend.py b/ot/backend.py
index 876b96a..358297c 100644
--- a/ot/backend.py
+++ b/ot/backend.py
@@ -26,6 +26,7 @@ Examples
import numpy as np
import scipy.special as scipy
+from scipy.sparse import issparse, coo_matrix, csr_matrix
try:
import torch
@@ -539,6 +540,86 @@ class Backend():
"""
raise NotImplementedError()
+ def coo_matrix(self, data, rows, cols, shape=None, type_as=None):
+ r"""
+ Creates a sparse tensor in COOrdinate format.
+
+ This function follows the api from :any:`scipy.sparse.coo_matrix`
+
+ See: https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.coo_matrix.html
+ """
+ raise NotImplementedError()
+
+ def issparse(self, a):
+ r"""
+ Checks whether or not the input tensor is a sparse tensor.
+
+ This function follows the api from :any:`scipy.sparse.issparse`
+
+ See: https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.issparse.html
+ """
+ raise NotImplementedError()
+
+ def tocsr(self, a):
+ r"""
+ Converts this matrix to Compressed Sparse Row format.
+
+ This function follows the api from :any:`scipy.sparse.coo_matrix.tocsr`
+
+ See: https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.coo_matrix.tocsr.html
+ """
+ raise NotImplementedError()
+
+ def eliminate_zeros(self, a, threshold=0.):
+ r"""
+ Removes entries smaller than the given threshold from the sparse tensor.
+
+ This function follows the api from :any:`scipy.sparse.csr_matrix.eliminate_zeros`
+
+ See: https://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.sparse.csr_matrix.eliminate_zeros.html
+ """
+ raise NotImplementedError()
+
+ def todense(self, a):
+ r"""
+ Converts a sparse tensor to a dense tensor.
+
+ This function follows the api from :any:`scipy.sparse.csr_matrix.toarray`
+
+ See: https://docs.scipy.org/doc/scipy/reference/generated/scipy.sparse.csr_matrix.toarray.html
+ """
+ raise NotImplementedError()
+
+ def where(self, condition, x, y):
+ r"""
+ Returns elements chosen from x or y depending on condition.
+
+ This function follows the api from :any:`numpy.where`
+
+ See: https://numpy.org/doc/stable/reference/generated/numpy.where.html
+ """
+ raise NotImplementedError()
+
+ def copy(self, a):
+ r"""
+ Returns a copy of the given tensor.
+
+ This function follows the api from :any:`numpy.copy`
+
+ See: https://numpy.org/doc/stable/reference/generated/numpy.copy.html
+ """
+ raise NotImplementedError()
+
+ def allclose(self, a, b, rtol=1e-05, atol=1e-08, equal_nan=False):
+ r"""
+ Returns True if two arrays are element-wise equal within a tolerance.
+
+ This function follows the api from :any:`numpy.allclose`
+
+ See: https://numpy.org/doc/stable/reference/generated/numpy.allclose.html
+ """
+ raise NotImplementedError()
+
class NumpyBackend(Backend):
"""
@@ -712,6 +793,46 @@ class NumpyBackend(Backend):
def reshape(self, a, shape):
return np.reshape(a, shape)
+ def coo_matrix(self, data, rows, cols, shape=None, type_as=None):
+ if type_as is None:
+ return coo_matrix((data, (rows, cols)), shape=shape)
+ else:
+ return coo_matrix((data, (rows, cols)), shape=shape, dtype=type_as.dtype)
+
+ def issparse(self, a):
+ return issparse(a)
+
+ def tocsr(self, a):
+ if self.issparse(a):
+ return a.tocsr()
+ else:
+ return csr_matrix(a)
+
+ def eliminate_zeros(self, a, threshold=0.):
+ if threshold > 0:
+ if self.issparse(a):
+ a.data[self.abs(a.data) <= threshold] = 0
+ else:
+ a[self.abs(a) <= threshold] = 0
+ if self.issparse(a):
+ a.eliminate_zeros()
+ return a
+
+ def todense(self, a):
+ if self.issparse(a):
+ return a.toarray()
+ else:
+ return a
+
+ def where(self, condition, x, y):
+ return np.where(condition, x, y)
+
+ def copy(self, a):
+ return a.copy()
+
+ def allclose(self, a, b, rtol=1e-05, atol=1e-08, equal_nan=False):
+ return np.allclose(a, b, rtol=rtol, atol=atol, equal_nan=equal_nan)
+
class JaxBackend(Backend):
"""
@@ -889,6 +1010,48 @@ class JaxBackend(Backend):
def reshape(self, a, shape):
return jnp.reshape(a, shape)
+ def coo_matrix(self, data, rows, cols, shape=None, type_as=None):
+ # Currently, JAX does not support sparse matrices
+ data = self.to_numpy(data)
+ rows = self.to_numpy(rows)
+ cols = self.to_numpy(cols)
+ nx = NumpyBackend()
+ coo_matrix = nx.coo_matrix(data, rows, cols, shape=shape, type_as=type_as)
+ matrix = nx.todense(coo_matrix)
+ return self.from_numpy(matrix)
+
+ def issparse(self, a):
+ # Currently, JAX does not support sparse matrices
+ return False
+
+ def tocsr(self, a):
+ # Currently, JAX does not support sparse matrices
+ return a
+
+ def eliminate_zeros(self, a, threshold=0.):
+ # Currently, JAX does not support sparse matrices
+ if threshold > 0:
+ return self.where(
+ self.abs(a) <= threshold,
+ self.zeros((1,), type_as=a),
+ a
+ )
+ return a
+
+ def todense(self, a):
+ # Currently, JAX does not support sparse matrices
+ return a
+
+ def where(self, condition, x, y):
+ return jnp.where(condition, x, y)
+
+ def copy(self, a):
+ # No need to copy, JAX arrays are immutable
+ return a
+
+ def allclose(self, a, b, rtol=1e-05, atol=1e-08, equal_nan=False):
+ return jnp.allclose(a, b, rtol=rtol, atol=atol, equal_nan=equal_nan)
+
class TorchBackend(Backend):
"""
@@ -999,7 +1162,7 @@ class TorchBackend(Backend):
a = torch.tensor([float(a)], dtype=b.dtype, device=b.device)
if isinstance(b, int) or isinstance(b, float):
b = torch.tensor([float(b)], dtype=a.dtype, device=a.device)
- if torch.__version__ >= '1.7.0':
+ if hasattr(torch, "maximum"):
return torch.maximum(a, b)
else:
return torch.max(torch.stack(torch.broadcast_tensors(a, b)), axis=0)[0]
@@ -1009,7 +1172,7 @@ class TorchBackend(Backend):
a = torch.tensor([float(a)], dtype=b.dtype, device=b.device)
if isinstance(b, int) or isinstance(b, float):
b = torch.tensor([float(b)], dtype=a.dtype, device=a.device)
- if torch.__version__ >= '1.7.0':
+ if hasattr(torch, "minimum"):
return torch.minimum(a, b)
else:
return torch.min(torch.stack(torch.broadcast_tensors(a, b)), axis=0)[0]
@@ -1129,3 +1292,50 @@ class TorchBackend(Backend):
def reshape(self, a, shape):
return torch.reshape(a, shape)
+
+ def coo_matrix(self, data, rows, cols, shape=None, type_as=None):
+ if type_as is None:
+ return torch.sparse_coo_tensor(torch.stack([rows, cols]), data, size=shape)
+ else:
+ return torch.sparse_coo_tensor(
+ torch.stack([rows, cols]), data, size=shape,
+ dtype=type_as.dtype, device=type_as.device
+ )
+
+ def issparse(self, a):
+ return getattr(a, "is_sparse", False) or getattr(a, "is_sparse_csr", False)
+
+ def tocsr(self, a):
+ # Versions older than 1.9 do not support CSR tensors. PyTorch 1.9 and 1.10 offer a very limited support
+ return self.todense(a)
+
+ def eliminate_zeros(self, a, threshold=0.):
+ if self.issparse(a):
+ if threshold > 0:
+ mask = self.abs(a) <= threshold
+ mask = ~mask
+ mask = mask.nonzero()
+ else:
+ mask = a._values().nonzero()
+ nv = a._values().index_select(0, mask.view(-1))
+ ni = a._indices().index_select(1, mask.view(-1))
+ return self.coo_matrix(nv, ni[0], ni[1], shape=a.shape, type_as=a)
+ else:
+ if threshold > 0:
+ a[self.abs(a) <= threshold] = 0
+ return a
+
+ def todense(self, a):
+ if self.issparse(a):
+ return a.to_dense()
+ else:
+ return a
+
+ def where(self, condition, x, y):
+ return torch.where(condition, x, y)
+
+ def copy(self, a):
+ return torch.clone(a)
+
+ def allclose(self, a, b, rtol=1e-05, atol=1e-08, equal_nan=False):
+ return torch.allclose(a, b, rtol=rtol, atol=atol, equal_nan=equal_nan)
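
A note on the two one-line changes to maximum/minimum above: comparing torch.__version__ as a string misorders releases once two-digit minor versions appear (e.g. 1.10), which is why the check becomes a feature test. A tiny illustration, not part of the patch:

    # Lexicographic string comparison gets PyTorch 1.10 wrong:
    assert not ('1.10.0' >= '1.7.0')   # '1' < '7' at the third character
    # hasattr(torch, "maximum") tests for the function itself rather than the version.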
diff --git a/ot/bregman.py b/ot/bregman.py
index 2aa76ff..0499b8e 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -32,13 +32,14 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg}\cdot\Omega(\gamma)
- s.t. \ \gamma 1 = a
+ s.t. \ \gamma \mathbf{1} &= \mathbf{a}
- \gamma^T 1= b
+ \gamma^T \mathbf{1} &= \mathbf{b}
+
+ \gamma &\geq 0
- \gamma\geq 0
where :
- :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
@@ -167,13 +168,14 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000,
The function solves the following optimization problem:
.. math::
- W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ W = \min_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg}\cdot\Omega(\gamma)
+
+ s.t. \ \gamma \mathbf{1} &= \mathbf{a}
- s.t. \ \gamma 1 = a
+ \gamma^T \mathbf{1} &= \mathbf{b}
- \gamma^T 1= b
+ \gamma &\geq 0
- \gamma\geq 0
where :
- :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
@@ -323,13 +325,13 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg}\cdot\Omega(\gamma)
- s.t. \gamma 1 = a
+ s.t. \ \gamma \mathbf{1} &= \mathbf{a}
- \gamma^T 1= b
+ \gamma^T \mathbf{1} &= \mathbf{b}
- \gamma\geq 0
+ \gamma &\geq 0
where :
- :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
@@ -489,13 +491,13 @@ def sinkhorn_log(a, b, M, reg, numItermax=1000,
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg}\cdot\Omega(\gamma)
- s.t. \gamma 1 = a
+ s.t. \ \gamma \mathbf{1} &= \mathbf{a}
- \gamma^T 1= b
+ \gamma^T \mathbf{1} &= \mathbf{b}
- \gamma\geq 0
+ \gamma &\geq 0
where :
- :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
@@ -550,8 +552,7 @@ def sinkhorn_log(a, b, M, reg, numItermax=1000,
References
----------
- .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal
- Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
+ .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
.. [34] Feydy, J., Séjourné, T., Vialard, F. X., Amari, S. I., Trouvé, A., & Peyré, G. (2019, April). Interpolating between optimal transport and MMD using Sinkhorn divergences. In The 22nd International Conference on Artificial Intelligence and Statistics (pp. 2681-2690). PMLR.
@@ -675,13 +676,13 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False,
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg}\cdot\Omega(\gamma)
- s.t. \ \gamma 1 = a
+ s.t. \ \gamma \mathbf{1} &= \mathbf{a}
- \gamma^T 1= b
+ \gamma^T \mathbf{1} &= \mathbf{b}
- \gamma\geq 0
+ \gamma &\geq 0
where :
- :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
@@ -820,13 +821,13 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg}\cdot\Omega(\gamma)
- s.t. \ \gamma 1 = a
+ s.t. \ \gamma \mathbf{1} &= \mathbf{a}
- \gamma^T 1= b
+ \gamma^T \mathbf{1} &= \mathbf{b}
- \gamma\geq 0
+ \gamma &\geq 0
where :
- :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
@@ -965,7 +966,7 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
# remove numerical problems and store them in K
if nx.max(nx.abs(u)) > tau or nx.max(nx.abs(v)) > tau:
if n_hists:
- alpha, beta = alpha + reg * nx.max(nx.log(u), 1), beta + reg * nx.max(np.log(v))
+ alpha, beta = alpha + reg * nx.max(nx.log(u), 1), beta + reg * nx.max(nx.log(v))
else:
alpha, beta = alpha + reg * nx.log(u), beta + reg * nx.log(v)
if n_hists:
@@ -1055,13 +1056,13 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4,
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg}\cdot\Omega(\gamma)
- s.t. \ \gamma 1 = a
+ s.t. \ \gamma \mathbf{1} &= \mathbf{a}
- \gamma^T 1= b
+ \gamma^T \mathbf{1} &= \mathbf{b}
- \gamma\geq 0
+ \gamma &\geq 0
where :
- :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
@@ -1245,12 +1246,12 @@ def projC(gamma, q):
def barycenter(A, M, reg, weights=None, method="sinkhorn", numItermax=10000,
stopThr=1e-4, verbose=False, log=False, **kwargs):
- r"""Compute the entropic regularized wasserstein barycenter of distributions A
+ r"""Compute the entropic regularized wasserstein barycenter of distributions :math:`\mathbf{A}`
The function solves the following optimization problem:
.. math::
- \mathbf{a} = arg\min_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
+ \mathbf{a} = \mathop{\arg \min}_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
where :
@@ -1263,7 +1264,7 @@ def barycenter(A, M, reg, weights=None, method="sinkhorn", numItermax=10000,
Parameters
----------
A : array-like, shape (dim, n_hists)
- `n_hists` training distributions :math:`a_i` of size `dim`
+ `n_hists` training distributions :math:`\mathbf{a}_i` of size `dim`
M : array-like, shape (dim, dim)
loss matrix for OT
reg : float
@@ -1271,7 +1272,7 @@ def barycenter(A, M, reg, weights=None, method="sinkhorn", numItermax=10000,
method : str (optional)
method used for the solver either 'sinkhorn' or 'sinkhorn_stabilized'
weights : array-like, shape (n_hists,)
- Weights of each histogram :math:`a_i` on the simplex (barycentric coodinates)
+ Weights of each histogram :math:`\mathbf{a}_i` on the simplex (barycentric coordinates)
numItermax : int, optional
Max number of iterations
stopThr : float, optional
@@ -1314,12 +1315,12 @@ def barycenter(A, M, reg, weights=None, method="sinkhorn", numItermax=10000,
def barycenter_sinkhorn(A, M, reg, weights=None, numItermax=1000,
stopThr=1e-4, verbose=False, log=False):
- r"""Compute the entropic regularized wasserstein barycenter of distributions A
+ r"""Compute the entropic regularized wasserstein barycenter of distributions :math:`\mathbf{A}`
The function solves the following optimization problem:
.. math::
- \mathbf{a} = arg\min_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
+ \mathbf{a} = \mathop{\arg \min}_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
where :
@@ -1332,13 +1333,13 @@ def barycenter_sinkhorn(A, M, reg, weights=None, numItermax=1000,
Parameters
----------
A : array-like, shape (dim, n_hists)
- `n_hists` training distributions :math:`a_i` of size `dim`
+ `n_hists` training distributions :math:`\mathbf{a}_i` of size `dim`
M : array-like, shape (dim, dim)
loss matrix for OT
reg : float
Regularization term > 0
weights : array-like, shape (n_hists,)
- Weights of each histogram :math:`a_i` on the simplex (barycentric coodinates)
+ Weights of each histogram :math:`\mathbf{a}_i` on the simplex (barycentric coordinates)
numItermax : int, optional
Max number of iterations
stopThr : float, optional
@@ -1414,12 +1415,12 @@ def barycenter_sinkhorn(A, M, reg, weights=None, numItermax=1000,
def barycenter_stabilized(A, M, reg, tau=1e10, weights=None, numItermax=1000,
stopThr=1e-4, verbose=False, log=False):
- r"""Compute the entropic regularized wasserstein barycenter of distributions A with stabilization.
+ r"""Compute the entropic regularized wasserstein barycenter of distributions :math:`\mathbf{A}` with stabilization.
The function solves the following optimization problem:
.. math::
- \mathbf{a} = arg\min_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
+ \mathbf{a} = \mathop{\arg \min}_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
where :
@@ -1432,7 +1433,7 @@ def barycenter_stabilized(A, M, reg, tau=1e10, weights=None, numItermax=1000,
Parameters
----------
A : array-like, shape (dim, n_hists)
- `n_hists` training distributions :math:`a_i` of size `dim`
+ `n_hists` training distributions :math:`\mathbf{a}_i` of size `dim`
M : array-like, shape (dim, dim)
loss matrix for OT
reg : float
@@ -1440,7 +1441,7 @@ def barycenter_stabilized(A, M, reg, tau=1e10, weights=None, numItermax=1000,
tau : float
threshold for max value in :math:`\mathbf{u}` or :math:`\mathbf{v}` for log scaling
weights : array-like, shape (n_hists,)
- Weights of each histogram :math:`a_i` on the simplex (barycentric coodinates)
+ Weights of each histogram :math:`\mathbf{a}_i` on the simplex (barycentric coordinates)
numItermax : int, optional
Max number of iterations
stopThr : float, optional
@@ -1533,8 +1534,8 @@ def barycenter_stabilized(A, M, reg, tau=1e10, weights=None, numItermax=1000,
"Or a larger absorption threshold `tau`.")
if log:
log['niter'] = cpt
- log['logu'] = np.log(u + 1e-16)
- log['logv'] = np.log(v + 1e-16)
+ log['logu'] = nx.log(u + 1e-16)
+ log['logv'] = nx.log(v + 1e-16)
return q, log
else:
return q
@@ -1543,13 +1544,13 @@ def barycenter_stabilized(A, M, reg, tau=1e10, weights=None, numItermax=1000,
def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000,
stopThr=1e-9, stabThr=1e-30, verbose=False,
log=False):
- r"""Compute the entropic regularized wasserstein barycenter of distributions A
- where A is a collection of 2D images.
+ r"""Compute the entropic regularized wasserstein barycenter of distributions :math:`\mathbf{A}`
+ where :math:`\mathbf{A}` is a collection of 2D images.
The function solves the following optimization problem:
.. math::
- \mathbf{a} = arg\min_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
+ \mathbf{a} = \mathop{\arg \min}_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
where :
@@ -1673,12 +1674,12 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000,
.. math::
- \mathbf{h} = arg\min_\mathbf{h} (1- \alpha) W_{M,reg}(\mathbf{a},\mathbf{Dh})+\alpha W_{M_0,reg_0}(\mathbf{h}_0,\mathbf{h})
+ \mathbf{h} = \mathop{\arg \min}_\mathbf{h} (1- \alpha) W_{\mathbf{M}, \mathrm{reg}}(\mathbf{a},\mathbf{Dh})+\alpha W_{\mathbf{M_0},\mathrm{reg}_0}(\mathbf{h}_0,\mathbf{h})
where :
- - :math:`W_{M,reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance with M loss matrix (see :py:func:`ot.bregman.sinkhorn`)
+ - :math:`W_{M,reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance with :math:`\mathbf{M}` loss matrix (see :py:func:`ot.bregman.sinkhorn`)
- :math:`\mathbf{D}` is a dictionary of `n_atoms` atoms of dimension `dim_a`, its expected shape is `(dim_a, n_atoms)`
- :math:`\mathbf{h}` is the estimated unmixing of dimension `n_atoms`
- :math:`\mathbf{a}` is an observed distribution of dimension `dim_a`
@@ -1790,7 +1791,7 @@ def jcpot_barycenter(Xs, Ys, Xt, reg, metric='sqeuclidean', numItermax=100,
.. math::
- \mathbf{h} = arg\min_{\mathbf{h}}\quad \sum_{k=1}^{K} \lambda_k
+ \mathbf{h} = \mathop{\arg \min}_{\mathbf{h}} \sum_{k=1}^{K} \lambda_k
W_{reg}((\mathbf{D}_2^{(k)} \mathbf{h})^T, \mathbf{a})
s.t. \ \forall k, \mathbf{D}_1^{(k)} \gamma_k \mathbf{1}_n= \mathbf{h}
@@ -1898,7 +1899,7 @@ def jcpot_barycenter(Xs, Ys, Xt, reg, metric='sqeuclidean', numItermax=100,
K.append(Ktmp)
# uniform target distribution
- a = nx.from_numpy(unif(np.shape(Xt)[0]))
+ a = nx.from_numpy(unif(Xt.shape[0]), type_as=Xs[0])
cpt = 0 # iterations count
err = 1
@@ -1956,13 +1957,13 @@ def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean',
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg} \cdot\Omega(\gamma)
- s.t. \ \gamma 1 = a
+ s.t. \ \gamma \mathbf{1} &= a
- \gamma^T 1= b
+ \gamma^T \mathbf{1} &= b
- \gamma\geq 0
+ \gamma &\geq 0
where :
- :math:`\mathbf{M}` is the (`n_samples_a`, `n_samples_b`) metric cost matrix
@@ -2010,8 +2011,8 @@ def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean',
>>> n_samples_a = 2
>>> n_samples_b = 2
>>> reg = 0.1
- >>> X_s = np.reshape(np.arange(n_samples_a), (n_samples_a, 1))
- >>> X_t = np.reshape(np.arange(0, n_samples_b), (n_samples_b, 1))
+ >>> X_s = np.reshape(np.arange(n_samples_a, dtype=np.float64), (n_samples_a, 1))
+ >>> X_t = np.reshape(np.arange(0, n_samples_b, dtype=np.float64), (n_samples_b, 1))
>>> empirical_sinkhorn(X_s, X_t, reg=reg, verbose=False) # doctest: +NORMALIZE_WHITESPACE
array([[4.99977301e-01, 2.26989344e-05],
[2.26989344e-05, 4.99977301e-01]])
@@ -2033,9 +2034,9 @@ def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean',
ns, nt = X_s.shape[0], X_t.shape[0]
if a is None:
- a = nx.from_numpy(unif(ns))
+ a = nx.from_numpy(unif(ns), type_as=X_s)
if b is None:
- b = nx.from_numpy(unif(nt))
+ b = nx.from_numpy(unif(nt), type_as=X_s)
if isLazy:
if log:
@@ -2127,13 +2128,13 @@ def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', num
The function solves the following optimization problem:
.. math::
- W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ W = \min_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg} \cdot\Omega(\gamma)
- s.t. \ \gamma 1 = a
+ s.t. \ \gamma \mathbf{1} &= a
- \gamma^T 1= b
+ \gamma^T \mathbf{1} &= b
- \gamma\geq 0
+ \gamma &\geq 0
where :
- :math:`\mathbf{M}` is the (`n_samples_a`, `n_samples_b`) metric cost matrix
@@ -2181,8 +2182,8 @@ def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', num
>>> n_samples_a = 2
>>> n_samples_b = 2
>>> reg = 0.1
- >>> X_s = np.reshape(np.arange(n_samples_a), (n_samples_a, 1))
- >>> X_t = np.reshape(np.arange(0, n_samples_b), (n_samples_b, 1))
+ >>> X_s = np.reshape(np.arange(n_samples_a, dtype=np.float64), (n_samples_a, 1))
+ >>> X_t = np.reshape(np.arange(0, n_samples_b, dtype=np.float64), (n_samples_b, 1))
>>> b = np.full((n_samples_b, 3), 1/n_samples_b)
>>> empirical_sinkhorn2(X_s, X_t, b=b, reg=reg, verbose=False)
array([4.53978687e-05, 4.53978687e-05, 4.53978687e-05])
@@ -2204,9 +2205,9 @@ def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', num
ns, nt = X_s.shape[0], X_t.shape[0]
if a is None:
- a = nx.from_numpy(unif(ns))
+ a = nx.from_numpy(unif(ns), type_as=X_s)
if b is None:
- b = nx.from_numpy(unif(nt))
+ b = nx.from_numpy(unif(nt), type_as=X_s)
if isLazy:
if log:
@@ -2259,32 +2260,32 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli
.. math::
- W &= \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ W &= \min_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg} \cdot\Omega(\gamma)
- W_a &= \min_{\gamma_a} <\gamma_a,M_a>_F + reg\cdot\Omega(\gamma_a)
+ W_a &= \min_{\gamma_a} <\gamma_a, \mathbf{M_a}>_F + \mathrm{reg} \cdot\Omega(\gamma_a)
- W_b &= \min_{\gamma_b} <\gamma_b,M_b>_F + reg\cdot\Omega(\gamma_b)
+ W_b &= \min_{\gamma_b} <\gamma_b, \mathbf{M_b}>_F + \mathrm{reg} \cdot\Omega(\gamma_b)
S &= W - \frac{W_a + W_b}{2}
.. math::
- s.t. \ \gamma 1 = a
+ s.t. \ \gamma \mathbf{1} &= a
- \gamma^T 1= b
+ \gamma^T \mathbf{1} &= b
- \gamma\geq 0
+ \gamma &\geq 0
- \gamma_a 1 = a
+ \gamma_a \mathbf{1} &= \mathbf{a}
- \gamma_a^T 1= a
+ \gamma_a^T \mathbf{1} &= \mathbf{a}
- \gamma_a\geq 0
+ \gamma_a &\geq 0
- \gamma_b 1 = b
+ \gamma_b \mathbf{1} &= \mathbf{b}
- \gamma_b^T 1= b
+ \gamma_b^T \mathbf{1} &= \mathbf{b}
- \gamma_b\geq 0
+ \gamma_b &\geq 0
where :
- :math:`\mathbf{M}` (resp. :math:`\mathbf{M_a}`, :math:`\mathbf{M_b}`) is the (`n_samples_a`, `n_samples_b`) metric cost matrix (resp (`n_samples_a, n_samples_a`) and (`n_samples_b`, `n_samples_b`))
@@ -2325,8 +2326,8 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli
>>> n_samples_a = 2
>>> n_samples_b = 4
>>> reg = 0.1
- >>> X_s = np.reshape(np.arange(n_samples_a), (n_samples_a, 1))
- >>> X_t = np.reshape(np.arange(0, n_samples_b), (n_samples_b, 1))
+ >>> X_s = np.reshape(np.arange(n_samples_a, dtype=np.float64), (n_samples_a, 1))
+ >>> X_t = np.reshape(np.arange(0, n_samples_b, dtype=np.float64), (n_samples_b, 1))
>>> empirical_sinkhorn_divergence(X_s, X_t, reg) # doctest: +ELLIPSIS
1.499887176049052
@@ -2380,19 +2381,19 @@ def screenkhorn(a, b, M, reg, ns_budget=None, nt_budget=None, uniform=False, res
.. math::
- (u, v) = arg\min_{u, v} 1_{ns}^T B(u,v) 1_{nt} - <\kappa u, a> - <v/\kappa, b>
+ (\mathbf{u}, \mathbf{v}) = \mathop{\arg \min}_{\mathbf{u}, \mathbf{v}} \ \mathbf{1}_{ns}^T \mathbf{B}(\mathbf{u}, \mathbf{v}) \mathbf{1}_{nt} - <\kappa \mathbf{u}, \mathbf{a}> - <\mathbf{v} / \kappa, \mathbf{b}>
where:
.. math::
- B(u,v) = \mathrm{diag}(e^u) K \mathrm{diag}(e^v) \text{, with } K = e^{-M/reg} \text{ and}
+ \mathbf{B}(\mathbf{u}, \mathbf{v}) = \mathrm{diag}(e^\mathbf{u}) \mathbf{K} \mathrm{diag}(e^\mathbf{v}) \text{, with } \mathbf{K} = e^{-\mathbf{M} / \mathrm{reg}} \text{ and}
.. math::
- s.t. \ e^{u_i} \geq \epsilon / \kappa, \forall i \in \{1, \ldots, ns\}
+ s.t. \ e^{u_i} &\geq \epsilon / \kappa, \forall i \in \{1, \ldots, ns\}
- e^{v_j} \geq \epsilon \kappa, \forall j \in \{1, \ldots, nt\}
+ e^{v_j} &\geq \epsilon \kappa, \forall j \in \{1, \ldots, nt\}
The parameters `kappa` and `epsilon` are determined w.r.t the couple number budget of points (`ns_budget`, `nt_budget`), see Equation (5) in :ref:`[26] <references-screenkhorn>`
@@ -2531,7 +2532,8 @@ def screenkhorn(a, b, M, reg, ns_budget=None, nt_budget=None, uniform=False, res
epsilon_u_square = a[0] / aK_sort[ns_budget - 1]
else:
aK_sort = nx.from_numpy(
- bottleneck.partition(nx.to_numpy(K_sum_cols), ns_budget - 1)[ns_budget - 1]
+ bottleneck.partition(nx.to_numpy(K_sum_cols), ns_budget - 1)[ns_budget - 1],
+ type_as=M
)
epsilon_u_square = a[0] / aK_sort
@@ -2540,7 +2542,8 @@ def screenkhorn(a, b, M, reg, ns_budget=None, nt_budget=None, uniform=False, res
epsilon_v_square = b[0] / bK_sort[nt_budget - 1]
else:
bK_sort = nx.from_numpy(
- bottleneck.partition(nx.to_numpy(K_sum_rows), nt_budget - 1)[nt_budget - 1]
+ bottleneck.partition(nx.to_numpy(K_sum_rows), nt_budget - 1)[nt_budget - 1],
+ type_as=M
)
epsilon_v_square = b[0] / bK_sort
else:
@@ -2589,10 +2592,9 @@ def screenkhorn(a, b, M, reg, ns_budget=None, nt_budget=None, uniform=False, res
K_IcJ = K[np.ix_(Ic, Jsel)]
K_IJc = K[np.ix_(Isel, Jc)]
- #K_min = K_IJ.min()
K_min = nx.min(K_IJ)
if K_min == 0:
- K_min = np.finfo(float).tiny
+ K_min = float(np.finfo(float).tiny)
# a_I, b_J, a_Ic, b_Jc
a_I = a[Isel]
@@ -2713,7 +2715,7 @@ def screenkhorn(a, b, M, reg, ns_budget=None, nt_budget=None, uniform=False, res
maxfun=maxfun,
pgtol=pgtol,
maxiter=maxiter)
- theta = nx.from_numpy(theta)
+ theta = nx.from_numpy(theta, type_as=M)
usc = theta[:ns_budget]
vsc = theta[ns_budget:]
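
The recurring change in ot/bregman.py above is threading `type_as` through nx.from_numpy so that arrays created inside the solvers inherit the dtype (and, on torch, the device) of the caller's inputs. A hedged sketch of that pattern, assuming an input array from any supported backend (the helper name is made up for illustration):

    # Sketch of the type_as pattern, not part of the patch
    import ot
    from ot.backend import get_backend

    def uniform_weights_like(X_s):
        nx = get_backend(X_s)
        ns = X_s.shape[0]
        # Without type_as, from_numpy would hand back a default float64 array;
        # type_as copies the dtype/device of X_s instead.
        return nx.from_numpy(ot.unif(ns), type_as=X_s)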
diff --git a/ot/gromov.py b/ot/gromov.py
index 33b4453..a0fbf48 100644
--- a/ot/gromov.py
+++ b/ot/gromov.py
@@ -14,67 +14,85 @@ import numpy as np
from .bregman import sinkhorn
-from .utils import dist, UndefinedParameter
+from .utils import dist, UndefinedParameter, list_to_array
from .optim import cg
from .lp import emd_1d, emd
from .utils import check_random_state
-
-from scipy.sparse import issparse
+from .backend import get_backend
def init_matrix(C1, C2, p, q, loss_fun='square_loss'):
r"""Return loss matrices and tensors for Gromov-Wasserstein fast computation
- Returns the value of \mathcal{L}(C1,C2) \otimes T with the selected loss
- function as the loss function of Gromow-Wasserstein discrepancy.
+ Returns the value of :math:`\mathcal{L}(\mathbf{C_1}, \mathbf{C_2}) \otimes \mathbf{T}` with the
+ selected loss function as the loss function of Gromov-Wasserstein discrepancy.
- The matrices are computed as described in Proposition 1 in [12]
+ The matrices are computed as described in Proposition 1 in :ref:`[12] <references-init-matrix>`
Where :
- * C1 : Metric cost matrix in the source space
- * C2 : Metric cost matrix in the target space
- * T : A coupling between those two spaces
-
- The square-loss function L(a,b)=|a-b|^2 is read as :
- L(a,b) = f1(a)+f2(b)-h1(a)*h2(b) with :
- * f1(a)=(a^2)
- * f2(b)=(b^2)
- * h1(a)=a
- * h2(b)=2*b
-
- The kl-loss function L(a,b)=a*log(a/b)-a+b is read as :
- L(a,b) = f1(a)+f2(b)-h1(a)*h2(b) with :
- * f1(a)=a*log(a)-a
- * f2(b)=b
- * h1(a)=a
- * h2(b)=log(b)
+
+ - :math:`\mathbf{C_1}`: Metric cost matrix in the source space
+ - :math:`\mathbf{C_2}`: Metric cost matrix in the target space
+ - :math:`\mathbf{T}`: A coupling between those two spaces
+
+ The square-loss function :math:`L(a, b) = |a - b|^2` is read as :
+
+ .. math::
+
+ L(a, b) = f_1(a) + f_2(b) - h_1(a) h_2(b)
+
+ \mathrm{with} \ f_1(a) &= a^2
+
+ f_2(b) &= b^2
+
+ h_1(a) &= a
+
+ h_2(b) &= 2b
+
+ The kl-loss function :math:`L(a, b) = a \log\left(\frac{a}{b}\right) - a + b` is read as :
+
+ .. math::
+
+ L(a, b) = f_1(a) + f_2(b) - h_1(a) h_2(b)
+
+ \mathrm{with} \ f_1(a) &= a \log(a) - a
+
+ f_2(b) &= b
+
+ h_1(a) &= a
+
+ h_2(b) &= \log(b)
Parameters
----------
- C1 : ndarray, shape (ns, ns)
+ C1 : array-like, shape (ns, ns)
Metric cost matrix in the source space
- C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
- T : ndarray, shape (ns, nt)
+ C2 : array-like, shape (nt, nt)
+ Metric cost matrix in the target space
+ T : array-like, shape (ns, nt)
Coupling between source and target spaces
- p : ndarray, shape (ns,)
+ p : array-like, shape (ns,)
Returns
-------
- constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
- hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
- hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
+ constC : array-like, shape (ns, nt)
+ Constant :math:`\mathbf{C}` matrix in Eq. (6)
+ hC1 : array-like, shape (ns, ns)
+ :math:`\mathbf{h1}(\mathbf{C1})` matrix in Eq. (6)
+ hC2 : array-like, shape (nt, nt)
+ :math:`\mathbf{h2}(\mathbf{C2})` matrix in Eq. (6)
+
+ .. _references-init-matrix:
References
----------
- .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ .. [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
"""
+ C1, C2, p, q = list_to_array(C1, C2, p, q)
+ nx = get_backend(C1, C2, p, q)
if loss_fun == 'square_loss':
def f1(a):
@@ -90,7 +108,7 @@ def init_matrix(C1, C2, p, q, loss_fun='square_loss'):
return 2 * b
elif loss_fun == 'kl_loss':
def f1(a):
- return a * np.log(a + 1e-15) - a
+ return a * nx.log(a + 1e-15) - a
def f2(b):
return b
@@ -99,12 +117,16 @@ def init_matrix(C1, C2, p, q, loss_fun='square_loss'):
return a
def h2(b):
- return np.log(b + 1e-15)
-
- constC1 = np.dot(np.dot(f1(C1), p.reshape(-1, 1)),
- np.ones(len(q)).reshape(1, -1))
- constC2 = np.dot(np.ones(len(p)).reshape(-1, 1),
- np.dot(q.reshape(1, -1), f2(C2).T))
+ return nx.log(b + 1e-15)
+
+ constC1 = nx.dot(
+ nx.dot(f1(C1), nx.reshape(p, (-1, 1))),
+ nx.ones((1, len(q)), type_as=q)
+ )
+ constC2 = nx.dot(
+ nx.ones((len(p), 1), type_as=p),
+ nx.dot(nx.reshape(q, (1, -1)), f2(C2).T)
+ )
constC = constC1 + constC2
hC1 = h1(C1)
hC2 = h2(C2)
@@ -115,30 +137,37 @@ def init_matrix(C1, C2, p, q, loss_fun='square_loss'):
def tensor_product(constC, hC1, hC2, T):
r"""Return the tensor for Gromov-Wasserstein fast computation
- The tensor is computed as described in Proposition 1 Eq. (6) in [12].
+ The tensor is computed as described in Proposition 1 Eq. (6) in :ref:`[12] <references-tensor-product>`
Parameters
----------
- constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
- hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
- hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
+ constC : array-like, shape (ns, nt)
+ Constant :math:`\mathbf{C}` matrix in Eq. (6)
+ hC1 : array-like, shape (ns, ns)
+ :math:`\mathbf{h1}(\mathbf{C1})` matrix in Eq. (6)
+ hC2 : array-like, shape (nt, nt)
+ :math:`\mathbf{h2}(\mathbf{C2})` matrix in Eq. (6)
Returns
-------
- tens : ndarray, shape (ns, nt)
- \mathcal{L}(C1,C2) \otimes T tensor-matrix multiplication result
+ tens : array-like, shape (`ns`, `nt`)
+ :math:`\mathcal{L}(\mathbf{C_1}, \mathbf{C_2}) \otimes \mathbf{T}` tensor-matrix multiplication result
+
+ .. _references-tensor-product:
References
----------
- .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ .. [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
"""
- A = -np.dot(hC1, T).dot(hC2.T)
+ constC, hC1, hC2, T = list_to_array(constC, hC1, hC2, T)
+ nx = get_backend(constC, hC1, hC2, T)
+
+ A = - nx.dot(
+ nx.dot(hC1, T), hC2.T
+ )
tens = constC + A
# tens -= tens.min()
return tens
@@ -147,27 +176,29 @@ def tensor_product(constC, hC1, hC2, T):
def gwloss(constC, hC1, hC2, T):
"""Return the Loss for Gromov-Wasserstein
- The loss is computed as described in Proposition 1 Eq. (6) in [12].
+ The loss is computed as described in Proposition 1 Eq. (6) in :ref:`[12] <references-gwloss>`
Parameters
----------
- constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
- hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
- hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
- T : ndarray, shape (ns, nt)
- Current value of transport matrix T
+ constC : array-like, shape (ns, nt)
+ Constant :math:`\mathbf{C}` matrix in Eq. (6)
+ hC1 : array-like, shape (ns, ns)
+ :math:`\mathbf{h1}(\mathbf{C1})` matrix in Eq. (6)
+ hC2 : array-like, shape (nt, nt)
+ :math:`\mathbf{h2}(\mathbf{C2})` matrix in Eq. (6)
+ T : array-like, shape (ns, nt)
+ Current value of transport matrix :math:`\mathbf{T}`
Returns
-------
loss : float
Gromov Wasserstein loss
+
+ .. _references-gwloss:
References
----------
- .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ .. [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
@@ -175,33 +206,38 @@ def gwloss(constC, hC1, hC2, T):
tens = tensor_product(constC, hC1, hC2, T)
- return np.sum(tens * T)
+ tens, T = list_to_array(tens, T)
+ nx = get_backend(tens, T)
+
+ return nx.sum(tens * T)
def gwggrad(constC, hC1, hC2, T):
"""Return the gradient for Gromov-Wasserstein
- The gradient is computed as described in Proposition 2 in [12].
+ The gradient is computed as described in Proposition 2 in :ref:`[12] <references-gwggrad>`
Parameters
----------
- constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
- hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
- hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
- T : ndarray, shape (ns, nt)
- Current value of transport matrix T
+ constC : array-like, shape (ns, nt)
+ Constant :math:`\mathbf{C}` matrix in Eq. (6)
+ hC1 : array-like, shape (ns, ns)
+ :math:`\mathbf{h1}(\mathbf{C1})` matrix in Eq. (6)
+ hC2 : array-like, shape (nt, nt)
+ :math:`\mathbf{h2}(\mathbf{C2})` matrix in Eq. (6)
+ T : array-like, shape (ns, nt)
+ Current value of transport matrix :math:`\mathbf{T}`
Returns
-------
- grad : ndarray, shape (ns, nt)
+ grad : array-like, shape (`ns`, `nt`)
Gromov Wasserstein gradient
+
+ .. _references-gwggrad:
References
----------
- .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ .. [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
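
To make the Proposition 1 decomposition documented above concrete, here is a short sketch (not part of the patch) that evaluates the Gromov-Wasserstein objective for a fixed coupling with the helpers from this file, using plain NumPy inputs that the new backend dispatch accepts:

    # Illustrative: GW cost of the independent coupling, via the helpers above
    import numpy as np
    from ot.gromov import init_matrix, gwloss

    rng = np.random.RandomState(0)
    n, m = 5, 4
    C1 = rng.rand(n, n); C1 = (C1 + C1.T) / 2   # symmetric cost, source space
    C2 = rng.rand(m, m); C2 = (C2 + C2.T) / 2   # symmetric cost, target space
    p = np.full(n, 1.0 / n)
    q = np.full(m, 1.0 / m)
    T = np.outer(p, q)                          # independent coupling p q^T

    constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun='square_loss')
    print(gwloss(constC, hC1, hC2, T))          # sum_ijkl L(C1_ik, C2_jl) T_ij T_kl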
@@ -212,88 +248,107 @@ def gwggrad(constC, hC1, hC2, T):
def update_square_loss(p, lambdas, T, Cs):
"""
- Updates C according to the L2 Loss kernel with the S Ts couplings
- calculated at each iteration
+ Updates :math:`\mathbf{C}` according to the L2 Loss kernel with the `S` :math:`\mathbf{T}_s`
+ couplings calculated at each iteration
Parameters
----------
- p : ndarray, shape (N,)
+ p : array-like, shape (N,)
Masses in the targeted barycenter.
lambdas : list of float
- List of the S spaces' weights.
- T : list of S np.ndarray of shape (ns,N)
- The S Ts couplings calculated at each iteration.
- Cs : list of S ndarray, shape(ns,ns)
+ List of the `S` spaces' weights.
+ T : list of S array-like of shape (ns,N)
+ The `S` :math:`\mathbf{T}_s` couplings calculated at each iteration.
+ Cs : list of S array-like, shape(ns,ns)
Metric cost matrices.
Returns
----------
- C : ndarray, shape (nt, nt)
- Updated C matrix.
+ C : array-like, shape (`nt`, `nt`)
+ Updated :math:`\mathbf{C}` matrix.
"""
- tmpsum = sum([lambdas[s] * np.dot(T[s].T, Cs[s]).dot(T[s])
- for s in range(len(T))])
- ppt = np.outer(p, p)
+ T = list_to_array(*T)
+ Cs = list_to_array(*Cs)
+ p = list_to_array(p)
+ nx = get_backend(p, *T, *Cs)
+
+ tmpsum = sum([
+ lambdas[s] * nx.dot(
+ nx.dot(T[s].T, Cs[s]),
+ T[s]
+ ) for s in range(len(T))
+ ])
+ ppt = nx.outer(p, p)
- return np.divide(tmpsum, ppt)
+ return tmpsum / ppt
def update_kl_loss(p, lambdas, T, Cs):
"""
- Updates C according to the KL Loss kernel with the S Ts couplings calculated at each iteration
+ Updates :math:`\mathbf{C}` according to the KL Loss kernel with the `S` :math:`\mathbf{T}_s` couplings calculated at each iteration
Parameters
----------
- p : ndarray, shape (N,)
+ p : array-like, shape (N,)
Weights in the targeted barycenter.
- lambdas : list of the S spaces' weights
- T : list of S np.ndarray of shape (ns,N)
- The S Ts couplings calculated at each iteration.
- Cs : list of S ndarray, shape(ns,ns)
+ lambdas : list of float
+ List of the `S` spaces' weights
+ T : list of S array-like of shape (ns,N)
+ The `S` :math:`\mathbf{T}_s` couplings calculated at each iteration.
+ Cs : list of S array-like, shape(ns,ns)
Metric cost matrices.
Returns
----------
- C : ndarray, shape (ns,ns)
- updated C matrix
+ C : array-like, shape (`ns`, `ns`)
+ updated :math:`\mathbf{C}` matrix
"""
- tmpsum = sum([lambdas[s] * np.dot(T[s].T, Cs[s]).dot(T[s])
- for s in range(len(T))])
- ppt = np.outer(p, p)
+ Cs = list_to_array(*Cs)
+ T = list_to_array(*T)
+ p = list_to_array(p)
+ nx = get_backend(p, *T, *Cs)
- return np.exp(np.divide(tmpsum, ppt))
+ tmpsum = sum([
+ lambdas[s] * nx.dot(
+ nx.dot(T[s].T, Cs[s]),
+ T[s]
+ ) for s in range(len(T))
+ ])
+ ppt = nx.outer(p, p)
+
+ return nx.exp(tmpsum / ppt)
def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs):
r"""
- Returns the gromov-wasserstein transport between (C1,p) and (C2,q)
+ Returns the gromov-wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`
The function solves the following optimization problem:
.. math::
- GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ \mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
Where :
- - C1 : Metric cost matrix in the source space
- - C2 : Metric cost matrix in the target space
- - p : distribution in the source space
- - q : distribution in the target space
- - L : loss function to account for the misfit between the similarity matrices
+
+ - :math:`\mathbf{C_1}`: Metric cost matrix in the source space
+ - :math:`\mathbf{C_2}`: Metric cost matrix in the target space
+ - :math:`\mathbf{p}`: distribution in the source space
+ - :math:`\mathbf{q}`: distribution in the target space
+ - `L`: loss function to account for the misfit between the similarity matrices
Parameters
----------
- C1 : ndarray, shape (ns, ns)
+ C1 : array-like, shape (ns, ns)
Metric cost matrix in the source space
- C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
- p : ndarray, shape (ns,)
+ C2 : array-like, shape (nt, nt)
+ Metric cost matrix in the target space
+ p : array-like, shape (ns,)
Distribution in the source space
- q : ndarray, shape (nt,)
+ q : array-like, shape (nt,)
Distribution in the target space
loss_fun : str
loss function used for the solver either 'square_loss' or 'kl_loss'
-
max_iter : int, optional
Max number of iterations
tol : float, optional
@@ -303,22 +358,23 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs
log : bool, optional
record log if True
armijo : bool, optional
- If True the steps of the line-search is found via an armijo research. Else closed form is used.
- If there is convergence issues use False.
+ If True the step of the line-search is found via an armijo search. Else closed form is used.
+ If there are convergence issues use False.
**kwargs : dict
parameters can be directly passed to the ot.optim.cg solver
Returns
-------
- T : ndarray, shape (ns, nt)
- Doupling between the two spaces that minimizes:
- \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ T : array-like, shape (`ns`, `nt`)
+ Coupling between the two spaces that minimizes:
+
+ :math:`\sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}`
log : dict
Convergence information and loss.
References
----------
- .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ .. [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
@@ -327,6 +383,7 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs
mathematics 11.4 (2011): 417-487.
"""
+ p, q = list_to_array(p, q)
constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun)
@@ -348,29 +405,30 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs
def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs):
r"""
- Returns the gromov-wasserstein discrepancy between (C1,p) and (C2,q)
+ Returns the gromov-wasserstein discrepancy between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`
The function solves the following optimization problem:
.. math::
- GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ GW = \min_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
Where :
- - C1 : Metric cost matrix in the source space
- - C2 : Metric cost matrix in the target space
- - p : distribution in the source space
- - q : distribution in the target space
- - L : loss function to account for the misfit between the similarity matrices
+
+ - :math:`\mathbf{C_1}`: Metric cost matrix in the source space
+ - :math:`\mathbf{C_2}`: Metric cost matrix in the target space
+ - :math:`\mathbf{p}`: distribution in the source space
+ - :math:`\mathbf{q}`: distribution in the target space
+ - `L`: loss function to account for the misfit between the similarity matrices
Parameters
----------
- C1 : ndarray, shape (ns, ns)
+ C1 : array-like, shape (ns, ns)
Metric cost matrix in the source space
- C2 : ndarray, shape (nt, nt)
+ C2 : array-like, shape (nt, nt)
Metric cost matrix in the target space
- p : ndarray, shape (ns,)
+ p : array-like, shape (ns,)
Distribution in the source space.
- q : ndarray, shape (nt,)
+ q : array-like, shape (nt,)
Distribution in the target space.
loss_fun : str
loss function used for the solver either 'square_loss' or 'kl_loss'
@@ -383,8 +441,8 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwarg
log : bool, optional
record log if True
armijo : bool, optional
- If True the steps of the line-search is found via an armijo research. Else closed form is used.
- If there is convergence issues use False.
+ If True the step of the line-search is found via an armijo search. Else closed form is used.
+ If there are convergence issues use False.
Returns
-------
@@ -395,7 +453,7 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwarg
References
----------
- .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ .. [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
@@ -404,6 +462,7 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwarg
mathematics 11.4 (2011): 417-487.
"""
+ p, q = list_to_array(p, q)
constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun)
@@ -425,42 +484,45 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwarg
def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, armijo=False, log=False, **kwargs):
r"""
- Computes the FGW transport between two graphs see [24]
+ Computes the FGW transport between two graphs (see :ref:`[24] <references-fused-gromov-wasserstein>`)
.. math::
- \gamma = arg\min_\gamma (1-\\alpha)*<\gamma,M>_F + \\alpha* \sum_{i,j,k,l}
- L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ \gamma = \mathop{\arg \min}_\gamma (1 - \alpha) <\gamma, \mathbf{M}>_F + \alpha \sum_{i,j,k,l}
+ L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
+
+ s.t. \ \mathbf{\gamma} \mathbf{1} &= \mathbf{p}
- s.t. \gamma 1 = p
- \gamma^T 1= q
- \gamma\geq 0
+ \mathbf{\gamma}^T \mathbf{1} &= \mathbf{q}
+
+ \mathbf{\gamma} &\geq 0
where :
- - M is the (ns,nt) metric cost matrix
- - p and q are source and target weights (sum to 1)
- - L is a loss function to account for the misfit between the similarity matrices
- The algorithm used for solving the problem is conditional gradient as discussed in [24]_
+ - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix
+ - :math:`\mathbf{p}` and :math:`\mathbf{q}` are source and target weights (sum to 1)
+ - `L` is a loss function to account for the misfit between the similarity matrices
+
+ The algorithm used for solving the problem is conditional gradient as discussed in :ref:`[24] <references-fused-gromov-wasserstein>`
Parameters
----------
- M : ndarray, shape (ns, nt)
+ M : array-like, shape (ns, nt)
Metric cost matrix between features across domains
- C1 : ndarray, shape (ns, ns)
+ C1 : array-like, shape (ns, ns)
Metric cost matrix representative of the structure in the source space
- C2 : ndarray, shape (nt, nt)
+ C2 : array-like, shape (nt, nt)
Metric cost matrix representative of the structure in the target space
- p : ndarray, shape (ns,)
+ p : array-like, shape (ns,)
Distribution in the source space
- q : ndarray, shape (nt,)
+ q : array-like, shape (nt,)
Distribution in the target space
loss_fun : str, optional
Loss function used for the solver
alpha : float, optional
Trade-off parameter (0 < alpha < 1)
armijo : bool, optional
- If True the steps of the line-search is found via an armijo research. Else closed form is used.
- If there is convergence issues use False.
+ If True the step of the line-search is found via an armijo search. Else closed form is used.
+ If there are convergence issues use False.
log : bool, optional
record log if True
**kwargs : dict
@@ -468,18 +530,21 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5,
Returns
-------
- gamma : ndarray, shape (ns, nt)
+ gamma : array-like, shape (`ns`, `nt`)
Optimal transportation matrix for the given parameters.
log : dict
Log dictionary return only if log==True in parameters.
+
+ .. _references-fused-gromov-wasserstein:
References
----------
- .. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain
+ .. [24] Vayer Titouan, Chapel Laetitia, Flamary Rémi, Tavenard Romain
and Courty Nicolas "Optimal Transport for structured data with
application on graphs", International Conference on Machine Learning
(ICML). 2019.
"""
+ p, q = list_to_array(p, q)
constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun)
@@ -501,61 +566,67 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5,
def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, armijo=False, log=False, **kwargs):
r"""
- Computes the FGW distance between two graphs see [24]
+ Computes the FGW distance between two graphs (see :ref:`[24] <references-fused-gromov-wasserstein2>`)
.. math::
- \min_\gamma (1-\\alpha)*<\gamma,M>_F + \\alpha* \sum_{i,j,k,l}
- L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ \min_\gamma (1 - \alpha) <\gamma, \mathbf{M}>_F + \alpha \sum_{i,j,k,l}
+ L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
+ s.t. \ \mathbf{\gamma} \mathbf{1} &= \mathbf{p}
- s.t. \gamma 1 = p
- \gamma^T 1= q
- \gamma\geq 0
+ \mathbf{\gamma}^T \mathbf{1} &= \mathbf{q}
+
+ \mathbf{\gamma} &\geq 0
where :
- - M is the (ns,nt) metric cost matrix
- - p and q are source and target weights (sum to 1)
- - L is a loss function to account for the misfit between the similarity matrices
- The algorithm used for solving the problem is conditional gradient as discussed in [1]_
+
+ - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix
+ - :math:`\mathbf{p}` and :math:`\mathbf{q}` are source and target weights (sum to 1)
+ - `L` is a loss function to account for the misfit between the similarity matrices
+
+ The algorithm used for solving the problem is conditional gradient as discussed in :ref:`[24] <references-fused-gromov-wasserstein2>`
Parameters
----------
- M : ndarray, shape (ns, nt)
+ M : array-like, shape (ns, nt)
Metric cost matrix between features across domains
- C1 : ndarray, shape (ns, ns)
+ C1 : array-like, shape (ns, ns)
Metric cost matrix representative of the structure in the source space.
- C2 : ndarray, shape (nt, nt)
+ C2 : array-like, shape (nt, nt)
Metric cost matrix representative of the structure in the target space.
- p : ndarray, shape (ns,)
+ p : array-like, shape (ns,)
Distribution in the source space.
- q : ndarray, shape (nt,)
+ q : array-like, shape (nt,)
Distribution in the target space.
loss_fun : str, optional
Loss function used for the solver.
alpha : float, optional
Trade-off parameter (0 < alpha < 1)
armijo : bool, optional
- If True the steps of the line-search is found via an armijo research.
- Else closed form is used. If there is convergence issues use False.
+ If True the step of the line-search is found via an armijo search.
+ Else closed form is used. If there are convergence issues use False.
log : bool, optional
Record log if True.
**kwargs : dict
- Parameters can be directly pased to the ot.optim.cg solver.
+ Parameters can be directly passed to the ot.optim.cg solver.
Returns
-------
- gamma : ndarray, shape (ns, nt)
+ gamma : array-like, shape (ns, nt)
Optimal transportation matrix for the given parameters.
log : dict
Log dictionary return only if log==True in parameters.
+
+ .. _references-fused-gromov-wasserstein2:
References
----------
- .. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain
+ .. [24] Vayer Titouan, Chapel Laetitia, Flamary Rémi, Tavenard Romain
and Courty Nicolas
"Optimal Transport for structured data with application on graphs"
International Conference on Machine Learning (ICML). 2019.
"""
+ p, q = list_to_array(p, q)
constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun)
@@ -579,60 +650,64 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5
def GW_distance_estimation(C1, C2, p, q, loss_fun, T,
nb_samples_p=None, nb_samples_q=None, std=True, random_state=None):
r"""
- Returns an approximation of the gromov-wasserstein cost between (C1,p) and (C2,q)
- with a fixed transport plan T.
-
- The function gives an unbiased approximation of the following equation:
-
- .. math::
- GW = \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
-
- Where :
-
- - C1 : Metric cost matrix in the source space
- - C2 : Metric cost matrix in the target space
- - L : Loss function to account for the misfit between the similarity matrices
- - T : Matrix with marginal p and q
-
- Parameters
- ----------
- C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
- C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
- p : ndarray, shape (ns,)
- Distribution in the source space
- q : ndarray, shape (nt,)
- Distribution in the target space
- loss_fun : function: \mathcal{R} \times \mathcal{R} \shortarrow \mathcal{R}
- Loss function used for the distance, the transport plan does not depend on the loss function
- T : csr or ndarray, shape (ns, nt)
- Transport plan matrix, either a sparse csr matrix or
- nb_samples_p : int, optional
- nb_samples_p is the number of samples (without replacement) along the first dimension of T.
- nb_samples_q : int, optional
- nb_samples_q is the number of samples along the second dimension of T, for each sample along the first.
- std : bool, optional
- Standard deviation associated with the prediction of the gromov-wasserstein cost.
- random_state : int or RandomState instance, optional
- Fix the seed for to allow reproducibility
-
- Returns
- -------
- : float
- Gromov-wasserstein cost
-
- References
- ----------
- .. [14] Kerdoncuff, Tanguy, Emonet, Rémi, Sebban, Marc
- "Sampled Gromov Wasserstein."
- Machine Learning Journal (MLJ). 2021.
-
- """
+ Returns an approximation of the gromov-wasserstein cost between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`
+ with a fixed transport plan :math:`\mathbf{T}`.
+
+ The function gives an unbiased approximation of the following equation:
+
+ .. math::
+
+ GW = \sum_{i,j,k,l} L(\mathbf{C_{1}}_{i,k}, \mathbf{C_{2}}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
+
+ Where :
+
+ - :math:`\mathbf{C_1}`: Metric cost matrix in the source space
+ - :math:`\mathbf{C_2}`: Metric cost matrix in the target space
+ - `L` : Loss function to account for the misfit between the similarity matrices
+ - :math:`\mathbf{T}`: Matrix with marginal :math:`\mathbf{p}` and :math:`\mathbf{q}`
+
+ Parameters
+ ----------
+ C1 : array-like, shape (ns, ns)
+ Metric cost matrix in the source space
+ C2 : array-like, shape (nt, nt)
+ Metric cost matrix in the target space
+ p : array-like, shape (ns,)
+ Distribution in the source space
+ q : array-like, shape (nt,)
+ Distribution in the target space
+ loss_fun : function: :math:`\mathbb{R} \times \mathbb{R} \mapsto \mathbb{R}`
+ Loss function used for the distance, the transport plan does not depend on the loss function
+ T : csr or array-like, shape (ns, nt)
+ Transport plan matrix, either a sparse csr or a dense matrix
+ nb_samples_p : int, optional
+ `nb_samples_p` is the number of samples (without replacement) along the first dimension of :math:`\mathbf{T}`
+ nb_samples_q : int, optional
+ `nb_samples_q` is the number of samples along the second dimension of :math:`\mathbf{T}`, for each sample along the first
+ std : bool, optional
+ Standard deviation associated with the prediction of the gromov-wasserstein cost
+ random_state : int or RandomState instance, optional
+ Fix the seed for reproducibility
+
+ Returns
+ -------
+ : float
+ Gromov-wasserstein cost
+
+ References
+ ----------
+ .. [14] Kerdoncuff, Tanguy, Emonet, Rémi, Sebban, Marc
+ "Sampled Gromov Wasserstein."
+ Machine Learning Journal (MLJ). 2021.
+
+ """
+ C1, C2, p, q = list_to_array(C1, C2, p, q)
+ nx = get_backend(C1, C2, p, q)
+
generator = check_random_state(random_state)
- len_p = len(p)
- len_q = len(q)
+ len_p = p.shape[0]
+ len_q = q.shape[0]
# It is always better to sample from the biggest distribution first.
if len_p < len_q:
@@ -642,7 +717,7 @@ def GW_distance_estimation(C1, C2, p, q, loss_fun, T,
T = T.T
if nb_samples_p is None:
- if issparse(T):
+ if nx.issparse(T):
# If T is sparse, it probably means that PoGroW was used, thus the number of samples is reduced
nb_samples_p = min(int(5 * (len_p * np.log(len_p)) ** 0.5), len_p)
else:
@@ -657,100 +732,112 @@ def GW_distance_estimation(C1, C2, p, q, loss_fun, T,
index_k = np.zeros((nb_samples_p, nb_samples_q), dtype=int)
index_l = np.zeros((nb_samples_p, nb_samples_q), dtype=int)
- list_value_sample = np.zeros((nb_samples_p, nb_samples_p, nb_samples_q))
index_i = generator.choice(len_p, size=nb_samples_p, p=p, replace=False)
index_j = generator.choice(len_p, size=nb_samples_p, p=p, replace=False)
for i in range(nb_samples_p):
- if issparse(T):
- T_indexi = T[index_i[i], :].toarray()[0]
- T_indexj = T[index_j[i], :].toarray()[0]
+ if nx.issparse(T):
+ T_indexi = nx.reshape(nx.todense(T[index_i[i], :]), (-1,))
+ T_indexj = nx.reshape(nx.todense(T[index_j[i], :]), (-1,))
else:
T_indexi = T[index_i[i], :]
T_indexj = T[index_j[i], :]
         # For each of the rows sampled, the columns are sampled.
- index_k[i] = generator.choice(len_q, size=nb_samples_q, p=T_indexi / T_indexi.sum(), replace=True)
- index_l[i] = generator.choice(len_q, size=nb_samples_q, p=T_indexj / T_indexj.sum(), replace=True)
-
- for n in range(nb_samples_q):
- list_value_sample[:, :, n] = loss_fun(C1[np.ix_(index_i, index_j)], C2[np.ix_(index_k[:, n], index_l[:, n])])
+ index_k[i] = generator.choice(
+ len_q,
+ size=nb_samples_q,
+ p=T_indexi / nx.sum(T_indexi),
+ replace=True
+ )
+ index_l[i] = generator.choice(
+ len_q,
+ size=nb_samples_q,
+ p=T_indexj / nx.sum(T_indexj),
+ replace=True
+ )
+
+ list_value_sample = nx.stack([
+ loss_fun(
+ C1[np.ix_(index_i, index_j)],
+ C2[np.ix_(index_k[:, n], index_l[:, n])]
+ ) for n in range(nb_samples_q)
+ ], axis=2)
if std:
- std_value = np.sum(np.std(list_value_sample, axis=2) ** 2) ** 0.5
- return np.mean(list_value_sample), std_value / (nb_samples_p * nb_samples_p)
+ std_value = nx.sum(nx.std(list_value_sample, axis=2) ** 2) ** 0.5
+ return nx.mean(list_value_sample), std_value / (nb_samples_p * nb_samples_p)
else:
- return np.mean(list_value_sample)
+ return nx.mean(list_value_sample)
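A minimal usage sketch of the estimator above (editor's illustration, not part of the patch); it assumes `GW_distance_estimation` is importable from `ot.gromov` and uses the independent coupling `np.outer(p, q)` as the fixed plan purely for illustration::

    import numpy as np
    import ot
    from ot.gromov import GW_distance_estimation

    rng = np.random.RandomState(0)
    xs, xt = rng.randn(20, 2), rng.randn(30, 2)
    C1, C2 = ot.dist(xs, xs), ot.dist(xt, xt)
    C1, C2 = C1 / C1.max(), C2 / C2.max()
    p, q = ot.unif(20), ot.unif(30)
    T = np.outer(p, q)  # any fixed plan with marginals p and q

    def loss(x, y):
        return np.abs(x - y)

    # std=True returns both the estimated cost and its standard deviation
    gw_est, gw_std = GW_distance_estimation(C1, C2, p, q, loss, T, std=True, random_state=0)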
def pointwise_gromov_wasserstein(C1, C2, p, q, loss_fun,
alpha=1, max_iter=100, threshold_plan=0, log=False, verbose=False, random_state=None):
r"""
- Returns the gromov-wasserstein transport between (C1,p) and (C2,q) using a stochastic Frank-Wolfe.
- This method as a O(max_iter \times PN^2) time complexity with P the number of Sinkhorn iterations.
-
- The function solves the following optimization problem:
-
- .. math::
- GW = arg\min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
-
- s.t. T 1 = p
-
- T^T 1= q
-
- T\geq 0
-
- Where :
-
- - C1 : Metric cost matrix in the source space
- - C2 : Metric cost matrix in the target space
- - p : distribution in the source space
- - q : distribution in the target space
- - L : loss function to account for the misfit between the similarity matrices
-
- Parameters
- ----------
- C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
- C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
- p : ndarray, shape (ns,)
- Distribution in the source space
- q : ndarray, shape (nt,)
- Distribution in the target space
- loss_fun : function: \mathcal{R} \times \mathcal{R} \shortarrow \mathcal{R}
- Loss function used for the distance, the transport plan does not depend on the loss function
- alpha : float
- Step of the Frank-Wolfe algorithm, should be between 0 and 1
- max_iter : int, optional
- Max number of iterations
- threshold_plan : float, optional
- Deleting very small value in the transport plan. If above zero, it violate the marginal constraints.
- verbose : bool, optional
- Print information along iterations
- log : bool, optional
- Gives the distance estimated and the standard deviation
- random_state : int or RandomState instance, optional
- Fix the seed for to allow reproducibility
-
- Returns
- -------
- T : ndarray, shape (ns, nt)
- Optimal coupling between the two spaces
-
- References
- ----------
- .. [14] Kerdoncuff, Tanguy, Emonet, Rémi, Sebban, Marc
- "Sampled Gromov Wasserstein."
- Machine Learning Journal (MLJ). 2021.
-
- """
- C1 = np.asarray(C1, dtype=np.float64)
- C2 = np.asarray(C2, dtype=np.float64)
- p = np.asarray(p, dtype=np.float64)
- q = np.asarray(q, dtype=np.float64)
- len_p = len(p)
- len_q = len(q)
+ Returns the gromov-wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` using a stochastic Frank-Wolfe.
+ This method has a :math:`\mathcal{O}(\mathrm{max\_iter} \times PN^2)` time complexity with `P` the number of Sinkhorn iterations.
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
+
+ s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
+
+ \mathbf{T}^T \mathbf{1} &= \mathbf{q}
+
+ \mathbf{T} &\geq 0
+
+ Where :
+
+ - :math:`\mathbf{C_1}`: Metric cost matrix in the source space
+ - :math:`\mathbf{C_2}`: Metric cost matrix in the target space
+ - :math:`\mathbf{p}`: distribution in the source space
+ - :math:`\mathbf{q}`: distribution in the target space
+ - `L`: loss function to account for the misfit between the similarity matrices
+
+ Parameters
+ ----------
+ C1 : array-like, shape (ns, ns)
+ Metric cost matrix in the source space
+ C2 : array-like, shape (nt, nt)
+ Metric cost matrix in the target space
+ p : array-like, shape (ns,)
+ Distribution in the source space
+ q : array-like, shape (nt,)
+ Distribution in the target space
+ loss_fun : function: :math:`\mathbb{R} \times \mathbb{R} \mapsto \mathbb{R}`
+        Loss function used for the distance; the transport plan does not depend on the loss function
+ alpha : float
+ Step of the Frank-Wolfe algorithm, should be between 0 and 1
+ max_iter : int, optional
+ Max number of iterations
+ threshold_plan : float, optional
+        Threshold for deleting very small values in the transport plan. If above zero, it violates the marginal constraints.
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+        If True, also returns a log dictionary with the estimated distance and its standard deviation
+ random_state : int or RandomState instance, optional
+ Fix the seed for reproducibility
+
+ Returns
+ -------
+ T : array-like, shape (`ns`, `nt`)
+ Optimal coupling between the two spaces
+
+ References
+ ----------
+ .. [14] Kerdoncuff, Tanguy, Emonet, Rémi, Sebban, Marc
+ "Sampled Gromov Wasserstein."
+ Machine Learning Journal (MLJ). 2021.
+
+ """
+ C1, C2, p, q = list_to_array(C1, C2, p, q)
+ nx = get_backend(C1, C2, p, q)
+
+ len_p = p.shape[0]
+ len_q = q.shape[0]
generator = check_random_state(random_state)
@@ -759,30 +846,35 @@ def pointwise_gromov_wasserstein(C1, C2, p, q, loss_fun,
# Initialize with default marginal
index[0] = generator.choice(len_p, size=1, p=p)
index[1] = generator.choice(len_q, size=1, p=q)
- T = emd_1d(C1[index[0]], C2[index[1]], a=p, b=q, dense=False).tocsr()
+ T = nx.tocsr(emd_1d(C1[index[0]], C2[index[1]], a=p, b=q, dense=False))
best_gw_dist_estimated = np.inf
for cpt in range(max_iter):
index[0] = generator.choice(len_p, size=1, p=p)
- T_index0 = T[index[0], :].toarray()[0]
+ T_index0 = nx.reshape(nx.todense(T[index[0], :]), (-1,))
index[1] = generator.choice(len_q, size=1, p=T_index0 / T_index0.sum())
if alpha == 1:
- T = emd_1d(C1[index[0]], C2[index[1]], a=p, b=q, dense=False).tocsr()
+ T = nx.tocsr(
+ emd_1d(C1[index[0]], C2[index[1]], a=p, b=q, dense=False)
+ )
else:
- new_T = emd_1d(C1[index[0]], C2[index[1]], a=p, b=q, dense=False).tocsr()
+ new_T = nx.tocsr(
+ emd_1d(C1[index[0]], C2[index[1]], a=p, b=q, dense=False)
+ )
T = (1 - alpha) * T + alpha * new_T
- # To limit the number of non 0, the values bellow the threshold are set to 0.
- T.data[T.data < threshold_plan] = 0
- T.eliminate_zeros()
+            # To limit the number of nonzero entries, the values below the threshold are set to 0.
+ T = nx.eliminate_zeros(T, threshold=threshold_plan)
if cpt % 10 == 0 or cpt == (max_iter - 1):
- gw_dist_estimated = GW_distance_estimation(C1=C1, C2=C2, loss_fun=loss_fun,
- p=p, q=q, T=T, std=False, random_state=generator)
+ gw_dist_estimated = GW_distance_estimation(
+ C1=C1, C2=C2, loss_fun=loss_fun,
+ p=p, q=q, T=T, std=False, random_state=generator
+ )
if gw_dist_estimated < best_gw_dist_estimated:
best_gw_dist_estimated = gw_dist_estimated
- best_T = T.copy()
+ best_T = nx.copy(T)
if verbose:
if cpt % 200 == 0:
@@ -791,9 +883,10 @@ def pointwise_gromov_wasserstein(C1, C2, p, q, loss_fun,
if log:
log = {}
- log["gw_dist_estimated"], log["gw_dist_std"] = GW_distance_estimation(C1=C1, C2=C2, loss_fun=loss_fun,
- p=p, q=q, T=best_T,
- random_state=generator)
+ log["gw_dist_estimated"], log["gw_dist_std"] = GW_distance_estimation(
+ C1=C1, C2=C2, loss_fun=loss_fun,
+ p=p, q=q, T=best_T, random_state=generator
+ )
return best_T, log
return best_T
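For context, a hedged sketch of how the solver above is called (mirroring the updated `test_pointwise_gromov` further down in this patch); the returned plan is stored as a sparse csr tensor, so it is densified through the backend before inspection::

    import numpy as np
    import ot
    from ot.backend import NumpyBackend

    rng = np.random.RandomState(42)
    xs, xt = rng.randn(30, 2), rng.randn(30, 2)
    C1, C2 = ot.dist(xs, xs), ot.dist(xt, xt)
    C1, C2 = C1 / C1.max(), C2 / C2.max()
    p, q = ot.unif(30), ot.unif(30)

    def loss(x, y):
        return np.abs(x - y)

    T, log = ot.gromov.pointwise_gromov_wasserstein(
        C1, C2, p, q, loss, max_iter=100, log=True, random_state=42)
    T = NumpyBackend().todense(T)  # the plan is built as a sparse csr matrix
    print(log['gw_dist_estimated'], log['gw_dist_std'])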
@@ -802,71 +895,70 @@ def sampled_gromov_wasserstein(C1, C2, p, q, loss_fun,
nb_samples_grad=100, epsilon=1, max_iter=500, log=False, verbose=False,
random_state=None):
r"""
- Returns the gromov-wasserstein transport between (C1,p) and (C2,q) using a 1-stochastic Frank-Wolfe.
- This method as a O(max_iter \times Nlog(N)) time complexity by relying on the 1D Optimal Transport solver.
-
- The function solves the following optimization problem:
-
- .. math::
- GW = arg\min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
-
- s.t. T 1 = p
-
- T^T 1= q
-
- T\geq 0
-
- Where :
-
- - C1 : Metric cost matrix in the source space
- - C2 : Metric cost matrix in the target space
- - p : distribution in the source space
- - q : distribution in the target space
- - L : loss function to account for the misfit between the similarity matrices
-
- Parameters
- ----------
- C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
- C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
- p : ndarray, shape (ns,)
- Distribution in the source space
- q : ndarray, shape (nt,)
- Distribution in the target space
- loss_fun : function: \mathcal{R} \times \mathcal{R} \shortarrow \mathcal{R}
- Loss function used for the distance, the transport plan does not depend on the loss function
- nb_samples_grad : int
- Number of samples to approximate the gradient
- epsilon : float
- Weight of the Kullback-Leiber regularization
- max_iter : int, optional
- Max number of iterations
- verbose : bool, optional
- Print information along iterations
- log : bool, optional
- Gives the distance estimated and the standard deviation
- random_state : int or RandomState instance, optional
- Fix the seed for to allow reproducibility
-
- Returns
- -------
- T : ndarray, shape (ns, nt)
- Optimal coupling between the two spaces
-
- References
- ----------
- .. [14] Kerdoncuff, Tanguy, Emonet, Rémi, Sebban, Marc
- "Sampled Gromov Wasserstein."
- Machine Learning Journal (MLJ). 2021.
-
- """
- C1 = np.asarray(C1, dtype=np.float64)
- C2 = np.asarray(C2, dtype=np.float64)
- p = np.asarray(p, dtype=np.float64)
- q = np.asarray(q, dtype=np.float64)
- len_p = len(p)
- len_q = len(q)
+ Returns the gromov-wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` using a 1-stochastic Frank-Wolfe.
+ This method has a :math:`\mathcal{O}(\mathrm{max\_iter} \times N \log(N))` time complexity by relying on the 1D Optimal Transport solver.
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
+
+ s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
+
+ \mathbf{T}^T \mathbf{1} &= \mathbf{q}
+
+ \mathbf{T} &\geq 0
+
+ Where :
+
+ - :math:`\mathbf{C_1}`: Metric cost matrix in the source space
+ - :math:`\mathbf{C_2}`: Metric cost matrix in the target space
+ - :math:`\mathbf{p}`: distribution in the source space
+ - :math:`\mathbf{q}`: distribution in the target space
+ - `L`: loss function to account for the misfit between the similarity matrices
+
+ Parameters
+ ----------
+ C1 : array-like, shape (ns, ns)
+ Metric cost matrix in the source space
+ C2 : array-like, shape (nt, nt)
+ Metric cost matrix in the target space
+ p : array-like, shape (ns,)
+ Distribution in the source space
+ q : array-like, shape (nt,)
+ Distribution in the target space
+ loss_fun : function: :math:`\mathbb{R} \times \mathbb{R} \mapsto \mathbb{R}`
+        Loss function used for the distance; the transport plan does not depend on the loss function
+ nb_samples_grad : int
+ Number of samples to approximate the gradient
+ epsilon : float
+ Weight of the Kullback-Leibler regularization
+ max_iter : int, optional
+ Max number of iterations
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+        If True, also returns a log dictionary with the estimated distance and its standard deviation
+ random_state : int or RandomState instance, optional
+ Fix the seed for reproducibility
+
+ Returns
+ -------
+ T : array-like, shape (`ns`, `nt`)
+ Optimal coupling between the two spaces
+
+ References
+ ----------
+ .. [14] Kerdoncuff, Tanguy, Emonet, Rémi, Sebban, Marc
+ "Sampled Gromov Wasserstein."
+ Machine Learning Journal (MLJ). 2021.
+
+ """
+ C1, C2, p, q = list_to_array(C1, C2, p, q)
+ nx = get_backend(C1, C2, p, q)
+
+ len_p = p.shape[0]
+ len_q = q.shape[0]
generator = check_random_state(random_state)
@@ -880,12 +972,12 @@ def sampled_gromov_wasserstein(C1, C2, p, q, loss_fun,
nb_samples_grad_p, nb_samples_grad_q = nb_samples_grad, 1
else:
nb_samples_grad_p, nb_samples_grad_q = nb_samples_grad
- T = np.outer(p, q)
+ T = nx.outer(p, q)
     # continue_loop allows stopping the loop if there are several successive small modifications of T.
continue_loop = 0
# The gradient of GW is more complex if the two matrices are not symmetric.
- C_are_symmetric = np.allclose(C1, C1.T, rtol=1e-10, atol=1e-10) and np.allclose(C2, C2.T, rtol=1e-10, atol=1e-10)
+ C_are_symmetric = nx.allclose(C1, C1.T, rtol=1e-10, atol=1e-10) and nx.allclose(C2, C2.T, rtol=1e-10, atol=1e-10)
for cpt in range(max_iter):
index0 = generator.choice(len_p, size=nb_samples_grad_p, p=p, replace=False)
@@ -893,28 +985,30 @@ def sampled_gromov_wasserstein(C1, C2, p, q, loss_fun,
for i, index0_i in enumerate(index0):
index1 = generator.choice(len_q,
size=nb_samples_grad_q,
- p=T[index0_i, :] / T[index0_i, :].sum(),
+ p=T[index0_i, :] / nx.sum(T[index0_i, :]),
replace=False)
             # If the matrices C are not symmetric, the gradient has 2 terms, thus one term is chosen randomly.
if (not C_are_symmetric) and generator.rand(1) > 0.5:
- Lik += np.mean(loss_fun(np.expand_dims(C1[:, np.repeat(index0[i], nb_samples_grad_q)], 1),
- np.expand_dims(C2[:, index1], 0)),
- axis=2)
+ Lik += nx.mean(loss_fun(
+ C1[:, [index0[i]] * nb_samples_grad_q][:, None, :],
+ C2[:, index1][None, :, :]
+ ), axis=2)
else:
- Lik += np.mean(loss_fun(np.expand_dims(C1[np.repeat(index0[i], nb_samples_grad_q), :], 2),
- np.expand_dims(C2[index1, :], 1)),
- axis=0)
+ Lik += nx.mean(loss_fun(
+ C1[[index0[i]] * nb_samples_grad_q, :][:, :, None],
+ C2[index1, :][:, None, :]
+ ), axis=0)
- max_Lik = np.max(Lik)
+ max_Lik = nx.max(Lik)
if max_Lik == 0:
continue
# This division by the max is here to facilitate the choice of epsilon.
Lik /= max_Lik
if epsilon > 0:
- # Set to infinity all the numbers bellow exp(-200) to avoid log of 0.
- log_T = np.log(np.clip(T, np.exp(-200), 1))
- log_T[log_T == -200] = -np.inf
+            # Set the log to -inf for all the numbers below exp(-200) to avoid log of 0.
+ log_T = nx.log(nx.clip(T, np.exp(-200), 1))
+ log_T = nx.where(log_T == -200, -np.inf, log_T)
Lik = Lik - epsilon * log_T
try:
@@ -925,11 +1019,11 @@ def sampled_gromov_wasserstein(C1, C2, p, q, loss_fun,
else:
new_T = emd(a=p, b=q, M=Lik)
- change_T = ((T - new_T) ** 2).mean()
+ change_T = nx.mean((T - new_T) ** 2)
if change_T <= 10e-20:
continue_loop += 1
             if continue_loop > 100: # Max number of small modifications of T
- T = new_T.copy()
+ T = nx.copy(new_T)
break
else:
continue_loop = 0
@@ -938,12 +1032,14 @@ def sampled_gromov_wasserstein(C1, C2, p, q, loss_fun,
if cpt % 200 == 0:
print('{:5s}|{:12s}'.format('It.', '||T_n - T_{n+1}||') + '\n' + '-' * 19)
print('{:5d}|{:8e}|'.format(cpt, change_T))
- T = new_T.copy()
+ T = nx.copy(new_T)
if log:
log = {}
- log["gw_dist_estimated"], log["gw_dist_std"] = GW_distance_estimation(C1=C1, C2=C2, loss_fun=loss_fun,
- p=p, q=q, T=T, random_state=generator)
+ log["gw_dist_estimated"], log["gw_dist_std"] = GW_distance_estimation(
+ C1=C1, C2=C2, loss_fun=loss_fun,
+ p=p, q=q, T=T, random_state=generator
+ )
return T, log
return T
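Likewise, a small sketch of the 1-stochastic Frank-Wolfe solver above, following the call pattern of the updated `test_sampled_gromov`; `epsilon` weights the Kullback-Leibler regularization described in the docstring::

    import numpy as np
    import ot

    rng = np.random.RandomState(42)
    xs, xt = rng.randn(30, 2), rng.randn(30, 2)
    C1, C2 = ot.dist(xs, xs), ot.dist(xt, xt)
    C1, C2 = C1 / C1.max(), C2 / C2.max()
    p, q = ot.unif(30), ot.unif(30)

    def loss(x, y):
        return np.abs(x - y)

    T, log = ot.gromov.sampled_gromov_wasserstein(
        C1, C2, p, q, loss, max_iter=100, epsilon=1, log=True, random_state=42)
    print(log['gw_dist_estimated'], log['gw_dist_std'])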
@@ -951,38 +1047,37 @@ def sampled_gromov_wasserstein(C1, C2, p, q, loss_fun,
def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
max_iter=1000, tol=1e-9, verbose=False, log=False):
r"""
- Returns the gromov-wasserstein transport between (C1,p) and (C2,q)
-
- (C1,p) and (C2,q)
+ Returns the gromov-wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`
The function solves the following optimization problem:
.. math::
- GW = arg\min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
+ \mathbf{GW} = \mathop{\arg\min}_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} - \epsilon(H(\mathbf{T}))
- s.t. T 1 = p
+ s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
- T^T 1= q
+ \mathbf{T}^T \mathbf{1} &= \mathbf{q}
- T\geq 0
+ \mathbf{T} &\geq 0
Where :
- - C1 : Metric cost matrix in the source space
- - C2 : Metric cost matrix in the target space
- - p : distribution in the source space
- - q : distribution in the target space
- - L : loss function to account for the misfit between the similarity matrices
- - H : entropy
+
+ - :math:`\mathbf{C_1}`: Metric cost matrix in the source space
+ - :math:`\mathbf{C_2}`: Metric cost matrix in the target space
+ - :math:`\mathbf{p}`: distribution in the source space
+ - :math:`\mathbf{q}`: distribution in the target space
+ - `L`: loss function to account for the misfit between the similarity matrices
+ - `H`: entropy
Parameters
----------
- C1 : ndarray, shape (ns, ns)
+ C1 : array-like, shape (ns, ns)
Metric cost matrix in the source space
- C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
- p : ndarray, shape (ns,)
+ C2 : array-like, shape (nt, nt)
+ Metric cost matrix in the target space
+ p : array-like, shape (ns,)
Distribution in the source space
- q : ndarray, shape (nt,)
+ q : array-like, shape (nt,)
Distribution in the target space
loss_fun : string
Loss function used for the solver either 'square_loss' or 'kl_loss'
@@ -999,21 +1094,20 @@ def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
Returns
-------
- T : ndarray, shape (ns, nt)
+ T : array-like, shape (`ns`, `nt`)
Optimal coupling between the two spaces
References
----------
- .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ .. [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
"""
+ C1, C2, p, q = list_to_array(C1, C2, p, q)
+ nx = get_backend(C1, C2, p, q)
- C1 = np.asarray(C1, dtype=np.float64)
- C2 = np.asarray(C2, dtype=np.float64)
-
- T = np.outer(p, q) # Initialization
+ T = nx.outer(p, q)
constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun)
@@ -1035,7 +1129,7 @@ def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
if cpt % 10 == 0:
             # we can speed up the process by checking for the error only every
             # 10th iteration
- err = np.linalg.norm(T - Tprev)
+ err = nx.norm(T - Tprev)
if log:
log['err'].append(err)
@@ -1058,32 +1152,31 @@ def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
max_iter=1000, tol=1e-9, verbose=False, log=False):
r"""
- Returns the entropic gromov-wasserstein discrepancy between the two measured similarity matrices
-
- (C1,p) and (C2,q)
+ Returns the entropic gromov-wasserstein discrepancy between the two measured similarity matrices :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`
The function solves the following optimization problem:
.. math::
- GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
+ GW = \min_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} - \epsilon(H(\mathbf{T}))
Where :
- - C1 : Metric cost matrix in the source space
- - C2 : Metric cost matrix in the target space
- - p : distribution in the source space
- - q : distribution in the target space
- - L : loss function to account for the misfit between the similarity matrices
- - H : entropy
+
+ - :math:`\mathbf{C_1}`: Metric cost matrix in the source space
+ - :math:`\mathbf{C_2}`: Metric cost matrix in the target space
+ - :math:`\mathbf{p}`: distribution in the source space
+ - :math:`\mathbf{q}`: distribution in the target space
+ - `L`: loss function to account for the misfit between the similarity matrices
+ - `H`: entropy
Parameters
----------
- C1 : ndarray, shape (ns, ns)
+ C1 : array-like, shape (ns, ns)
Metric cost matrix in the source space
- C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
- p : ndarray, shape (ns,)
+ C2 : array-like, shape (nt, nt)
+ Metric cost matrix in the target space
+ p : array-like, shape (ns,)
Distribution in the source space
- q : ndarray, shape (nt,)
+ q : array-like, shape (nt,)
Distribution in the target space
loss_fun : str
Loss function used for the solver either 'square_loss' or 'kl_loss'
@@ -1105,7 +1198,7 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
References
----------
- .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ .. [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
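A brief usage sketch of the two entropic solvers documented above (editor's illustration based on the updated `test_entropic_gromov`); any backend array accepted by `get_backend` can be passed in place of the numpy inputs::

    import numpy as np
    import ot

    rng = np.random.RandomState(42)
    xs, xt = rng.randn(30, 2), rng.randn(30, 2)
    C1, C2 = ot.dist(xs, xs), ot.dist(xt, xt)
    C1, C2 = C1 / C1.max(), C2 / C2.max()
    p, q = ot.unif(30), ot.unif(30)

    # Regularized coupling with the square loss
    G = ot.gromov.entropic_gromov_wasserstein(
        C1, C2, p, q, 'square_loss', epsilon=5e-4, verbose=True)
    # Regularized discrepancy with the kl loss; the coupling is returned in the log
    gw, log = ot.gromov.entropic_gromov_wasserstein2(
        C1, C2, p, q, 'kl_loss', epsilon=1e-2, log=True)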
@@ -1122,40 +1215,39 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
- max_iter=1000, tol=1e-9, verbose=False, log=False, init_C=None):
+ max_iter=1000, tol=1e-9, verbose=False, log=False, init_C=None, random_state=None):
r"""
- Returns the gromov-wasserstein barycenters of S measured similarity matrices
-
- (Cs)_{s=1}^{s=S}
+ Returns the gromov-wasserstein barycenters of `S` measured similarity matrices :math:`(\mathbf{C}_s)_{1 \leq s \leq S}`
The function solves the following optimization problem:
.. math::
- C = argmin_{C\in R^{NxN}} \sum_s \lambda_s GW(C,C_s,p,p_s)
+ \mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)
Where :
- - :math:`C_s` : metric cost matrix
- - :math:`p_s` : distribution
+ - :math:`\mathbf{C}_s`: metric cost matrix
+ - :math:`\mathbf{p}_s`: distribution
Parameters
----------
N : int
Size of the targeted barycenter
- Cs : list of S np.ndarray of shape (ns,ns)
+ Cs : list of S array-like of shape (ns,ns)
Metric cost matrices
- ps : list of S np.ndarray of shape (ns,)
- Sample weights in the S spaces
- p : ndarray, shape(N,)
+ ps : list of S array-like of shape (ns,)
+ Sample weights in the `S` spaces
+ p : array-like, shape(N,)
Weights in the targeted barycenter
lambdas : list of float
- List of the S spaces' weights.
+ List of the `S` spaces' weights.
loss_fun : callable
Tensor-matrix multiplication function based on specific loss function.
update : callable
- function(p,lambdas,T,Cs) that updates C according to a specific Kernel
- with the S Ts couplings calculated at each iteration
+ function(:math:`\mathbf{p}`, lambdas, :math:`\mathbf{T}`, :math:`\mathbf{Cs}`) that updates
+ :math:`\mathbf{C}` according to a specific Kernel with the `S` :math:`\mathbf{T}_s` couplings
+ calculated at each iteration
epsilon : float
Regularization term >0
max_iter : int, optional
@@ -1166,32 +1258,36 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
Print information along iterations.
log : bool, optional
Record log if True.
- init_C : bool | ndarray, shape (N, N)
- Random initial value for the C matrix provided by user.
+ init_C : bool | array-like, shape (N, N)
+ Random initial value for the :math:`\mathbf{C}` matrix provided by user.
+ random_state : int or RandomState instance, optional
+ Fix the seed for reproducibility
Returns
-------
- C : ndarray, shape (N, N)
+ C : array-like, shape (`N`, `N`)
        Similarity matrix in the barycenter space (permuted arbitrarily)
References
----------
- .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ .. [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
"""
+ Cs = list_to_array(*Cs)
+ ps = list_to_array(*ps)
+ p = list_to_array(p)
+ nx = get_backend(*Cs, *ps, p)
S = len(Cs)
- Cs = [np.asarray(Cs[s], dtype=np.float64) for s in range(S)]
- lambdas = np.asarray(lambdas, dtype=np.float64)
-
# Initialization of C : random SPD matrix (if not provided by user)
if init_C is None:
- # XXX use random state
- xalea = np.random.randn(N, 2)
+ generator = check_random_state(random_state)
+ xalea = generator.randn(N, 2)
C = dist(xalea, xalea)
C /= C.max()
+ C = nx.from_numpy(C, type_as=p)
else:
C = init_C
@@ -1214,7 +1310,7 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
if cpt % 10 == 0:
             # we can speed up the process by checking for the error only every
             # 10th iteration
- err = np.linalg.norm(C - Cprev)
+ err = nx.norm(C - Cprev)
error.append(err)
if log:
@@ -1232,38 +1328,39 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
- max_iter=1000, tol=1e-9, verbose=False, log=False, init_C=None):
+ max_iter=1000, tol=1e-9, verbose=False, log=False, init_C=None, random_state=None):
r"""
- Returns the gromov-wasserstein barycenters of S measured similarity matrices
-
- (Cs)_{s=1}^{s=S}
+ Returns the gromov-wasserstein barycenters of `S` measured similarity matrices :math:`(\mathbf{C}_s)_{1 \leq s \leq S}`
- The function solves the following optimization problem with block
- coordinate descent:
+ The function solves the following optimization problem with block coordinate descent:
.. math::
- C = argmin_C\in R^NxN \sum_s \lambda_s GW(C,Cs,p,ps)
+
+ \mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)
Where :
- - Cs : metric cost matrix
- - ps : distribution
+ - :math:`\mathbf{C}_s`: metric cost matrix
+ - :math:`\mathbf{p}_s`: distribution
Parameters
----------
N : int
Size of the targeted barycenter
- Cs : list of S np.ndarray of shape (ns, ns)
+ Cs : list of S array-like of shape (ns, ns)
Metric cost matrices
- ps : list of S np.ndarray of shape (ns,)
- Sample weights in the S spaces
- p : ndarray, shape (N,)
+ ps : list of S array-like of shape (ns,)
+ Sample weights in the `S` spaces
+ p : array-like, shape (N,)
Weights in the targeted barycenter
lambdas : list of float
- List of the S spaces' weights
- loss_fun : tensor-matrix multiplication function based on specific loss function
- update : function(p,lambdas,T,Cs) that updates C according to a specific Kernel
- with the S Ts couplings calculated at each iteration
+ List of the `S` spaces' weights
+ loss_fun : callable
+ tensor-matrix multiplication function based on specific loss function
+ update : callable
+ function(:math:`\mathbf{p}`, lambdas, :math:`\mathbf{T}`, :math:`\mathbf{Cs}`) that updates
+ :math:`\mathbf{C}` according to a specific Kernel with the `S` :math:`\mathbf{T}_s` couplings
+ calculated at each iteration
max_iter : int, optional
Max number of iterations
tol : float, optional
@@ -1272,32 +1369,37 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
Print information along iterations.
log : bool, optional
Record log if True.
- init_C : bool | ndarray, shape(N,N)
- Random initial value for the C matrix provided by user.
+ init_C : bool | array-like, shape(N,N)
+ Random initial value for the :math:`\mathbf{C}` matrix provided by user.
+ random_state : int or RandomState instance, optional
+ Fix the seed for reproducibility
Returns
-------
- C : ndarray, shape (N, N)
+ C : array-like, shape (`N`, `N`)
        Similarity matrix in the barycenter space (permuted arbitrarily)
References
----------
- .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ .. [12] Gabriel Peyré, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
"""
- S = len(Cs)
+ Cs = list_to_array(*Cs)
+ ps = list_to_array(*ps)
+ p = list_to_array(p)
+ nx = get_backend(*Cs, *ps, p)
- Cs = [np.asarray(Cs[s], dtype=np.float64) for s in range(S)]
- lambdas = np.asarray(lambdas, dtype=np.float64)
+ S = len(Cs)
# Initialization of C : random SPD matrix (if not provided by user)
if init_C is None:
- # XXX : should use a random state and not use the global seed
- xalea = np.random.randn(N, 2)
+ generator = check_random_state(random_state)
+ xalea = generator.randn(N, 2)
C = dist(xalea, xalea)
C /= C.max()
+ C = nx.from_numpy(C, type_as=p)
else:
C = init_C
@@ -1320,7 +1422,7 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
if cpt % 10 == 0:
             # we can speed up the process by checking for the error only every
             # 10th iteration
- err = np.linalg.norm(C - Cprev)
+ err = nx.norm(C - Cprev)
error.append(err)
if log:
@@ -1339,21 +1441,21 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_features=False,
p=None, loss_fun='square_loss', max_iter=100, tol=1e-9,
- verbose=False, log=False, init_C=None, init_X=None):
- """Compute the fgw barycenter as presented eq (5) in [24].
+ verbose=False, log=False, init_C=None, init_X=None, random_state=None):
+ """Compute the fgw barycenter as presented eq (5) in :ref:`[24] <references-fgw-barycenters>`
Parameters
----------
- N : integer
+ N : int
Desired number of samples of the target barycenter
- Ys: list of ndarray, each element has shape (ns,d)
+    Ys : list of array-like, each element has shape (ns,d)
Features of all samples
- Cs : list of ndarray, each element has shape (ns,ns)
+ Cs : list of array-like, each element has shape (ns,ns)
Structure matrices of all samples
- ps : list of ndarray, each element has shape (ns,)
+ ps : list of array-like, each element has shape (ns,)
Masses of all samples.
lambdas : list of float
- List of the S spaces' weights
+ List of the `S` spaces' weights
alpha : float
Alpha parameter for the fgw distance
fixed_structure : bool
@@ -1370,41 +1472,46 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_
Print information along iterations.
log : bool, optional
Record log if True.
- init_C : ndarray, shape (N,N), optional
+ init_C : array-like, shape (N,N), optional
Initialization for the barycenters' structure matrix. If not set
a random init is used.
- init_X : ndarray, shape (N,d), optional
+ init_X : array-like, shape (N,d), optional
Initialization for the barycenters' features. If not set a
random init is used.
+ random_state : int or RandomState instance, optional
+ Fix the seed for reproducibility
Returns
-------
- X : ndarray, shape (N, d)
+ X : array-like, shape (`N`, `d`)
Barycenters' features
- C : ndarray, shape (N, N)
+ C : array-like, shape (`N`, `N`)
Barycenters' structure matrix
- log_: dict
+ log : dict
Only returned when log=True. It contains the keys:
- T : list of (N,ns) transport matrices
- Ms : all distance matrices between the feature of the barycenter and the
- other features dist(X,Ys) shape (N,ns)
+ - :math:`\mathbf{T}`: list of (`N`, `ns`) transport matrices
+        - :math:`(\mathbf{M}_s)_s`: all distance matrices between the features of the barycenter and the other features :math:`(dist(\mathbf{X}, \mathbf{Y}_s))_s`, shape (`N`, `ns`)
+
+
+ .. _references-fgw-barycenters:
References
----------
- .. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain
+ .. [24] Vayer Titouan, Chapel Laetitia, Flamary Rémi, Tavenard Romain
and Courty Nicolas
"Optimal Transport for structured data with application on graphs"
International Conference on Machine Learning (ICML). 2019.
"""
+ Cs = list_to_array(*Cs)
+ ps = list_to_array(*ps)
+ Ys = list_to_array(*Ys)
+ p = list_to_array(p)
+ nx = get_backend(*Cs, *Ys, *ps)
+
S = len(Cs)
     d = Ys[0].shape[1]  # dimension of the node features
if p is None:
- p = np.ones(N) / N
-
- Cs = [np.asarray(Cs[s], dtype=np.float64) for s in range(S)]
- Ys = [np.asarray(Ys[s], dtype=np.float64) for s in range(S)]
-
- lambdas = np.asarray(lambdas, dtype=np.float64)
+ p = nx.ones(N, type_as=Cs[0]) / N
if fixed_structure:
if init_C is None:
@@ -1413,8 +1520,10 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_
C = init_C
else:
if init_C is None:
- xalea = np.random.randn(N, 2)
+ generator = check_random_state(random_state)
+ xalea = generator.randn(N, 2)
C = dist(xalea, xalea)
+ C = nx.from_numpy(C, type_as=ps[0])
else:
C = init_C
@@ -1425,13 +1534,13 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_
X = init_X
else:
if init_X is None:
- X = np.zeros((N, d))
+ X = nx.zeros((N, d), type_as=ps[0])
else:
X = init_X
- T = [np.outer(p, q) for q in ps]
+ T = [nx.outer(p, q) for q in ps]
- Ms = [np.asarray(dist(X, Ys[s]), dtype=np.float64) for s in range(len(Ys))] # Ms is N,ns
+ Ms = [dist(X, Ys[s]) for s in range(len(Ys))]
cpt = 0
err_feature = 1
@@ -1451,20 +1560,19 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_
Ys_temp = [y.T for y in Ys]
X = update_feature_matrix(lambdas, Ys_temp, T, p).T
- Ms = [np.asarray(dist(X, Ys[s]), dtype=np.float64) for s in range(len(Ys))]
+ Ms = [dist(X, Ys[s]) for s in range(len(Ys))]
if not fixed_structure:
if loss_fun == 'square_loss':
T_temp = [t.T for t in T]
- C = update_sructure_matrix(p, lambdas, T_temp, Cs)
+ C = update_structure_matrix(p, lambdas, T_temp, Cs)
T = [fused_gromov_wasserstein(Ms[s], C, Cs[s], p, ps[s], loss_fun, alpha,
numItermax=max_iter, stopThr=1e-5, verbose=verbose) for s in range(S)]
# T is N,ns
- err_feature = np.linalg.norm(X - Xprev.reshape(N, d))
- err_structure = np.linalg.norm(C - Cprev)
-
+ err_feature = nx.norm(X - nx.reshape(Xprev, (N, d)))
+ err_structure = nx.norm(C - Cprev)
if log:
log_['err_feature'].append(err_feature)
log_['err_structure'].append(err_structure)
@@ -1490,64 +1598,80 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_
return X, C
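To make the barycenter signature above concrete, a hedged sketch following the updated `test_fgw_barycenter` (the feature list `Ys`, the structure list `Cs`, the mass list `ps`, the weights `lambdas` and `alpha` are positional)::

    import numpy as np
    import ot

    rng = np.random.RandomState(42)
    ns, nt, n_samples = 20, 30, 3
    ys, yt = rng.randn(ns, 2), rng.randn(nt, 2)                     # node features
    C1, C2 = ot.dist(rng.randn(ns, 2)), ot.dist(rng.randn(nt, 2))   # structure matrices
    p1, p2, p = ot.unif(ns), ot.unif(nt), ot.unif(n_samples)

    X, C = ot.gromov.fgw_barycenters(
        n_samples, [ys, yt], [C1, C2], [p1, p2], [.5, .5], 0.5,
        fixed_structure=False, fixed_features=False, p=p,
        loss_fun='square_loss', max_iter=100, tol=1e-3, random_state=12345)
    # X: (n_samples, d) barycenter features, C: (n_samples, n_samples) structure matrix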
-def update_sructure_matrix(p, lambdas, T, Cs):
- """Updates C according to the L2 Loss kernel with the S Ts couplings.
+def update_structure_matrix(p, lambdas, T, Cs):
+ """Updates :math:`\mathbf{C}` according to the L2 Loss kernel with the `S` :math:`\mathbf{T}_s` couplings.
It is calculated at each iteration
Parameters
----------
- p : ndarray, shape (N,)
+ p : array-like, shape (N,)
Masses in the targeted barycenter.
lambdas : list of float
- List of the S spaces' weights.
- T : list of S ndarray of shape (ns, N)
- The S Ts couplings calculated at each iteration.
- Cs : list of S ndarray, shape (ns, ns)
- Metric cost matrices.
+ List of the `S` spaces' weights.
+ T : list of S array-like of shape (ns, N)
+ The `S` :math:`\mathbf{T}_s` couplings calculated at each iteration.
+ Cs : list of S array-like, shape (ns, ns)
+ Metric cost matrices.
Returns
-------
- C : ndarray, shape (nt, nt)
- Updated C matrix.
+ C : array-like, shape (`nt`, `nt`)
+ Updated :math:`\mathbf{C}` matrix.
"""
- tmpsum = sum([lambdas[s] * np.dot(T[s].T, Cs[s]).dot(T[s]) for s in range(len(T))])
- ppt = np.outer(p, p)
+ p = list_to_array(p)
+ T = list_to_array(*T)
+ Cs = list_to_array(*Cs)
+ nx = get_backend(*Cs, *T, p)
- return np.divide(tmpsum, ppt)
+ tmpsum = sum([
+ lambdas[s] * nx.dot(
+ nx.dot(T[s].T, Cs[s]),
+ T[s]
+ ) for s in range(len(T))
+ ])
+ ppt = nx.outer(p, p)
+ return tmpsum / ppt
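Spelled out, the L2 update implemented just above is (a restatement of the code for readability, not an addition to the patch):

.. math::
    \mathbf{C} = \frac{\sum_s \lambda_s \mathbf{T}_s^\top \mathbf{C}_s \mathbf{T}_s}{\mathbf{p} \mathbf{p}^\top}

where the division is taken entrywise.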
def update_feature_matrix(lambdas, Ys, Ts, p):
- """Updates the feature with respect to the S Ts couplings.
+ """Updates the feature with respect to the `S` :math:`\mathbf{T}_s` couplings.
See "Solving the barycenter problem with Block Coordinate Descent (BCD)"
- in [24] calculated at each iteration
+ in :ref:`[24] <references-update-feature-matrix>` calculated at each iteration
Parameters
----------
- p : ndarray, shape (N,)
+ p : array-like, shape (N,)
masses in the targeted barycenter
lambdas : list of float
- List of the S spaces' weights
- Ts : list of S np.ndarray(ns,N)
- the S Ts couplings calculated at each iteration
- Ys : list of S ndarray, shape(d,ns)
+ List of the `S` spaces' weights
+ Ts : list of S array-like, shape (ns,N)
+ The `S` :math:`\mathbf{T}_s` couplings calculated at each iteration
+ Ys : list of S array-like, shape (d,ns)
The features.
Returns
-------
- X : ndarray, shape (d, N)
+ X : array-like, shape (`d`, `N`)
+
+ .. _references-update-feature-matrix:
References
----------
- .. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain
- and Courty Nicolas
+ .. [24] Vayer Titouan, Chapel Laetitia, Flamary Rémi, Tavenard Romain and Courty Nicolas
"Optimal Transport for structured data with application on graphs"
International Conference on Machine Learning (ICML). 2019.
"""
- p = np.array(1. / p).reshape(-1,)
-
- tmpsum = sum([lambdas[s] * np.dot(Ys[s], Ts[s].T) * p[None, :] for s in range(len(Ts))])
-
+ p = list_to_array(p)
+ Ts = list_to_array(*Ts)
+ Ys = list_to_array(*Ys)
+ nx = get_backend(*Ys, *Ts, p)
+
+ p = 1. / p
+ tmpsum = sum([
+ lambdas[s] * nx.dot(Ys[s], Ts[s].T) * p[None, :]
+ for s in range(len(Ts))
+ ])
return tmpsum
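Similarly, the feature update computed above amounts to (editor's restatement of the code):

.. math::
    \mathbf{X} = \left(\sum_s \lambda_s \mathbf{Y}_s \mathbf{T}_s^\top\right) \operatorname{diag}\left(\frac{1}{\mathbf{p}}\right)

i.e. each column of the weighted sum is divided by the corresponding barycenter mass.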
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index c6757d1..4e95ccf 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -691,10 +691,8 @@ def emd_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True,
ot.lp.emd2_1d : EMD for 1d distributions (returns cost instead of the
transportation matrix)
"""
- a = np.asarray(a, dtype=np.float64)
- b = np.asarray(b, dtype=np.float64)
- x_a = np.asarray(x_a, dtype=np.float64)
- x_b = np.asarray(x_b, dtype=np.float64)
+ a, b, x_a, x_b = list_to_array(a, b, x_a, x_b)
+ nx = get_backend(x_a, x_b)
assert (x_a.ndim == 1 or x_a.ndim == 2 and x_a.shape[1] == 1), \
"emd_1d should only be used with monodimensional data"
@@ -702,27 +700,43 @@ def emd_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True,
"emd_1d should only be used with monodimensional data"
# if empty array given then use uniform distributions
- if a.ndim == 0 or len(a) == 0:
- a = np.ones((x_a.shape[0],), dtype=np.float64) / x_a.shape[0]
- if b.ndim == 0 or len(b) == 0:
- b = np.ones((x_b.shape[0],), dtype=np.float64) / x_b.shape[0]
+ if a is None or a.ndim == 0 or len(a) == 0:
+ a = nx.ones((x_a.shape[0],), type_as=x_a) / x_a.shape[0]
+ if b is None or b.ndim == 0 or len(b) == 0:
+ b = nx.ones((x_b.shape[0],), type_as=x_b) / x_b.shape[0]
# ensure that same mass
- np.testing.assert_almost_equal(a.sum(0),b.sum(0),err_msg='a and b vector must have the same sum')
- b=b*a.sum()/b.sum()
-
- x_a_1d = x_a.reshape((-1,))
- x_b_1d = x_b.reshape((-1,))
- perm_a = np.argsort(x_a_1d)
- perm_b = np.argsort(x_b_1d)
-
- G_sorted, indices, cost = emd_1d_sorted(a[perm_a], b[perm_b],
- x_a_1d[perm_a], x_b_1d[perm_b],
- metric=metric, p=p)
- G = coo_matrix((G_sorted, (perm_a[indices[:, 0]], perm_b[indices[:, 1]])),
- shape=(a.shape[0], b.shape[0]))
+ np.testing.assert_almost_equal(
+ nx.sum(a, axis=0),
+ nx.sum(b, axis=0),
+ err_msg='a and b vector must have the same sum'
+ )
+ b = b * nx.sum(a) / nx.sum(b)
+
+ x_a_1d = nx.reshape(x_a, (-1,))
+ x_b_1d = nx.reshape(x_b, (-1,))
+ perm_a = nx.argsort(x_a_1d)
+ perm_b = nx.argsort(x_b_1d)
+
+ G_sorted, indices, cost = emd_1d_sorted(
+ nx.to_numpy(a[perm_a]),
+ nx.to_numpy(b[perm_b]),
+ nx.to_numpy(x_a_1d[perm_a]),
+ nx.to_numpy(x_b_1d[perm_b]),
+ metric=metric, p=p
+ )
+
+ G = nx.coo_matrix(
+ G_sorted,
+ perm_a[indices[:, 0]],
+ perm_b[indices[:, 1]],
+ shape=(a.shape[0], b.shape[0]),
+ type_as=x_a
+ )
if dense:
- G = G.toarray()
+ G = nx.todense(G)
+ elif str(nx) == "jax":
+ warnings.warn("JAX does not support sparse matrices, converting to dense")
if log:
log = {'cost': cost}
return G, log
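The new sparse entry points used above (`nx.coo_matrix`, `nx.todense`, `nx.issparse`) follow the same calling convention across backends; a hedged sketch with the numpy backend, mirroring the checks added to `test_backend.py` below (it assumes `get_backend` accepts a single array)::

    import numpy as np
    from ot.backend import get_backend

    data = np.array([4., 5., 7., 9.])
    rows = np.array([0, 3, 1, 0])
    cols = np.array([0, 3, 1, 2])

    nx = get_backend(data)  # NumpyBackend here; torch tensors work the same way
    S = nx.coo_matrix(data, rows, cols, shape=(4, 4), type_as=data)
    D = nx.todense(S)       # dense (4, 4) array
    assert nx.issparse(S) or nx.__name__ == "jax"  # jax has no sparse support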
diff --git a/ot/optim.py b/ot/optim.py
index 34cbb17..6456c03 100644
--- a/ot/optim.py
+++ b/ot/optim.py
@@ -23,7 +23,7 @@ def line_search_armijo(f, xk, pk, gfk, old_fval,
r"""
Armijo linesearch function that works with matrices
- Find an approximate minimum of :math:`f(x_k + \\alpha \cdot p_k)` that satisfies the
+ Find an approximate minimum of :math:`f(x_k + \alpha \cdot p_k)` that satisfies the
armijo conditions.
Parameters
@@ -129,7 +129,7 @@ def solve_linesearch(cost, G, deltaG, Mi, f_val,
.. _references-solve-linesearch:
References
----------
- .. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain and Courty Nicolas
+ .. [24] Vayer Titouan, Chapel Laetitia, Flamary Rémi, Tavenard Romain and Courty Nicolas
"Optimal Transport for structured data with application on graphs"
International Conference on Machine Learning (ICML). 2019.
"""
@@ -162,13 +162,13 @@ def cg(a, b, M, reg, f, df, G0=None, numItermax=200, numItermaxEmd=100000,
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + \mathrm{reg} \cdot f(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg} \cdot f(\gamma)
- s.t. \ \gamma 1 = a
+ s.t. \ \gamma \mathbf{1} &= \mathbf{a}
- \gamma^T 1= b
+ \gamma^T \mathbf{1} &= \mathbf{b}
- \gamma\geq 0
+ \gamma &\geq 0
where :
- :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix
@@ -309,13 +309,13 @@ def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10,
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + \mathrm{reg_1}\cdot\Omega(\gamma) + \mathrm{reg_2}\cdot f(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg_1}\cdot\Omega(\gamma) + \mathrm{reg_2}\cdot f(\gamma)
- s.t. \ \gamma 1 = a
+ s.t. \ \gamma \mathbf{1} &= \mathbf{a}
- \gamma^T 1= b
+ \gamma^T \mathbf{1} &= \mathbf{b}
- \gamma\geq 0
+ \gamma &\geq 0
where :
- :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix
@@ -452,7 +452,7 @@ def solve_1d_linesearch_quad(a, b, c):
.. math::
- arg\min_{0 \leq x \leq 1} f(x) = ax^{2} + bx + c
+ \mathop{\arg \min}_{0 \leq x \leq 1} f(x) = ax^{2} + bx + c
Parameters
----------
diff --git a/test/test_backend.py b/test/test_backend.py
index 5853282..0f11ace 100644
--- a/test/test_backend.py
+++ b/test/test_backend.py
@@ -207,6 +207,22 @@ def test_empty_backend():
nx.stack([M, M])
with pytest.raises(NotImplementedError):
nx.reshape(M, (5, 3, 2))
+ with pytest.raises(NotImplementedError):
+ nx.coo_matrix(M, M, M)
+ with pytest.raises(NotImplementedError):
+ nx.issparse(M)
+ with pytest.raises(NotImplementedError):
+ nx.tocsr(M)
+ with pytest.raises(NotImplementedError):
+ nx.eliminate_zeros(M)
+ with pytest.raises(NotImplementedError):
+ nx.todense(M)
+ with pytest.raises(NotImplementedError):
+ nx.where(M, M, M)
+ with pytest.raises(NotImplementedError):
+ nx.copy(M)
+ with pytest.raises(NotImplementedError):
+ nx.allclose(M, M)
def test_func_backends(nx):
@@ -216,6 +232,11 @@ def test_func_backends(nx):
v = rnd.randn(3)
val = np.array([1.0])
+ # Sparse tensors test
+ sp_row = np.array([0, 3, 1, 0, 3])
+ sp_col = np.array([0, 3, 1, 2, 2])
+ sp_data = np.array([4, 5, 7, 9, 0])
+
lst_tot = []
for nx in [ot.backend.NumpyBackend(), nx]:
@@ -229,6 +250,10 @@ def test_func_backends(nx):
vb = nx.from_numpy(v)
val = nx.from_numpy(val)
+ sp_rowb = nx.from_numpy(sp_row)
+ sp_colb = nx.from_numpy(sp_col)
+ sp_datab = nx.from_numpy(sp_data)
+
A = nx.set_gradients(val, v, v)
lst_b.append(nx.to_numpy(A))
lst_name.append('set_gradients')
@@ -438,6 +463,37 @@ def test_func_backends(nx):
lst_b.append(nx.to_numpy(A))
lst_name.append('reshape')
+ sp_Mb = nx.coo_matrix(sp_datab, sp_rowb, sp_colb, shape=(4, 4))
+ nx.todense(Mb)
+ lst_b.append(nx.to_numpy(nx.todense(sp_Mb)))
+ lst_name.append('coo_matrix')
+
+ assert not nx.issparse(Mb), 'Assert fail on: issparse (expected False)'
+ assert nx.issparse(sp_Mb) or nx.__name__ == "jax", 'Assert fail on: issparse (expected True)'
+
+ A = nx.tocsr(sp_Mb)
+ lst_b.append(nx.to_numpy(nx.todense(A)))
+ lst_name.append('tocsr')
+
+ A = nx.eliminate_zeros(nx.copy(sp_datab), threshold=5.)
+ lst_b.append(nx.to_numpy(A))
+ lst_name.append('eliminate_zeros (dense)')
+
+ A = nx.eliminate_zeros(sp_Mb)
+ lst_b.append(nx.to_numpy(nx.todense(A)))
+ lst_name.append('eliminate_zeros (sparse)')
+
+ A = nx.where(Mb >= nx.stack([nx.linspace(0, 1, 10)] * 3, axis=1), Mb, 0.0)
+ lst_b.append(nx.to_numpy(A))
+ lst_name.append('where')
+
+ A = nx.copy(Mb)
+ lst_b.append(nx.to_numpy(A))
+ lst_name.append('copy')
+
+ assert nx.allclose(Mb, Mb), 'Assert fail on: allclose (expected True)'
+ assert not nx.allclose(2 * Mb, Mb), 'Assert fail on: allclose (expected False)'
+
lst_tot.append(lst_b)
lst_np = lst_tot[0]
diff --git a/test/test_bregman.py b/test/test_bregman.py
index c1120ba..6923d31 100644
--- a/test/test_bregman.py
+++ b/test/test_bregman.py
@@ -477,8 +477,8 @@ def test_lazy_empirical_sinkhorn(nx):
b = ot.unif(n)
numIterMax = 1000
- X_s = np.reshape(np.arange(n), (n, 1))
- X_t = np.reshape(np.arange(0, n), (n, 1))
+ X_s = np.reshape(np.arange(n, dtype=np.float64), (n, 1))
+ X_t = np.reshape(np.arange(0, n, dtype=np.float64), (n, 1))
M = ot.dist(X_s, X_t)
M_m = ot.dist(X_s, X_t, metric='euclidean')
diff --git a/test/test_gromov.py b/test/test_gromov.py
index 0242d72..509c54d 100644
--- a/test/test_gromov.py
+++ b/test/test_gromov.py
@@ -8,11 +8,12 @@
import numpy as np
import ot
+from ot.backend import NumpyBackend
import pytest
-def test_gromov():
+def test_gromov(nx):
n_samples = 50 # nb samples
mu_s = np.array([0, 0])
@@ -31,37 +32,50 @@ def test_gromov():
C1 /= C1.max()
C2 /= C2.max()
+ C1b = nx.from_numpy(C1)
+ C2b = nx.from_numpy(C2)
+ pb = nx.from_numpy(p)
+ qb = nx.from_numpy(q)
+
G = ot.gromov.gromov_wasserstein(C1, C2, p, q, 'square_loss', verbose=True)
+ Gb = nx.to_numpy(ot.gromov.gromov_wasserstein(C1b, C2b, pb, qb, 'square_loss', verbose=True))
# check constraints
+ np.testing.assert_allclose(G, Gb, atol=1e-06)
np.testing.assert_allclose(
- p, G.sum(1), atol=1e-04) # cf convergence gromov
+ p, Gb.sum(1), atol=1e-04) # cf convergence gromov
np.testing.assert_allclose(
- q, G.sum(0), atol=1e-04) # cf convergence gromov
+ q, Gb.sum(0), atol=1e-04) # cf convergence gromov
Id = (1 / (1.0 * n_samples)) * np.eye(n_samples, n_samples)
- np.testing.assert_allclose(
- G, np.flipud(Id), atol=1e-04)
+ np.testing.assert_allclose(Gb, np.flipud(Id), atol=1e-04)
gw, log = ot.gromov.gromov_wasserstein2(C1, C2, p, q, 'kl_loss', log=True)
+ gwb, logb = ot.gromov.gromov_wasserstein2(C1b, C2b, pb, qb, 'kl_loss', log=True)
gw_val = ot.gromov.gromov_wasserstein2(C1, C2, p, q, 'kl_loss', log=False)
+ gw_valb = ot.gromov.gromov_wasserstein2(C1b, C2b, pb, qb, 'kl_loss', log=False)
G = log['T']
+ Gb = nx.to_numpy(logb['T'])
- np.testing.assert_allclose(gw, 0, atol=1e-1, rtol=1e-1)
+ np.testing.assert_allclose(gw, gwb, atol=1e-06)
+ np.testing.assert_allclose(gwb, 0, atol=1e-1, rtol=1e-1)
- np.testing.assert_allclose(gw, gw_val, atol=1e-1, rtol=1e-1) # cf log=False
+ np.testing.assert_allclose(gw_val, gw_valb, atol=1e-06)
+ np.testing.assert_allclose(gwb, gw_valb, atol=1e-1, rtol=1e-1) # cf log=False
# check constraints
+ np.testing.assert_allclose(G, Gb, atol=1e-06)
np.testing.assert_allclose(
- p, G.sum(1), atol=1e-04) # cf convergence gromov
+ p, Gb.sum(1), atol=1e-04) # cf convergence gromov
np.testing.assert_allclose(
- q, G.sum(0), atol=1e-04) # cf convergence gromov
+ q, Gb.sum(0), atol=1e-04) # cf convergence gromov
-def test_entropic_gromov():
+@pytest.skip_backend("jax", reason="test very slow with jax backend")
+def test_entropic_gromov(nx):
n_samples = 50 # nb samples
mu_s = np.array([0, 0])
@@ -80,30 +94,44 @@ def test_entropic_gromov():
C1 /= C1.max()
C2 /= C2.max()
+ C1b = nx.from_numpy(C1)
+ C2b = nx.from_numpy(C2)
+ pb = nx.from_numpy(p)
+ qb = nx.from_numpy(q)
+
G = ot.gromov.entropic_gromov_wasserstein(
C1, C2, p, q, 'square_loss', epsilon=5e-4, verbose=True)
+ Gb = nx.to_numpy(ot.gromov.entropic_gromov_wasserstein(
+ C1b, C2b, pb, qb, 'square_loss', epsilon=5e-4, verbose=True
+ ))
# check constraints
+ np.testing.assert_allclose(G, Gb, atol=1e-06)
np.testing.assert_allclose(
- p, G.sum(1), atol=1e-04) # cf convergence gromov
+ p, Gb.sum(1), atol=1e-04) # cf convergence gromov
np.testing.assert_allclose(
- q, G.sum(0), atol=1e-04) # cf convergence gromov
+ q, Gb.sum(0), atol=1e-04) # cf convergence gromov
gw, log = ot.gromov.entropic_gromov_wasserstein2(
C1, C2, p, q, 'kl_loss', epsilon=1e-2, log=True)
+ gwb, logb = ot.gromov.entropic_gromov_wasserstein2(
+ C1b, C2b, pb, qb, 'kl_loss', epsilon=1e-2, log=True)
G = log['T']
+ Gb = nx.to_numpy(logb['T'])
+ np.testing.assert_allclose(gw, gwb, atol=1e-06)
np.testing.assert_allclose(gw, 0, atol=1e-1, rtol=1e-1)
# check constraints
+ np.testing.assert_allclose(G, Gb, atol=1e-06)
np.testing.assert_allclose(
- p, G.sum(1), atol=1e-04) # cf convergence gromov
+ p, Gb.sum(1), atol=1e-04) # cf convergence gromov
np.testing.assert_allclose(
- q, G.sum(0), atol=1e-04) # cf convergence gromov
+ q, Gb.sum(0), atol=1e-04) # cf convergence gromov
-def test_pointwise_gromov():
+def test_pointwise_gromov(nx):
n_samples = 50 # nb samples
mu_s = np.array([0, 0])
@@ -122,33 +150,52 @@ def test_pointwise_gromov():
C1 /= C1.max()
C2 /= C2.max()
+ C1b = nx.from_numpy(C1)
+ C2b = nx.from_numpy(C2)
+ pb = nx.from_numpy(p)
+ qb = nx.from_numpy(q)
+
def loss(x, y):
return np.abs(x - y)
+ def lossb(x, y):
+ return nx.abs(x - y)
+
G, log = ot.gromov.pointwise_gromov_wasserstein(
C1, C2, p, q, loss, max_iter=100, log=True, verbose=True, random_state=42)
+ G = NumpyBackend().todense(G)
+ Gb, logb = ot.gromov.pointwise_gromov_wasserstein(
+ C1b, C2b, pb, qb, lossb, max_iter=100, log=True, verbose=True, random_state=42)
+ Gb = nx.to_numpy(nx.todense(Gb))
# check constraints
+ np.testing.assert_allclose(G, Gb, atol=1e-06)
np.testing.assert_allclose(
- p[:, np.newaxis], G.sum(1), atol=1e-04) # cf convergence gromov
+ p, Gb.sum(1), atol=1e-04) # cf convergence gromov
np.testing.assert_allclose(
- q[np.newaxis, :], G.sum(0), atol=1e-04) # cf convergence gromov
+ q, Gb.sum(0), atol=1e-04) # cf convergence gromov
- assert log['gw_dist_estimated'] == 0.0
- assert log['gw_dist_std'] == 0.0
+ np.testing.assert_allclose(logb['gw_dist_estimated'], 0.0, atol=1e-08)
+ np.testing.assert_allclose(logb['gw_dist_std'], 0.0, atol=1e-08)
G, log = ot.gromov.pointwise_gromov_wasserstein(
C1, C2, p, q, loss, max_iter=100, alpha=0.1, log=True, verbose=True, random_state=42)
+ G = NumpyBackend().todense(G)
+ Gb, logb = ot.gromov.pointwise_gromov_wasserstein(
+ C1b, C2b, pb, qb, lossb, max_iter=100, alpha=0.1, log=True, verbose=True, random_state=42)
+ Gb = nx.to_numpy(nx.todense(Gb))
- assert log['gw_dist_estimated'] == 0.10342276348494964
- assert log['gw_dist_std'] == 0.0015952535464736394
+ np.testing.assert_allclose(G, Gb, atol=1e-06)
+ np.testing.assert_allclose(logb['gw_dist_estimated'], 0.10342276348494964, atol=1e-8)
+ np.testing.assert_allclose(logb['gw_dist_std'], 0.0015952535464736394, atol=1e-8)
-def test_sampled_gromov():
+@pytest.skip_backend("jax", reason="test very slow with jax backend")
+def test_sampled_gromov(nx):
n_samples = 50 # nb samples
- mu_s = np.array([0, 0])
- cov_s = np.array([[1, 0], [0, 1]])
+ mu_s = np.array([0, 0], dtype=np.float64)
+ cov_s = np.array([[1, 0], [0, 1]], dtype=np.float64)
xs = ot.datasets.make_2D_samples_gauss(n_samples, mu_s, cov_s, random_state=42)
@@ -163,23 +210,35 @@ def test_sampled_gromov():
C1 /= C1.max()
C2 /= C2.max()
+ C1b = nx.from_numpy(C1)
+ C2b = nx.from_numpy(C2)
+ pb = nx.from_numpy(p)
+ qb = nx.from_numpy(q)
+
def loss(x, y):
return np.abs(x - y)
+ def lossb(x, y):
+ return nx.abs(x - y)
+
G, log = ot.gromov.sampled_gromov_wasserstein(
C1, C2, p, q, loss, max_iter=100, epsilon=1, log=True, verbose=True, random_state=42)
+ Gb, logb = ot.gromov.sampled_gromov_wasserstein(
+ C1b, C2b, pb, qb, lossb, max_iter=100, epsilon=1, log=True, verbose=True, random_state=42)
+ Gb = nx.to_numpy(Gb)
# check constraints
+ np.testing.assert_allclose(G, Gb, atol=1e-06)
np.testing.assert_allclose(
- p, G.sum(1), atol=1e-04) # cf convergence gromov
+ p, Gb.sum(1), atol=1e-04) # cf convergence gromov
np.testing.assert_allclose(
- q, G.sum(0), atol=1e-04) # cf convergence gromov
+ q, Gb.sum(0), atol=1e-04) # cf convergence gromov
- assert log['gw_dist_estimated'] == 0.05679474884977278
- assert log['gw_dist_std'] == 0.0005986592106971995
+ np.testing.assert_allclose(logb['gw_dist_estimated'], 0.05679474884977278, atol=1e-08)
+ np.testing.assert_allclose(logb['gw_dist_std'], 0.0005986592106971995, atol=1e-08)
-def test_gromov_barycenter():
+def test_gromov_barycenter(nx):
ns = 10
nt = 20
@@ -188,26 +247,42 @@ def test_gromov_barycenter():
C1 = ot.dist(Xs)
C2 = ot.dist(Xt)
-
+ p1 = ot.unif(ns)
+ p2 = ot.unif(nt)
n_samples = 3
- Cb = ot.gromov.gromov_barycenters(n_samples, [C1, C2],
- [ot.unif(ns), ot.unif(nt)
- ], ot.unif(n_samples), [.5, .5],
- 'square_loss', # 5e-4,
- max_iter=100, tol=1e-3,
- verbose=True)
- np.testing.assert_allclose(Cb.shape, (n_samples, n_samples))
+ p = ot.unif(n_samples)
- Cb2 = ot.gromov.gromov_barycenters(n_samples, [C1, C2],
- [ot.unif(ns), ot.unif(nt)
- ], ot.unif(n_samples), [.5, .5],
- 'kl_loss', # 5e-4,
- max_iter=100, tol=1e-3)
- np.testing.assert_allclose(Cb2.shape, (n_samples, n_samples))
+ C1b = nx.from_numpy(C1)
+ C2b = nx.from_numpy(C2)
+ p1b = nx.from_numpy(p1)
+ p2b = nx.from_numpy(p2)
+ pb = nx.from_numpy(p)
+
+ Cb = ot.gromov.gromov_barycenters(
+ n_samples, [C1, C2], [p1, p2], p, [.5, .5],
+ 'square_loss', max_iter=100, tol=1e-3, verbose=True, random_state=42
+ )
+ Cbb = nx.to_numpy(ot.gromov.gromov_barycenters(
+ n_samples, [C1b, C2b], [p1b, p2b], pb, [.5, .5],
+ 'square_loss', max_iter=100, tol=1e-3, verbose=True, random_state=42
+ ))
+ np.testing.assert_allclose(Cb, Cbb, atol=1e-06)
+ np.testing.assert_allclose(Cbb.shape, (n_samples, n_samples))
+
+ Cb2 = ot.gromov.gromov_barycenters(
+ n_samples, [C1, C2], [p1, p2], p, [.5, .5],
+ 'kl_loss', max_iter=100, tol=1e-3, random_state=42
+ )
+ Cb2b = nx.to_numpy(ot.gromov.gromov_barycenters(
+ n_samples, [C1b, C2b], [p1b, p2b], pb, [.5, .5],
+ 'kl_loss', max_iter=100, tol=1e-3, random_state=42
+ ))
+ np.testing.assert_allclose(Cb2, Cb2b, atol=1e-06)
+ np.testing.assert_allclose(Cb2b.shape, (n_samples, n_samples))
@pytest.mark.filterwarnings("ignore:divide")
-def test_gromov_entropic_barycenter():
+def test_gromov_entropic_barycenter(nx):
ns = 10
nt = 20
@@ -216,26 +291,41 @@ def test_gromov_entropic_barycenter():
C1 = ot.dist(Xs)
C2 = ot.dist(Xt)
-
+ p1 = ot.unif(ns)
+ p2 = ot.unif(nt)
n_samples = 2
- Cb = ot.gromov.entropic_gromov_barycenters(n_samples, [C1, C2],
- [ot.unif(ns), ot.unif(nt)
- ], ot.unif(n_samples), [.5, .5],
- 'square_loss', 1e-3,
- max_iter=50, tol=1e-3,
- verbose=True)
- np.testing.assert_allclose(Cb.shape, (n_samples, n_samples))
-
- Cb2 = ot.gromov.entropic_gromov_barycenters(n_samples, [C1, C2],
- [ot.unif(ns), ot.unif(nt)
- ], ot.unif(n_samples), [.5, .5],
- 'kl_loss', 1e-3,
- max_iter=100, tol=1e-3)
- np.testing.assert_allclose(Cb2.shape, (n_samples, n_samples))
-
-
-def test_fgw():
+ p = ot.unif(n_samples)
+ C1b = nx.from_numpy(C1)
+ C2b = nx.from_numpy(C2)
+ p1b = nx.from_numpy(p1)
+ p2b = nx.from_numpy(p2)
+ pb = nx.from_numpy(p)
+
+ Cb = ot.gromov.entropic_gromov_barycenters(
+ n_samples, [C1, C2], [p1, p2], p, [.5, .5],
+ 'square_loss', 1e-3, max_iter=50, tol=1e-3, verbose=True, random_state=42
+ )
+ Cbb = nx.to_numpy(ot.gromov.entropic_gromov_barycenters(
+ n_samples, [C1b, C2b], [p1b, p2b], pb, [.5, .5],
+ 'square_loss', 1e-3, max_iter=50, tol=1e-3, verbose=True, random_state=42
+ ))
+ np.testing.assert_allclose(Cb, Cbb, atol=1e-06)
+ np.testing.assert_allclose(Cbb.shape, (n_samples, n_samples))
+
+ Cb2 = ot.gromov.entropic_gromov_barycenters(
+ n_samples, [C1, C2], [p1, p2], p, [.5, .5],
+ 'kl_loss', 1e-3, max_iter=100, tol=1e-3, random_state=42
+ )
+ Cb2b = nx.to_numpy(ot.gromov.entropic_gromov_barycenters(
+ n_samples, [C1b, C2b], [p1b, p2b], pb, [.5, .5],
+ 'kl_loss', 1e-3, max_iter=100, tol=1e-3, random_state=42
+ ))
+ np.testing.assert_allclose(Cb2, Cb2b, atol=1e-06)
+ np.testing.assert_allclose(Cb2b.shape, (n_samples, n_samples))
+
+
+def test_fgw(nx):
n_samples = 50 # nb samples
mu_s = np.array([0, 0])
@@ -260,33 +350,46 @@ def test_fgw():
M = ot.dist(ys, yt)
M /= M.max()
+ Mb = nx.from_numpy(M)
+ C1b = nx.from_numpy(C1)
+ C2b = nx.from_numpy(C2)
+ pb = nx.from_numpy(p)
+ qb = nx.from_numpy(q)
+
G, log = ot.gromov.fused_gromov_wasserstein(M, C1, C2, p, q, 'square_loss', alpha=0.5, log=True)
+ Gb, logb = ot.gromov.fused_gromov_wasserstein(Mb, C1b, C2b, pb, qb, 'square_loss', alpha=0.5, log=True)
+ Gb = nx.to_numpy(Gb)
# check constraints
+ np.testing.assert_allclose(G, Gb, atol=1e-06)
np.testing.assert_allclose(
- p, G.sum(1), atol=1e-04) # cf convergence fgw
+ p, Gb.sum(1), atol=1e-04) # cf convergence fgw
np.testing.assert_allclose(
- q, G.sum(0), atol=1e-04) # cf convergence fgw
+ q, Gb.sum(0), atol=1e-04) # cf convergence fgw
Id = (1 / (1.0 * n_samples)) * np.eye(n_samples, n_samples)
np.testing.assert_allclose(
- G, np.flipud(Id), atol=1e-04) # cf convergence gromov
+ Gb, np.flipud(Id), atol=1e-04) # cf convergence gromov
fgw, log = ot.gromov.fused_gromov_wasserstein2(M, C1, C2, p, q, 'square_loss', alpha=0.5, log=True)
+ fgwb, logb = ot.gromov.fused_gromov_wasserstein2(Mb, C1b, C2b, pb, qb, 'square_loss', alpha=0.5, log=True)
G = log['T']
+ Gb = nx.to_numpy(logb['T'])
- np.testing.assert_allclose(fgw, 0, atol=1e-1, rtol=1e-1)
+ np.testing.assert_allclose(fgw, fgwb, atol=1e-08)
+ np.testing.assert_allclose(fgwb, 0, atol=1e-1, rtol=1e-1)
# check constraints
+ np.testing.assert_allclose(G, Gb, atol=1e-06)
np.testing.assert_allclose(
- p, G.sum(1), atol=1e-04) # cf convergence gromov
+ p, Gb.sum(1), atol=1e-04) # cf convergence gromov
np.testing.assert_allclose(
- q, G.sum(0), atol=1e-04) # cf convergence gromov
+ q, Gb.sum(0), atol=1e-04) # cf convergence gromov
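The FGW assertions above also check the new backend path: backend tensors go in, and the transport plan (or the FGW value for fused_gromov_wasserstein2) comes back in the same type. Below is a minimal sketch using the torch backend, assuming torch is installed (plain NumPy arrays work with identical calls); alpha sets the trade-off between the feature cost M and the structure costs C1, C2. The data here is illustrative only.

    import numpy as np
    import torch
    import ot

    n = 20
    rng = np.random.RandomState(42)
    xs = rng.randn(n, 2)
    xt = xs[::-1].copy()                       # same cloud in reversed order
    ys, yt = rng.randn(n, 1), rng.randn(n, 1)  # node features

    p = q = ot.unif(n)
    C1, C2 = ot.dist(xs), ot.dist(xt)
    C1, C2 = C1 / C1.max(), C2 / C2.max()
    M = ot.dist(ys, yt)
    M = M / M.max()

    # torch tensors in, torch tensors out
    Mb, C1b, C2b, pb, qb = (torch.from_numpy(a) for a in (M, C1, C2, p, q))
    Gb = ot.gromov.fused_gromov_wasserstein(Mb, C1b, C2b, pb, qb, 'square_loss', alpha=0.5)
    fgwb = ot.gromov.fused_gromov_wasserstein2(Mb, C1b, C2b, pb, qb, 'square_loss', alpha=0.5)
    print(type(Gb), float(fgwb))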


-def test_fgw_barycenter():
+def test_fgw_barycenter(nx):
np.random.seed(42)
ns = 50
@@ -300,30 +403,44 @@ def test_fgw_barycenter():
C1 = ot.dist(Xs)
C2 = ot.dist(Xt)
-
+ p1, p2 = ot.unif(ns), ot.unif(nt)
n_samples = 3
- X, C = ot.gromov.fgw_barycenters(n_samples, [ys, yt], [C1, C2], [ot.unif(ns), ot.unif(nt)], [.5, .5], 0.5,
- fixed_structure=False, fixed_features=False,
- p=ot.unif(n_samples), loss_fun='square_loss',
- max_iter=100, tol=1e-3)
- np.testing.assert_allclose(C.shape, (n_samples, n_samples))
- np.testing.assert_allclose(X.shape, (n_samples, ys.shape[1]))
+ p = ot.unif(n_samples)
+
+ ysb = nx.from_numpy(ys)
+ ytb = nx.from_numpy(yt)
+ C1b = nx.from_numpy(C1)
+ C2b = nx.from_numpy(C2)
+ p1b = nx.from_numpy(p1)
+ p2b = nx.from_numpy(p2)
+ pb = nx.from_numpy(p)
+
+ Xb, Cb = ot.gromov.fgw_barycenters(
+ n_samples, [ysb, ytb], [C1b, C2b], [p1b, p2b], [.5, .5], 0.5, fixed_structure=False,
+ fixed_features=False, p=pb, loss_fun='square_loss', max_iter=100, tol=1e-3, random_state=12345
+ )
xalea = np.random.randn(n_samples, 2)
init_C = ot.dist(xalea, xalea)
-
- X, C = ot.gromov.fgw_barycenters(n_samples, [ys, yt], [C1, C2], ps=[ot.unif(ns), ot.unif(nt)], lambdas=[.5, .5], alpha=0.5,
- fixed_structure=True, init_C=init_C, fixed_features=False,
- p=ot.unif(n_samples), loss_fun='square_loss',
- max_iter=100, tol=1e-3)
- np.testing.assert_allclose(C.shape, (n_samples, n_samples))
- np.testing.assert_allclose(X.shape, (n_samples, ys.shape[1]))
+ init_Cb = nx.from_numpy(init_C)
+
+ Xb, Cb = ot.gromov.fgw_barycenters(
+ n_samples, [ysb, ytb], [C1b, C2b], ps=[p1b, p2b], lambdas=[.5, .5],
+ alpha=0.5, fixed_structure=True, init_C=init_Cb, fixed_features=False,
+ p=pb, loss_fun='square_loss', max_iter=100, tol=1e-3
+ )
+ Xb, Cb = nx.to_numpy(Xb), nx.to_numpy(Cb)
+ np.testing.assert_allclose(Cb.shape, (n_samples, n_samples))
+ np.testing.assert_allclose(Xb.shape, (n_samples, ys.shape[1]))
init_X = np.random.randn(n_samples, ys.shape[1])
-
- X, C, log = ot.gromov.fgw_barycenters(n_samples, [ys, yt], [C1, C2], [ot.unif(ns), ot.unif(nt)], [.5, .5], 0.5,
- fixed_structure=False, fixed_features=True, init_X=init_X,
- p=ot.unif(n_samples), loss_fun='square_loss',
- max_iter=100, tol=1e-3, log=True)
- np.testing.assert_allclose(C.shape, (n_samples, n_samples))
- np.testing.assert_allclose(X.shape, (n_samples, ys.shape[1]))
+ init_Xb = nx.from_numpy(init_X)
+
+ Xb, Cb, logb = ot.gromov.fgw_barycenters(
+ n_samples, [ysb, ytb], [C1b, C2b], [p1b, p2b], [.5, .5], 0.5,
+ fixed_structure=False, fixed_features=True, init_X=init_Xb,
+ p=pb, loss_fun='square_loss', max_iter=100, tol=1e-3, log=True, random_state=98765
+ )
+ Xb, Cb = nx.to_numpy(Xb), nx.to_numpy(Cb)
+ np.testing.assert_allclose(Cb.shape, (n_samples, n_samples))
+ np.testing.assert_allclose(Xb.shape, (n_samples, ys.shape[1]))
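As the calls above show, fgw_barycenters returns both a feature barycenter X and a structure barycenter C; fixed_structure / fixed_features freeze one of the two (init_C or init_X must then be provided), and the random_state argument introduced by this patch makes the remaining random initialization reproducible. A NumPy-only sketch mirroring the first call of the test, with illustrative data:

    import numpy as np
    import ot

    rng = np.random.RandomState(42)
    ys, yt = rng.randn(10, 2), rng.randn(15, 2)                      # input features
    C1, C2 = ot.dist(rng.randn(10, 2)), ot.dist(rng.randn(15, 2))    # input structures

    # both the feature barycenter X (3 x 2) and the structure barycenter C (3 x 3) are estimated
    X, C = ot.gromov.fgw_barycenters(
        3, [ys, yt], [C1, C2], [ot.unif(10), ot.unif(15)], [.5, .5], alpha=0.5,
        fixed_structure=False, fixed_features=False, p=ot.unif(3),
        loss_fun='square_loss', max_iter=100, tol=1e-3, random_state=12345)
    assert X.shape == (3, 2) and C.shape == (3, 3)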