#!/usr/bin/env python3 # -*- coding: utf-8 -*- """ Partial OT """ # Author: Laetitia Chapel # License: MIT License import numpy as np from .lp import emd def partial_wasserstein_lagrange(a, b, M, reg_m=None, nb_dummies=1, log=False, **kwargs): r""" Solves the partial optimal transport problem for the quadratic cost and returns the OT plan The function considers the following problem: .. math:: \gamma = \arg\min_\gamma <\gamma,(M-\lambda)>_F s.t. \gamma\geq 0 \\ \gamma 1 \leq a\\ \gamma^T 1 \leq b\\ 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\} or equivalently: .. math:: \gamma = \arg\min_\gamma <\gamma,M>_F + \sqrt(\lambda/2) (\|\gamma 1 - a\|_1 + \|\gamma^T 1 - b\|_1) s.t. \gamma\geq 0 \\ where : - M is the metric cost matrix - a and b are source and target unbalanced distributions - :math:`\lambda` is the lagragian cost. Tuning its value allows attaining a given mass to be transported m The formulation of the problem has been proposed in [26]_ Parameters ---------- a : np.ndarray (dim_a,) Unnormalized histogram of dimension dim_a b : np.ndarray (dim_b,) Unnormalized histograms of dimension dim_b M : np.ndarray (dim_a, dim_b) cost matrix for the quadratic cost reg_m : float, optional Lagragian cost log : bool, optional record log if True Returns ------- gamma : (dim_a x dim_b) ndarray Optimal transportation matrix for the given parameters log : dict log dictionary returned only if `log` is `True` Examples -------- >>> import ot >>> a = [.1, .2] >>> b = [.1, .1] >>> M = [[0., 1.], [2., 3.]] >>> np.round(partial_wasserstein_lagrange(a,b,M), 2) array([[0.1, 0. ], [0. , 0.1]]) >>> np.round(partial_wasserstein_lagrange(a,b,M,reg_m=2), 2) array([[0.1, 0. ], [0. , 0. ]]) References ---------- .. [26] Caffarelli, L. A., & McCann, R. J. (2010) Free boundaries in optimal transport and Monge-Ampere obstacle problems. Annals of mathematics, 673-730. See Also -------- ot.partial.partial_wasserstein : Partial Wasserstein with fixed mass """ if np.sum(a) > 1 or np.sum(b) > 1: raise ValueError("Problem infeasible. Check that a and b are in the " "simplex") if reg_m is None: reg_m = np.max(M) + 1 if reg_m < -np.max(M): return np.zeros((len(a), len(b))) eps = 1e-20 M = np.asarray(M, dtype=np.float64) b = np.asarray(b, dtype=np.float64) a = np.asarray(a, dtype=np.float64) M_star = M - reg_m # modified cost matrix # trick to fasten the computation: select only the subset of columns/lines # that can have marginals greater than 0 (that is to say M < 0) idx_x = np.where(np.min(M_star, axis=1) < eps)[0] idx_y = np.where(np.min(M_star, axis=0) < eps)[0] # extend a, b, M with "reservoir" or "dummy" points M_extended = np.zeros((len(idx_x) + nb_dummies, len(idx_y) + nb_dummies)) M_extended[:len(idx_x), :len(idx_y)] = M_star[np.ix_(idx_x, idx_y)] a_extended = np.append(a[idx_x], [(np.sum(a) - np.sum(a[idx_x]) + np.sum(b)) / nb_dummies] * nb_dummies) b_extended = np.append(b[idx_y], [(np.sum(b) - np.sum(b[idx_y]) + np.sum(a)) / nb_dummies] * nb_dummies) gamma_extended, log_emd = emd(a_extended, b_extended, M_extended, log=True, **kwargs) gamma = np.zeros((len(a), len(b))) gamma[np.ix_(idx_x, idx_y)] = gamma_extended[:-nb_dummies, :-nb_dummies] if log_emd['warning'] is not None: raise ValueError("Error in the EMD resolution: try to increase the" " number of dummy points") log_emd['cost'] = np.sum(gamma * M) if log: return gamma, log_emd else: return gamma def partial_wasserstein(a, b, M, m=None, nb_dummies=1, log=False, **kwargs): r""" Solves the partial optimal transport problem for the quadratic cost and returns the OT plan The function considers the following problem: .. math:: \gamma = \arg\min_\gamma <\gamma,M>_F s.t. \gamma\geq 0 \\ \gamma 1 \leq a\\ \gamma^T 1 \leq b\\ 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\} where : - M is the metric cost matrix - a and b are source and target unbalanced distributions - m is the amount of mass to be transported Parameters ---------- a : np.ndarray (dim_a,) Unnormalized histogram of dimension dim_a b : np.ndarray (dim_b,) Unnormalized histograms of dimension dim_b M : np.ndarray (dim_a, dim_b) cost matrix for the quadratic cost m : float, optional amount of mass to be transported nb_dummies : int, optional, default:1 number of reservoir points to be added (to avoid numerical instabilities, increase its value if an error is raised) log : bool, optional record log if True Returns ------- :math:`gamma` : (dim_a x dim_b) ndarray Optimal transportation matrix for the given parameters log : dict log dictionary returned only if `log` is `True` Examples -------- >>> import ot >>> a = [.1, .2] >>> b = [.1, .1] >>> M = [[0., 1.], [2., 3.]] >>> np.round(partial_wasserstein(a,b,M), 2) array([[0.1, 0. ], [0. , 0.1]]) >>> np.round(partial_wasserstein(a,b,M,m=0.1), 2) array([[0.1, 0. ], [0. , 0. ]]) References ---------- .. [26] Caffarelli, L. A., & McCann, R. J. (2010) Free boundaries in optimal transport and Monge-Ampere obstacle problems. Annals of mathematics, 673-730. .. [27] Chapel, L., Alaya, M., Gasso, G. (2019). "Partial Gromov- Wasserstein with Applications on Positive-Unlabeled Learning". arXiv preprint arXiv:2002.08276. See Also -------- ot.partial.partial_wasserstein_lagrange: Partial Wasserstein with regularization on the marginals ot.partial.entropic_partial_wasserstein: Partial Wasserstein with a entropic regularization parameter """ if m is None: return partial_wasserstein_lagrange(a, b, M, log=log, **kwargs) elif m < 0: raise ValueError("Problem infeasible. Parameter m should be greater" " than 0.") elif m > np.min((np.sum(a), np.sum(b))): raise ValueError("Problem infeasible. Parameter m should lower or" " equal than min(|a|_1, |b|_1).") b_extended = np.append(b, [(np.sum(a) - m) / nb_dummies] * nb_dummies) a_extended = np.append(a, [(np.sum(b) - m) / nb_dummies] * nb_dummies) M_extended = np.zeros((len(a_extended), len(b_extended))) M_extended[-1, -1] = np.max(M) * 1e5 M_extended[:len(a), :len(b)] = M gamma, log_emd = emd(a_extended, b_extended, M_extended, log=True, **kwargs) if log_emd['warning'] is not None: raise ValueError("Error in the EMD resolution: try to increase the" " number of dummy points") log_emd['partial_w_dist'] = np.sum(M * gamma[:len(a), :len(b)]) if log: return gamma[:len(a), :len(b)], log_emd else: return gamma[:len(a), :len(b)] def partial_wasserstein2(a, b, M, m=None, nb_dummies=1, log=False, **kwargs): r""" Solves the partial optimal transport problem for the quadratic cost and returns the partial GW discrepancy The function considers the following problem: .. math:: \gamma = \arg\min_\gamma <\gamma,M>_F s.t. \gamma\geq 0 \\ \gamma 1 \leq a\\ \gamma^T 1 \leq b\\ 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\} where : - M is the metric cost matrix - a and b are source and target unbalanced distributions - m is the amount of mass to be transported Parameters ---------- a : np.ndarray (dim_a,) Unnormalized histogram of dimension dim_a b : np.ndarray (dim_b,) Unnormalized histograms of dimension dim_b M : np.ndarray (dim_a, dim_b) cost matrix for the quadratic cost m : float, optional amount of mass to be transported nb_dummies : int, optional, default:1 number of reservoir points to be added (to avoid numerical instabilities, increase its value if an error is raised) log : bool, optional record log if True Returns ------- :math:`gamma` : (dim_a x dim_b) ndarray Optimal transportation matrix for the given parameters log : dict log dictionary returned only if `log` is `True` Examples -------- >>> import ot >>> a=[.1, .2] >>> b=[.1, .1] >>> M=[[0., 1.], [2., 3.]] >>> np.round(partial_wasserstein2(a, b, M), 1) 0.3 >>> np.round(partial_wasserstein2(a,b,M,m=0.1), 1) 0.0 References ---------- .. [26] Caffarelli, L. A., & McCann, R. J. (2010) Free boundaries in optimal transport and Monge-Ampere obstacle problems. Annals of mathematics, 673-730. .. [27] Chapel, L., Alaya, M., Gasso, G. (2019). "Partial Gromov- Wasserstein with Applications on Positive-Unlabeled Learning". arXiv preprint arXiv:2002.08276. """ partial_gw, log_w = partial_wasserstein(a, b, M, m, nb_dummies, log=True, **kwargs) log_w['T'] = partial_gw if log: return np.sum(partial_gw * M), log_w else: return np.sum(partial_gw * M) def gwgrad_partial(C1, C2, T): """Compute the GW gradient. Note: we can not use the trick in [12]_ as the marginals may not sum to 1. Parameters ---------- C1: array of shape (n_p,n_p) intra-source (P) cost matrix C2: array of shape (n_u,n_u) intra-target (U) cost matrix T : array of shape(n_p+nb_dummies, n_u) (default: None) Transport matrix Returns ------- numpy.array of shape (n_p+nb_dummies, n_u) gradient References ---------- .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon, "Gromov-Wasserstein averaging of kernel and distance matrices." International Conference on Machine Learning (ICML). 2016. """ cC1 = np.dot(C1 ** 2 / 2, np.dot(T, np.ones(C2.shape[0]).reshape(-1, 1))) cC2 = np.dot(np.dot(np.ones(C1.shape[0]).reshape(1, -1), T), C2 ** 2 / 2) constC = cC1 + cC2 A = -np.dot(C1, T).dot(C2.T) tens = constC + A return tens * 2 def gwloss_partial(C1, C2, T): """Compute the GW loss. Parameters ---------- C1: array of shape (n_p,n_p) intra-source (P) cost matrix C2: array of shape (n_u,n_u) intra-target (U) cost matrix T : array of shape(n_p+nb_dummies, n_u) (default: None) Transport matrix Returns ------- GW loss """ g = gwgrad_partial(C1, C2, T) * 0.5 return np.sum(g * T) def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None, thres=1, numItermax=1000, tol=1e-7, log=False, verbose=False, **kwargs): r""" Solves the partial optimal transport problem and returns the OT plan The function considers the following problem: .. math:: \gamma = arg\min_\gamma <\gamma,M>_F s.t. \gamma 1 \leq a \\ \gamma^T 1 \leq b \\ \gamma\geq 0 \\ 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\} \\ where : - M is the metric cost matrix - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma) =\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - a and b are the sample weights - m is the amount of mass to be transported The formulation of the problem has been proposed in [27]_ Parameters ---------- C1 : ndarray, shape (ns, ns) Metric cost matrix in the source space C2 : ndarray, shape (nt, nt) Metric costfr matrix in the target space p : ndarray, shape (ns,) Distribution in the source space q : ndarray, shape (nt,) Distribution in the target space m : float, optional Amount of mass to be transported (default: min (|p|_1, |q|_1)) nb_dummies : int, optional Number of dummy points to add (avoid instabilities in the EMD solver) G0 : ndarray, shape (ns, nt), optional Initialisation of the transportation matrix thres : float, optional quantile of the gradient matrix to populate the cost matrix when 0 (default: 1) numItermax : int, optional Max number of iterations log : bool, optional return log if True verbose : bool, optional Print information along iterations armijo : bool, optional If True the steps of the line-search is found via an armijo research. Else closed form is used. If there is convergence issues use False. **kwargs : dict parameters can be directly passed to the emd solver Returns ------- gamma : (dim_a x dim_b) ndarray Optimal transportation matrix for the given parameters log : dict log dictionary returned only if `log` is `True` Examples -------- >>> import ot >>> import scipy as sp >>> a = np.array([0.25] * 4) >>> b = np.array([0.25] * 4) >>> x = np.array([1,2,100,200]).reshape((-1,1)) >>> y = np.array([3,2,98,199]).reshape((-1,1)) >>> C1 = sp.spatial.distance.cdist(x, x) >>> C2 = sp.spatial.distance.cdist(y, y) >>> np.round(partial_gromov_wasserstein(C1, C2, a, b),2) array([[0. , 0.25, 0. , 0. ], [0.25, 0. , 0. , 0. ], [0. , 0. , 0.25, 0. ], [0. , 0. , 0. , 0.25]]) >>> np.round(partial_gromov_wasserstein(C1, C2, a, b, m=0.25),2) array([[0. , 0. , 0. , 0. ], [0. , 0. , 0. , 0. ], [0. , 0. , 0. , 0. ], [0. , 0. , 0. , 0.25]]) References ---------- .. [27] Chapel, L., Alaya, M., Gasso, G. (2019). "Partial Gromov- Wasserstein with Applications on Positive-Unlabeled Learning". arXiv preprint arXiv:2002.08276. """ if m is None: m = np.min((np.sum(p), np.sum(q))) elif m < 0: raise ValueError("Problem infeasible. Parameter m should be greater" " than 0.") elif m > np.min((np.sum(p), np.sum(q))): raise ValueError("Problem infeasible. Parameter m should lower or" " equal than min(|a|_1, |b|_1).") if G0 is None: G0 = np.outer(p, q) dim_G_extended = (len(p) + nb_dummies, len(q) + nb_dummies) q_extended = np.append(q, [(np.sum(p) - m) / nb_dummies] * nb_dummies) p_extended = np.append(p, [(np.sum(q) - m) / nb_dummies] * nb_dummies) cpt = 0 err = 1 eps = 1e-20 if log: log = {'err': []} while (err > tol and cpt < numItermax): Gprev = G0 M = gwgrad_partial(C1, C2, G0) M[M < eps] = np.quantile(M, thres) M_emd = np.zeros(dim_G_extended) M_emd[:len(p), :len(q)] = M M_emd[-nb_dummies:, -nb_dummies:] = np.max(M) * 1e5 M_emd = np.asarray(M_emd, dtype=np.float64) Gc, logemd = emd(p_extended, q_extended, M_emd, log=True, **kwargs) if logemd['warning'] is not None: raise ValueError("Error in the EMD resolution: try to increase the" " number of dummy points") G0 = Gc[:len(p), :len(q)] if cpt % 10 == 0: # to speed up the computations err = np.linalg.norm(G0 - Gprev) if log: log['err'].append(err) if verbose: if cpt % 200 == 0: print('{:5s}|{:12s}|{:12s}'.format( 'It.', 'Err', 'Loss') + '\n' + '-' * 31) print('{:5d}|{:8e}|{:8e}'.format(cpt, err, gwloss_partial(C1, C2, G0))) cpt += 1 if log: log['partial_gw_dist'] = gwloss_partial(C1, C2, G0) return G0[:len(p), :len(q)], log else: return G0[:len(p), :len(q)] def partial_gromov_wasserstein2(C1, C2, p, q, m=None, nb_dummies=1, G0=None, thres=0.75, numItermax=1000, tol=1e-7, log=False, verbose=False, **kwargs): r""" Solves the partial optimal transport problem and returns the partial Gromov-Wasserstein discrepancy The function considers the following problem: .. math:: \gamma = arg\min_\gamma <\gamma,M>_F s.t. \gamma 1 \leq a \\ \gamma^T 1 \leq b \\ \gamma\geq 0 \\ 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\} \\ where : - M is the metric cost matrix - :math:`\Omega` is the entropic regularization term :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - a and b are the sample weights - m is the amount of mass to be transported The formulation of the problem has been proposed in [27]_ Parameters ---------- C1 : ndarray, shape (ns, ns) Metric cost matrix in the source space C2 : ndarray, shape (nt, nt) Metric costfr matrix in the target space p : ndarray, shape (ns,) Distribution in the source space q : ndarray, shape (nt,) Distribution in the target space m : float, optional Amount of mass to be transported (default: min (|p|_1, |q|_1)) nb_dummies : int, optional Number of dummy points to add (avoid instabilities in the EMD solver) G0 : ndarray, shape (ns, nt), optional Initialisation of the transportation matrix thres : float, optional quantile of the gradient matrix to populate the cost matrix when 0 (default: 1) numItermax : int, optional Max number of iterations log : bool, optional return log if True verbose : bool, optional Print information along iterations **kwargs : dict parameters can be directly passed to the emd solver Returns ------- partial_gw_dist : (dim_a x dim_b) ndarray partial GW discrepancy log : dict log dictionary returned only if `log` is `True` Examples -------- >>> import ot >>> import scipy as sp >>> a = np.array([0.25] * 4) >>> b = np.array([0.25] * 4) >>> x = np.array([1,2,100,200]).reshape((-1,1)) >>> y = np.array([3,2,98,199]).reshape((-1,1)) >>> C1 = sp.spatial.distance.cdist(x, x) >>> C2 = sp.spatial.distance.cdist(y, y) >>> np.round(partial_gromov_wasserstein2(C1, C2, a, b),2) 1.69 >>> np.round(partial_gromov_wasserstein2(C1, C2, a, b, m=0.25),2) 0.0 References ---------- .. [27] Chapel, L., Alaya, M., Gasso, G. (2019). "Partial Gromov- Wasserstein with Applications on Positive-Unlabeled Learning". arXiv preprint arXiv:2002.08276. """ partial_gw, log_gw = partial_gromov_wasserstein(C1, C2, p, q, m, nb_dummies, G0, thres, numItermax, tol, True, verbose, **kwargs) log_gw['T'] = partial_gw if log: return log_gw['partial_gw_dist'], log_gw else: return log_gw['partial_gw_dist'] def entropic_partial_wasserstein(a, b, M, reg, m=None, numItermax=1000, stopThr=1e-100, verbose=False, log=False): r""" Solves the partial optimal transport problem and returns the OT plan The function considers the following problem: .. math:: \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) s.t. \gamma 1 \leq a \\ \gamma^T 1 \leq b \\ \gamma\geq 0 \\ 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\} \\ where : - M is the metric cost matrix - :math:`\Omega` is the entropic regularization term :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - a and b are the sample weights - m is the amount of mass to be transported The formulation of the problem has been proposed in [3]_ Parameters ---------- a : np.ndarray (dim_a,) Unnormalized histogram of dimension dim_a b : np.ndarray (dim_b,) Unnormalized histograms of dimension dim_b M : np.ndarray (dim_a, dim_b) cost matrix reg : float Regularization term > 0 m : float, optional Amount of mass to be transported numItermax : int, optional Max number of iterations stopThr : float, optional Stop threshold on error (>0) verbose : bool, optional Print information along iterations log : bool, optional record log if True Returns ------- gamma : (dim_a x dim_b) ndarray Optimal transportation matrix for the given parameters log : dict log dictionary returned only if `log` is `True` Examples -------- >>> import ot >>> a = [.1, .2] >>> b = [.1, .1] >>> M = [[0., 1.], [2., 3.]] >>> np.round(entropic_partial_wasserstein(a, b, M, 1, 0.1), 2) array([[0.06, 0.02], [0.01, 0. ]]) References ---------- .. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G. (2015). Iterative Bregman projections for regularized transportation problems. SIAM Journal on Scientific Computing, 37(2), A1111-A1138. See Also -------- ot.partial.partial_wasserstein: exact Partial Wasserstein """ a = np.asarray(a, dtype=np.float64) b = np.asarray(b, dtype=np.float64) M = np.asarray(M, dtype=np.float64) dim_a, dim_b = M.shape dx = np.ones(dim_a, dtype=np.float64) dy = np.ones(dim_b, dtype=np.float64) if len(a) == 0: a = np.ones(dim_a, dtype=np.float64) / dim_a if len(b) == 0: b = np.ones(dim_b, dtype=np.float64) / dim_b if m is None: m = np.min((np.sum(a), np.sum(b))) * 1.0 if m < 0: raise ValueError("Problem infeasible. Parameter m should be greater" " than 0.") if m > np.min((np.sum(a), np.sum(b))): raise ValueError("Problem infeasible. Parameter m should lower or" " equal than min(|a|_1, |b|_1).") log_e = {'err': []} # Next 3 lines equivalent to K=np.exp(-M/reg), but faster to compute K = np.empty(M.shape, dtype=M.dtype) np.divide(M, -reg, out=K) np.exp(K, out=K) np.multiply(K, m / np.sum(K), out=K) err, cpt = 1, 0 while (err > stopThr and cpt < numItermax): Kprev = K K1 = np.dot(np.diag(np.minimum(a / np.sum(K, axis=1), dx)), K) K2 = np.dot(K1, np.diag(np.minimum(b / np.sum(K1, axis=0), dy))) K = K2 * (m / np.sum(K2)) if np.any(np.isnan(K)) or np.any(np.isinf(K)): print('Warning: numerical errors at iteration', cpt) break if cpt % 10 == 0: err = np.linalg.norm(Kprev - K) if log: log_e['err'].append(err) if verbose: if cpt % 200 == 0: print( '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 11) print('{:5d}|{:8e}|'.format(cpt, err)) cpt = cpt + 1 log_e['partial_w_dist'] = np.sum(M * K) if log: return K, log_e else: return K def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None, numItermax=1000, tol=1e-7, log=False, verbose=False): r""" Returns the partial Gromov-Wasserstein transport between (C1,p) and (C2,q) The function solves the following optimization problem: .. math:: GW = \arg\min_{\gamma} \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})\cdot \gamma_{i,j}\cdot\gamma_{k,l} + reg\cdot\Omega(\gamma) s.t. \gamma\geq 0 \\ \gamma 1 \leq a\\ \gamma^T 1 \leq b\\ 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\} where : - C1 is the metric cost matrix in the source space - C2 is the metric cost matrix in the target space - p and q are the sample weights - L : quadratic loss function - :math:`\Omega` is the entropic regularization term :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - m is the amount of mass to be transported The formulation of the problem has been proposed in [12]. Parameters ---------- C1 : ndarray, shape (ns, ns) Metric cost matrix in the source space C2 : ndarray, shape (nt, nt) Metric costfr matrix in the target space p : ndarray, shape (ns,) Distribution in the source space q : ndarray, shape (nt,) Distribution in the target space reg: float entropic regularization parameter m : float, optional Amount of mass to be transported (default: min (|p|_1, |q|_1)) G0 : ndarray, shape (ns, nt), optional Initialisation of the transportation matrix numItermax : int, optional Max number of iterations log : bool, optional return log if True verbose : bool, optional Print information along iterations Examples -------- >>> import ot >>> import scipy as sp >>> a = np.array([0.25] * 4) >>> b = np.array([0.25] * 4) >>> x = np.array([1,2,100,200]).reshape((-1,1)) >>> y = np.array([3,2,98,199]).reshape((-1,1)) >>> C1 = sp.spatial.distance.cdist(x, x) >>> C2 = sp.spatial.distance.cdist(y, y) >>> np.round(entropic_partial_gromov_wasserstein(C1, C2, a, b,50), 2) array([[0.12, 0.13, 0. , 0. ], [0.13, 0.12, 0. , 0. ], [0. , 0. , 0.25, 0. ], [0. , 0. , 0. , 0.25]]) >>> np.round(entropic_partial_gromov_wasserstein(C1, C2, a, b, 50, m=0.25), 2) array([[0.02, 0.03, 0. , 0.03], [0.03, 0.03, 0. , 0.03], [0. , 0. , 0.03, 0. ], [0.02, 0.02, 0. , 0.03]]) Returns ------- :math: `gamma` : (dim_a x dim_b) ndarray Optimal transportation matrix for the given parameters log : dict log dictionary returned only if `log` is `True` References ---------- .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon, "Gromov-Wasserstein averaging of kernel and distance matrices." International Conference on Machine Learning (ICML). 2016. See Also -------- ot.partial.partial_gromov_wasserstein: exact Partial Gromov-Wasserstein """ if G0 is None: G0 = np.outer(p, q) if m is None: m = np.min((np.sum(p), np.sum(q))) elif m < 0: raise ValueError("Problem infeasible. Parameter m should be greater" " than 0.") elif m > np.min((np.sum(p), np.sum(q))): raise ValueError("Problem infeasible. Parameter m should lower or" " equal than min(|a|_1, |b|_1).") cpt = 0 err = 1 loge = {'err': []} while (err > tol and cpt < numItermax): Gprev = G0 M_entr = gwgrad_partial(C1, C2, G0) G0 = entropic_partial_wasserstein(p, q, M_entr, reg, m) if cpt % 10 == 0: # to speed up the computations err = np.linalg.norm(G0 - Gprev) if log: loge['err'].append(err) if verbose: if cpt % 200 == 0: print('{:5s}|{:12s}|{:12s}'.format( 'It.', 'Err', 'Loss') + '\n' + '-' * 31) print('{:5d}|{:8e}|{:8e}'.format(cpt, err, gwloss_partial(C1, C2, G0))) cpt += 1 if log: loge['partial_gw_dist'] = gwloss_partial(C1, C2, G0) return G0, loge else: return G0 def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None, numItermax=1000, tol=1e-7, log=False, verbose=False): r""" Returns the partial Gromov-Wasserstein discrepancy between (C1,p) and (C2,q) The function solves the following optimization problem: .. math:: GW = \arg\min_{\gamma} \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})\cdot \gamma_{i,j}\cdot\gamma_{k,l} + reg\cdot\Omega(\gamma) s.t. \gamma\geq 0 \\ \gamma 1 \leq a\\ \gamma^T 1 \leq b\\ 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\} where : - C1 is the metric cost matrix in the source space - C2 is the metric cost matrix in the target space - p and q are the sample weights - L : quadratic loss function - :math:`\Omega` is the entropic regularization term :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - m is the amount of mass to be transported The formulation of the problem has been proposed in [12]. Parameters ---------- C1 : ndarray, shape (ns, ns) Metric cost matrix in the source space C2 : ndarray, shape (nt, nt) Metric costfr matrix in the target space p : ndarray, shape (ns,) Distribution in the source space q : ndarray, shape (nt,) Distribution in the target space reg: float entropic regularization parameter m : float, optional Amount of mass to be transported (default: min (|p|_1, |q|_1)) G0 : ndarray, shape (ns, nt), optional Initialisation of the transportation matrix numItermax : int, optional Max number of iterations log : bool, optional return log if True verbose : bool, optional Print information along iterations Returns ------- partial_gw_dist: float Gromov-Wasserstein distance log : dict log dictionary returned only if `log` is `True` Examples -------- >>> import ot >>> import scipy as sp >>> a = np.array([0.25] * 4) >>> b = np.array([0.25] * 4) >>> x = np.array([1,2,100,200]).reshape((-1,1)) >>> y = np.array([3,2,98,199]).reshape((-1,1)) >>> C1 = sp.spatial.distance.cdist(x, x) >>> C2 = sp.spatial.distance.cdist(y, y) >>> np.round(entropic_partial_gromov_wasserstein2(C1, C2, a, b,50), 2) 1.87 References ---------- .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon, "Gromov-Wasserstein averaging of kernel and distance matrices." International Conference on Machine Learning (ICML). 2016. """ partial_gw, log_gw = entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m, G0, numItermax, tol, True, verbose) log_gw['T'] = partial_gw if log: return log_gw['partial_gw_dist'], log_gw else: return log_gw['partial_gw_dist']