diff options
Diffstat (limited to 'ot')
-rw-r--r-- | ot/backend.py | 2 | ||||
-rw-r--r-- | ot/bregman.py | 14 | ||||
-rw-r--r-- | ot/coot.py | 9 | ||||
-rw-r--r-- | ot/da.py | 10 | ||||
-rw-r--r-- | ot/datasets.py | 2 | ||||
-rw-r--r-- | ot/dr.py | 8 | ||||
-rw-r--r-- | ot/gromov/_bregman.py | 6 | ||||
-rw-r--r-- | ot/gromov/_dictionary.py | 10 | ||||
-rw-r--r-- | ot/gromov/_gw.py | 17 | ||||
-rw-r--r-- | ot/gromov/_semirelaxed.py | 15 | ||||
-rw-r--r-- | ot/gromov/_utils.py | 8 | ||||
-rw-r--r-- | ot/lp/__init__.py | 11 | ||||
-rw-r--r-- | ot/lp/cvx.py | 2 | ||||
-rw-r--r-- | ot/lp/solver_1d.py | 12 | ||||
-rw-r--r-- | ot/optim.py | 9 | ||||
-rwxr-xr-x | ot/partial.py | 26 | ||||
-rw-r--r-- | ot/plot.py | 2 | ||||
-rw-r--r-- | ot/regpath.py | 2 | ||||
-rw-r--r-- | ot/sliced.py | 2 | ||||
-rw-r--r-- | ot/unbalanced.py | 9 |
20 files changed, 98 insertions, 78 deletions
diff --git a/ot/backend.py b/ot/backend.py index 0dd6fb8..a82c448 100644 --- a/ot/backend.py +++ b/ot/backend.py @@ -27,7 +27,7 @@ Examples np_config.enable_numpy_behavior() Performance --------- +----------- - CPU: Intel(R) Xeon(R) Gold 6248 CPU @ 2.50GHz - GPU: Tesla V100-SXM2-32GB diff --git a/ot/bregman.py b/ot/bregman.py index 20bef7e..4503ffc 100644 --- a/ot/bregman.py +++ b/ot/bregman.py @@ -150,7 +150,7 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, stopThr=1e-9, ot.bregman.sinkhorn_knopp : Classic Sinkhorn :ref:`[2] <references-sinkhorn>` ot.bregman.sinkhorn_stabilized: Stabilized sinkhorn :ref:`[9] <references-sinkhorn>` :ref:`[10] <references-sinkhorn>` - ot.bregman.sinkhorn_epsilon_scaling: Sinkhorn with epslilon scaling + ot.bregman.sinkhorn_epsilon_scaling: Sinkhorn with epsilon scaling :ref:`[9] <references-sinkhorn>` :ref:`[10] <references-sinkhorn>` """ @@ -384,6 +384,7 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, \gamma^T \mathbf{1} &= \mathbf{b} \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix @@ -572,6 +573,7 @@ def sinkhorn_log(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, \gamma^T \mathbf{1} &= \mathbf{b} \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix @@ -784,6 +786,7 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, \gamma^T \mathbf{1} &= \mathbf{b} \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix @@ -950,6 +953,7 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, \gamma^T \mathbf{1} &= \mathbf{b} \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix @@ -2657,7 +2661,7 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000, ---------- .. [4] S. Nakhostin, N. Courty, R. Flamary, D. Tuia, T. 
Corpetti, - Supervised planetary unmixing with optimal transport, Whorkshop + Supervised planetary unmixing with optimal transport, Workshop on Hyperspectral Image and Signal Processing : Evolution in Remote Sensing (WHISPERS), 2016. """ @@ -2908,6 +2912,7 @@ def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', \gamma^T \mathbf{1} &= \mathbf{b} \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`n_samples_a`, `n_samples_b`) metric cost matrix @@ -3104,6 +3109,7 @@ def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', \gamma^T \mathbf{1} &= \mathbf{b} \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`n_samples_a`, `n_samples_b`) metric cost matrix @@ -3257,7 +3263,6 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli sinkhorn divergence :math:`S`: .. math:: - W &= \min_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F + \mathrm{reg} \cdot\Omega(\gamma) @@ -3287,6 +3292,7 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli \gamma_b^T \mathbf{1} &= \mathbf{b} \gamma_b &\geq 0 + where : - :math:`\mathbf{M}` (resp. :math:`\mathbf{M_a}`, :math:`\mathbf{M_b}`) @@ -3352,7 +3358,7 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli ---------- .. [23] Aude Genevay, Gabriel Peyré, Marco Cuturi, Learning Generative Models with Sinkhorn Divergences, Proceedings of the Twenty-First - International Conference on Artficial Intelligence and Statistics, + International Conference on Artificial Intelligence and Statistics, (AISTATS) 21, 2018 ''' X_s, X_t = list_to_array(X_s, X_t) @@ -74,7 +74,7 @@ def co_optimal_transport(X, Y, wx_samp=None, wx_feat=None, wy_samp=None, wy_feat Sinkhorn solver. If epsilon is scalar, then the same epsilon is applied to both regularization of sample and feature couplings. 
alpha : scalar or indexable object of length 2, float or int, optional (default = 0) - Coeffficient parameter of linear terms with respect to the sample and feature couplings. + Coefficient parameter of linear terms with respect to the sample and feature couplings. If alpha is scalar, then the same alpha is applied to both linear terms. M_samp : (n_sample_x, n_sample_y), float, optional (default = None) Sample matrix with respect to the linear term on sample coupling. @@ -295,7 +295,7 @@ def co_optimal_transport2(X, Y, wx_samp=None, wx_feat=None, wy_samp=None, wy_fea + \varepsilon_1 \mathbf{KL}(\mathbf{P} | \mathbf{w}_{xs} \mathbf{w}_{ys}^T) + \varepsilon_2 \mathbf{KL}(\mathbf{Q} | \mathbf{w}_{xf} \mathbf{w}_{yf}^T) - Where : + where : - :math:`\mathbf{X}`: Data matrix in the source space - :math:`\mathbf{Y}`: Data matrix in the target space @@ -333,7 +333,7 @@ def co_optimal_transport2(X, Y, wx_samp=None, wx_feat=None, wy_samp=None, wy_fea Sinkhorn solver. If epsilon is scalar, then the same epsilon is applied to both regularization of sample and feature couplings. alpha : scalar or indexable object of length 2, float or int, optional (default = 0) - Coeffficient parameter of linear terms with respect to the sample and feature couplings. + Coefficient parameter of linear terms with respect to the sample and feature couplings. If alpha is scalar, then the same alpha is applied to both linear terms. M_samp : (n_sample_x, n_sample_y), float, optional (default = None) Sample matrix with respect to the linear term on sample coupling. @@ -345,7 +345,6 @@ def co_optimal_transport2(X, Y, wx_samp=None, wx_feat=None, wy_samp=None, wy_fea tuples of 2 vectors of size (n_sample_x, n_sample_y) and (n_feature_x, n_feature_y). Initialization of sample and feature dual vectors if using Sinkhorn algorithm. Zero vectors by default. - - "pi_sample" and "pi_feature" whose values are matrices of size (n_sample_x, n_sample_y) and (n_feature_x, n_feature_y). 
Initialization of sample and feature couplings. @@ -382,7 +381,7 @@ def co_optimal_transport2(X, Y, wx_samp=None, wx_feat=None, wy_samp=None, wy_fea float CO-Optimal Transport distance. dict - Contains logged informations from :any:`co_optimal_transport` solver. + Contains logged information from :any:`co_optimal_transport` solver. Only returned if `log` parameter is True References @@ -28,7 +28,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, numInnerItermax=200, stopInnerThr=1e-9, verbose=False, log=False): r""" - Solve the entropic regularization optimal transport problem with nonconvex + Solve the entropic regularization optimal transport problem with non-convex group lasso regularization The function solves the following optimization problem: @@ -172,13 +172,13 @@ def sinkhorn_l1l2_gl(a, labels_a, b, M, reg, eta=0.1, numItermax=10, - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - :math:`\Omega_g` is the group lasso regulaization term + - :math:`\Omega_g` is the group lasso regularization term :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^2` where :math:`\mathcal{I}_c` are the index of samples from class `c` in the source domain. - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1) - The algorithm used for solving the problem is the generalised conditional + The algorithm used for solving the problem is the generalized conditional gradient as proposed in :ref:`[5, 7] <references-sinkhorn-l1l2-gl>`. @@ -296,7 +296,7 @@ def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False, material of :ref:`[8] <references-joint-OT-mapping-linear>`) using the bias optional argument. 
The algorithm used for solving the problem is the block coordinate - descent that alternates between updates of :math:`\mathbf{G}` (using conditionnal gradient) + descent that alternates between updates of :math:`\mathbf{G}` (using conditional gradient) and the update of :math:`\mathbf{L}` using a classical least square solver. @@ -494,7 +494,7 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian', material of :ref:`[8] <references-joint-OT-mapping-kernel>`) using the bias optional argument. The algorithm used for solving the problem is the block coordinate - descent that alternates between updates of :math:`\mathbf{G}` (using conditionnal gradient) + descent that alternates between updates of :math:`\mathbf{G}` (using conditional gradient) and the update of :math:`\mathbf{L}` using a classical kernel least square solver. diff --git a/ot/datasets.py b/ot/datasets.py index a839074..3d633f4 100644 --- a/ot/datasets.py +++ b/ot/datasets.py @@ -22,7 +22,7 @@ def make_1D_gauss(n, m, s): m : float mean value of the gaussian distribution s : float - standard deviaton of the gaussian distribution + standard deviation of the gaussian distribution Returns ------- @@ -5,7 +5,7 @@ Dimension reduction with OT .. warning:: Note that by default the module is not imported in :mod:`ot`. In order to - use it you need to explicitely import :mod:`ot.dr` + use it you need to explicitly import :mod:`ot.dr` """ @@ -83,7 +83,7 @@ def fda(X, y, p=2, reg=1e-16): y : ndarray, shape (n,) Labels for training samples. p : int, optional - Size of dimensionnality reduction. + Size of dimensionality reduction. reg : float, optional Regularization term >0 (ridge regularization) @@ -164,7 +164,7 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, sinkhorn_method='sinkhorn', maxiter y : ndarray, shape (n,) Labels for training samples. p : int, optional - Size of dimensionnality reduction. + Size of dimensionality reduction. 
reg : float, optional Regularization term >0 (entropic regularization) solver : None | str, optional @@ -175,7 +175,7 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, sinkhorn_method='sinkhorn', maxiter P0 : ndarray, shape (d, p) Initial starting point for projection. normalize : bool, optional - Normalise the Wasserstaiun distance by the average distance on P0 (default : False) + Normalize the Wasserstein distance by the average distance on P0 (default : False) verbose : int, optional Print information along iterations. diff --git a/ot/gromov/_bregman.py b/ot/gromov/_bregman.py index b0cccfb..aa25f1f 100644 --- a/ot/gromov/_bregman.py +++ b/ot/gromov/_bregman.py @@ -69,7 +69,7 @@ def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon, symmetric=None, symmetric : bool, optional Either C1 and C2 are to be assumed symmetric or not. If let to its default None value, a symmetry test will be conducted. - Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric). + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). G0: array-like, shape (ns,nt), optional If None the initial transport plan of the solver is pq^T. Otherwise G0 must satisfy marginal constraints and will be used as initial transport of the solver. 
@@ -152,7 +152,7 @@ def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon, symmetric=None, def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon, symmetric=None, G0=None, max_iter=1000, tol=1e-9, verbose=False, log=False): r""" - Returns the entropic gromov-wasserstein discrepancy between the two measured similarity matrices :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` + Returns the entropic Gromov-Wasserstein discrepancy between the two measured similarity matrices :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` The function solves the following optimization problem: @@ -194,7 +194,7 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon, symmetric=None symmetric : bool, optional Either C1 and C2 are to be assumed symmetric or not. If let to its default None value, a symmetry test will be conducted. - Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric). + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). G0: array-like, shape (ns,nt), optional If None the initial transport plan of the solver is pq^T. Otherwise G0 must satisfy marginal constraints and will be used as initial transport of the solver. 
diff --git a/ot/gromov/_dictionary.py b/ot/gromov/_dictionary.py index 5b32671..0d618d1 100644 --- a/ot/gromov/_dictionary.py +++ b/ot/gromov/_dictionary.py @@ -148,7 +148,7 @@ def gromov_wasserstein_dictionary_learning(Cs, D, nt, reg=0., ps=None, q=None, e Ts = [None] * batch_size for batch_idx, C_idx in enumerate(batch): - # BCD solver for Gromov-Wassersteisn linear unmixing used independently on each structure of the sampled batch + # BCD solver for Gromov-Wasserstein linear unmixing used independently on each structure of the sampled batch unmixings[batch_idx], Cs_embedded[batch_idx], Ts[batch_idx], current_loss = gromov_wasserstein_linear_unmixing( Cs[C_idx], Cdict, reg=reg, p=ps[C_idx], q=q, tol_outer=tol_outer, tol_inner=tol_inner, max_iter_outer=max_iter_outer, max_iter_inner=max_iter_inner, symmetric=symmetric, **kwargs @@ -252,7 +252,7 @@ def gromov_wasserstein_linear_unmixing(C, Cdict, reg=0., p=None, q=None, tol_out Returns ------- w: array-like, shape (D,) - gromov-wasserstein linear unmixing of :math:`(\mathbf{C},\mathbf{p})` onto the span of the dictionary. + Gromov-Wasserstein linear unmixing of :math:`(\mathbf{C},\mathbf{p})` onto the span of the dictionary. Cembedded: array-like, shape (nt,nt) embedded structure of :math:`(\mathbf{C},\mathbf{p})` onto the dictionary, :math:`\sum_d w_d\mathbf{C_{dict}[d]}`. T: array-like (ns, nt) @@ -559,7 +559,7 @@ def fused_gromov_wasserstein_dictionary_learning(Cs, Ys, D, nt, alpha, reg=0., p Feature matrices composing the dictionary. The dictionary leading to the best loss over an epoch is saved and returned. log: dict - If use_log is True, contains loss evolutions by batches and epoches. + If use_log is True, contains loss evolutions by batches and epochs. References ------- .. [38] C. Vincent-Cuaz, T. Vayer, R. Flamary, M. Corneli, N. 
Courty, Online @@ -634,7 +634,7 @@ def fused_gromov_wasserstein_dictionary_learning(Cs, Ys, D, nt, alpha, reg=0., p Ts = [None] * batch_size for batch_idx, C_idx in enumerate(batch): - # BCD solver for Gromov-Wassersteisn linear unmixing used independently on each structure of the sampled batch + # BCD solver for Gromov-Wasserstein linear unmixing used independently on each structure of the sampled batch unmixings[batch_idx], Cs_embedded[batch_idx], Ys_embedded[batch_idx], Ts[batch_idx], current_loss = fused_gromov_wasserstein_linear_unmixing( Cs[C_idx], Ys[C_idx], Cdict, Ydict, alpha, reg=reg, p=ps[C_idx], q=q, tol_outer=tol_outer, tol_inner=tol_inner, max_iter_outer=max_iter_outer, max_iter_inner=max_iter_inner, symmetric=symmetric, **kwargs @@ -736,7 +736,7 @@ def fused_gromov_wasserstein_linear_unmixing(C, Y, Cdict, Ydict, alpha, reg=0., Returns ------- w: array-like, shape (D,) - fused gromov-wasserstein linear unmixing of (C,Y,p) onto the span of the dictionary. + fused Gromov-Wasserstein linear unmixing of (C,Y,p) onto the span of the dictionary. Cembedded: array-like, shape (nt,nt) embedded structure of :math:`(\mathbf{C},\mathbf{Y}, \mathbf{p})` onto the dictionary, :math:`\sum_d w_d\mathbf{C_{dict}[d]}`. 
Yembedded: array-like, shape (nt,d) diff --git a/ot/gromov/_gw.py b/ot/gromov/_gw.py index bc4719d..cdfa9a3 100644 --- a/ot/gromov/_gw.py +++ b/ot/gromov/_gw.py @@ -26,7 +26,7 @@ from ._utils import update_square_loss, update_kl_loss def gromov_wasserstein(C1, C2, p, q, loss_fun='square_loss', symmetric=None, log=False, armijo=False, G0=None, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs): r""" - Returns the gromov-wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` + Returns the Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` The function solves the following optimization problem: @@ -39,6 +39,7 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun='square_loss', symmetric=None, log \mathbf{\gamma}^T \mathbf{1} &= \mathbf{q} \mathbf{\gamma} &\geq 0 + Where : - :math:`\mathbf{C_1}`: Metric cost matrix in the source space @@ -68,7 +69,7 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun='square_loss', symmetric=None, log symmetric : bool, optional Either C1 and C2 are to be assumed symmetric or not. If let to its default None value, a symmetry test will be conducted. - Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric). + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). 
verbose : bool, optional Print information along iterations log : bool, optional @@ -170,7 +171,7 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun='square_loss', symmetric=None, log def gromov_wasserstein2(C1, C2, p, q, loss_fun='square_loss', symmetric=None, log=False, armijo=False, G0=None, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs): r""" - Returns the gromov-wasserstein discrepancy between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` + Returns the Gromov-Wasserstein discrepancy between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})` The function solves the following optimization problem: @@ -183,6 +184,7 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun='square_loss', symmetric=None, lo \mathbf{\gamma}^T \mathbf{1} &= \mathbf{q} \mathbf{\gamma} &\geq 0 + Where : - :math:`\mathbf{C_1}`: Metric cost matrix in the source space @@ -216,7 +218,7 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun='square_loss', symmetric=None, lo symmetric : bool, optional Either C1 and C2 are to be assumed symmetric or not. If let to its default None value, a symmetry test will be conducted. - Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric). + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). verbose : bool, optional Print information along iterations log : bool, optional @@ -241,7 +243,7 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun='square_loss', symmetric=None, lo gw_dist : float Gromov-Wasserstein distance log : dict - convergence information and Coupling marix + convergence information and Coupling matrix References ---------- @@ -310,6 +312,7 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', symmetric= which can lead to copy overhead on GPU arrays. .. note:: All computations in the conjugate gradient solver are done with numpy to limit memory overhead. 
+ The algorithm used for solving the problem is conditional gradient as discussed in :ref:`[24] <references-fused-gromov-wasserstein>` Parameters @@ -329,7 +332,7 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', symmetric= symmetric : bool, optional Either C1 and C2 are to be assumed symmetric or not. If let to its default None value, a symmetry test will be conducted. - Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric). + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). alpha : float, optional Trade-off parameter (0 < alpha < 1) armijo : bool, optional @@ -503,7 +506,7 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', symmetric Returns ------- fgw-distance : float - Fused gromov wasserstein distance for the given parameters. + Fused Gromov-Wasserstein distance for the given parameters. log : dict Log dictionary return only if log==True in parameters. diff --git a/ot/gromov/_semirelaxed.py b/ot/gromov/_semirelaxed.py index 638bb1c..cb2bf28 100644 --- a/ot/gromov/_semirelaxed.py +++ b/ot/gromov/_semirelaxed.py @@ -21,7 +21,7 @@ from ._utils import init_matrix_semirelaxed, gwloss, gwggrad def semirelaxed_gromov_wasserstein(C1, C2, p, loss_fun='square_loss', symmetric=None, log=False, G0=None, max_iter=1e4, tol_rel=1e-9, tol_abs=1e-9, **kwargs): r""" - Returns the semi-relaxed gromov-wasserstein divergence transport from :math:`(\mathbf{C_1}, \mathbf{p})` to :math:`\mathbf{C_2}` + Returns the semi-relaxed Gromov-Wasserstein divergence transport from :math:`(\mathbf{C_1}, \mathbf{p})` to :math:`\mathbf{C_2}` The function solves the following optimization problem: @@ -32,6 +32,7 @@ def semirelaxed_gromov_wasserstein(C1, C2, p, loss_fun='square_loss', symmetric= s.t. 
\ \mathbf{\gamma} \mathbf{1} &= \mathbf{p} \mathbf{\gamma} &\geq 0 + Where : - :math:`\mathbf{C_1}`: Metric cost matrix in the source space @@ -58,7 +59,7 @@ def semirelaxed_gromov_wasserstein(C1, C2, p, loss_fun='square_loss', symmetric= symmetric : bool, optional Either C1 and C2 are to be assumed symmetric or not. If let to its default None value, a symmetry test will be conducted. - Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric). + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). verbose : bool, optional Print information along iterations log : bool, optional @@ -156,6 +157,7 @@ def semirelaxed_gromov_wasserstein2(C1, C2, p, loss_fun='square_loss', symmetric s.t. \ \mathbf{\gamma} \mathbf{1} &= \mathbf{p} \mathbf{\gamma} &\geq 0 + Where : - :math:`\mathbf{C_1}`: Metric cost matrix in the source space @@ -166,6 +168,7 @@ def semirelaxed_gromov_wasserstein2(C1, C2, p, loss_fun='square_loss', symmetric Note that when using backends, this loss function is differentiable wrt the matrices (C1, C2) but not yet for the weights p. + .. note:: This function is backend-compatible and will work on arrays from all compatible backends. However all the steps in the conditional gradient are not differentiable. @@ -184,7 +187,7 @@ def semirelaxed_gromov_wasserstein2(C1, C2, p, loss_fun='square_loss', symmetric symmetric : bool, optional Either C1 and C2 are to be assumed symmetric or not. If let to its default None value, a symmetry test will be conducted. - Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric). + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). verbose : bool, optional Print information along iterations log : bool, optional @@ -278,7 +281,7 @@ def semirelaxed_fused_gromov_wasserstein(M, C1, C2, p, loss_fun='square_loss', s symmetric : bool, optional Either C1 and C2 are to be assumed symmetric or not. 
If let to its default None value, a symmetry test will be conducted. - Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric). + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). alpha : float, optional Trade-off parameter (0 < alpha < 1) G0: array-like, shape (ns,nt), optional @@ -415,7 +418,7 @@ def semirelaxed_fused_gromov_wasserstein2(M, C1, C2, p, loss_fun='square_loss', symmetric : bool, optional Either C1 and C2 are to be assumed symmetric or not. If let to its default None value, a symmetry test will be conducted. - Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymetric). + Else if set to True (resp. False), C1 and C2 will be assumed symmetric (resp. asymmetric). alpha : float, optional Trade-off parameter (0 < alpha < 1) G0: array-like, shape (ns,nt), optional @@ -435,7 +438,7 @@ def semirelaxed_fused_gromov_wasserstein2(M, C1, C2, p, loss_fun='square_loss', Returns ------- srfgw-divergence : float - Semi-relaxed Fused gromov wasserstein divergence for the given parameters. + Semi-relaxed Fused Gromov-Wasserstein divergence for the given parameters. log : dict Log dictionary return only if log==True in parameters. diff --git a/ot/gromov/_utils.py b/ot/gromov/_utils.py index e842250..ef8cd88 100644 --- a/ot/gromov/_utils.py +++ b/ot/gromov/_utils.py @@ -20,7 +20,7 @@ def init_matrix(C1, C2, p, q, loss_fun='square_loss', nx=None): r"""Return loss matrices and tensors for Gromov-Wasserstein fast computation Returns the value of :math:`\mathcal{L}(\mathbf{C_1}, \mathbf{C_2}) \otimes \mathbf{T}` with the - selected loss function as the loss function of Gromow-Wasserstein discrepancy. + selected loss function as the loss function of Gromov-Wasserstein discrepancy. 
The matrices are computed as described in Proposition 1 in :ref:`[12] <references-init-matrix>` @@ -195,7 +195,7 @@ def gwloss(constC, hC1, hC2, T, nx=None): Returns ------- loss : float - Gromov Wasserstein loss + Gromov-Wasserstein loss .. _references-gwloss: @@ -235,7 +235,7 @@ def gwggrad(constC, hC1, hC2, T, nx=None): Returns ------- grad : array-like, shape (`ns`, `nt`) - Gromov Wasserstein gradient + Gromov-Wasserstein gradient .. _references-gwggrad: @@ -328,7 +328,7 @@ def init_matrix_semirelaxed(C1, C2, p, loss_fun='square_loss', nx=None): r"""Return loss matrices and tensors for semi-relaxed Gromov-Wasserstein fast computation Returns the value of :math:`\mathcal{L}(\mathbf{C_1}, \mathbf{C_2}) \otimes \mathbf{T}` with the - selected loss function as the loss function of semi-relaxed Gromow-Wasserstein discrepancy. + selected loss function as the loss function of semi-relaxed Gromov-Wasserstein discrepancy. The matrices are computed as described in Proposition 1 in :ref:`[12] <references-init-matrix>` and adapted to the semi-relaxed problem where the second marginal is not a constant anymore. diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 2ff02ab..4952a21 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -253,7 +253,7 @@ def emd(a, b, M, numItermax=100000, log=False, center_dual=True, numThreads=1): Otherwise returns only the optimal transportation matrix. center_dual: boolean, optional (default=True) If True, centers the dual potential using function - :ref:`center_ot_dual`. + :py:func:`ot.lp.center_ot_dual`. numThreads: int or "max", optional (default=1, i.e. OpenMP is not used) If compiled with OpenMP, chooses the number of threads to parallelize. "max" selects the highest number possible. @@ -418,7 +418,7 @@ def emd2(a, b, M, processes=1, If True, returns the optimal transportation matrix in the log. center_dual: boolean, optional (default=True) If True, centers the dual potential using function - :ref:`center_ot_dual`. 
+ :py:func:`ot.lp.center_ot_dual`. numThreads: int or "max", optional (default=1, i.e. OpenMP is not used) If compiled with OpenMP, chooses the number of threads to parallelize. "max" selects the highest number possible. @@ -631,6 +631,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None .. _references-free-support-barycenter: + References ---------- .. [20] Cuturi, Marco, and Arnaud Doucet. "Fast computation of Wasserstein barycenters." International Conference on Machine Learning. 2014. @@ -688,7 +689,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None def generalized_free_support_barycenter(X_list, a_list, P_list, n_samples_bary, Y_init=None, b=None, weights=None, numItermax=100, stopThr=1e-7, verbose=False, log=None, numThreads=1, eps=0): r""" - Solves the free support generalised Wasserstein barycenter problem: finding a barycenter (a discrete measure with + Solves the free support generalized Wasserstein barycenter problem: finding a barycenter (a discrete measure with a fixed amount of points of uniform weights) whose respective projections fit the input measures. 
More formally: @@ -776,7 +777,7 @@ def generalized_free_support_barycenter(X_list, a_list, P_list, n_samples_bary, Y_init = nx.randn(n_samples_bary, d, type_as=X_list[0]) if b is None: - b = nx.ones(n_samples_bary, type_as=X_list[0]) / n_samples_bary # not optimised + b = nx.ones(n_samples_bary, type_as=X_list[0]) / n_samples_bary # not optimized out = free_support_barycenter(Z_list, a_list, Y_init, b, numItermax=numItermax, stopThr=stopThr, verbose=verbose, log=log, numThreads=numThreads) @@ -786,7 +787,7 @@ def generalized_free_support_barycenter(X_list, a_list, P_list, n_samples_bary, else: Y = out log_dict = None - Y = Y @ B.T # return to the Generalised WB formulation + Y = Y @ B.T # return to the Generalized WB formulation if log: return Y, log_dict diff --git a/ot/lp/cvx.py b/ot/lp/cvx.py index 361ad0f..3f7eb36 100644 --- a/ot/lp/cvx.py +++ b/ot/lp/cvx.py @@ -52,7 +52,7 @@ def barycenter(A, M, weights=None, verbose=False, log=False, solver='interior-po reg : float Regularization term >0 weights : np.ndarray (n,) - Weights of each histogram a_i on the simplex (barycentric coodinates) + Weights of each histogram a_i on the simplex (barycentric coordinates) verbose : bool, optional Print information along iterations log : bool, optional diff --git a/ot/lp/solver_1d.py b/ot/lp/solver_1d.py index 840801a..8d841ec 100644 --- a/ot/lp/solver_1d.py +++ b/ot/lp/solver_1d.py @@ -37,7 +37,7 @@ def quantile_function(qs, cws, xs): n = xs.shape[0] if nx.__name__ == 'torch': # this is to ensure the best performance for torch searchsorted - # and avoid a warninng related to non-contiguous arrays + # and avoid a warning related to non-contiguous arrays cws = cws.T.contiguous() qs = qs.T.contiguous() else: @@ -145,6 +145,7 @@ def emd_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True, s.t. \gamma 1 = a, \gamma^T 1= b, \gamma\geq 0 + where : - d is the metric @@ -283,6 +284,7 @@ def emd2_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True, s.t. 
\gamma 1 = a, \gamma^T 1= b, \gamma\geq 0 + where : - d is the metric @@ -464,7 +466,7 @@ def derivative_cost_on_circle(theta, u_values, v_values, u_cdf, v_cdf, p=2): if nx.__name__ == 'torch': # this is to ensure the best performance for torch searchsorted - # and avoid a warninng related to non-contiguous arrays + # and avoid a warning related to non-contiguous arrays u_cdf = u_cdf.contiguous() v_cdf_theta = v_cdf_theta.contiguous() @@ -478,7 +480,7 @@ def derivative_cost_on_circle(theta, u_values, v_values, u_cdf, v_cdf, p=2): if nx.__name__ == 'torch': # this is to ensure the best performance for torch searchsorted - # and avoid a warninng related to non-contiguous arrays + # and avoid a warning related to non-contiguous arrays u_cdfm = u_cdfm.contiguous() v_cdf_theta = v_cdf_theta.contiguous() @@ -665,8 +667,8 @@ def binary_search_circle(u_values, v_values, u_weights=None, v_weights=None, p=1 if u_values.shape[1] != v_values.shape[1]: raise ValueError( - "u and v must have the same number of batchs {} and {} respectively given".format(u_values.shape[1], - v_values.shape[1])) + "u and v must have the same number of batches {} and {} respectively given".format(u_values.shape[1], + v_values.shape[1])) u_values = u_values % 1 v_values = v_values % 1 diff --git a/ot/optim.py b/ot/optim.py index b15c77b..9e65e81 100644 --- a/ot/optim.py +++ b/ot/optim.py @@ -138,6 +138,7 @@ def generic_conditional_gradient(a, b, M, f, df, reg1, reg2, lp_solver, line_sea \gamma^T \mathbf{1} &= \mathbf{b} (optional constraint) \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix @@ -157,6 +158,7 @@ def generic_conditional_gradient(a, b, M, f, df, reg1, reg2, lp_solver, line_sea \gamma^T \mathbf{1} &= \mathbf{b} \gamma &\geq 0 + where : - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` @@ -224,7 +226,7 @@ def generic_conditional_gradient(a, b, M, f, df, reg1, reg2, lp_solver, line_sea 
See Also -------- - ot.lp.emd : Unregularized optimal ransport + ot.lp.emd : Unregularized optimal transport ot.bregman.sinkhorn : Entropic regularized optimal transport """ a, b, M, G0 = list_to_array(a, b, M, G0) @@ -325,6 +327,7 @@ def cg(a, b, M, reg, f, df, G0=None, line_search=line_search_armijo, \gamma^T \mathbf{1} &= \mathbf{b} \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix @@ -380,7 +383,7 @@ def cg(a, b, M, reg, f, df, G0=None, line_search=line_search_armijo, See Also -------- - ot.lp.emd : Unregularized optimal ransport + ot.lp.emd : Unregularized optimal transport ot.bregman.sinkhorn : Entropic regularized optimal transport """ @@ -407,6 +410,7 @@ def semirelaxed_cg(a, b, M, reg, f, df, G0=None, line_search=line_search_armijo, s.t. \ \gamma \mathbf{1} &= \mathbf{a} \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix @@ -492,6 +496,7 @@ def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10, \gamma^T \mathbf{1} &= \mathbf{b} \gamma &\geq 0 + where : - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix diff --git a/ot/partial.py b/ot/partial.py index bf4119d..43f3362 100755 --- a/ot/partial.py +++ b/ot/partial.py @@ -516,7 +516,7 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None, nb_dummies : int, optional Number of dummy points to add (avoid instabilities in the EMD solver) G0 : ndarray, shape (ns, nt), optional - Initialisation of the transportation matrix + Initialization of the transportation matrix thres : float, optional quantile of the gradient matrix to populate the cost matrix when 0 (default: 1) @@ -686,7 +686,7 @@ def partial_gromov_wasserstein2(C1, C2, p, q, m=None, nb_dummies=1, G0=None, C1 : ndarray, shape (ns, ns) Metric cost matrix in the source space C2 : ndarray, shape (nt, nt) - Metric costfr matrix in the target space + Metric cost matrix in the target space p : ndarray, shape (ns,) Distribution in the source space q : ndarray, 
shape (nt,) @@ -697,7 +697,7 @@ def partial_gromov_wasserstein2(C1, C2, p, q, m=None, nb_dummies=1, G0=None, nb_dummies : int, optional Number of dummy points to add (avoid instabilities in the EMD solver) G0 : ndarray, shape (ns, nt), optional - Initialisation of the transportation matrix + Initialization of the transportation matrix thres : float, optional quantile of the gradient matrix to populate the cost matrix when 0 (default: 1) @@ -958,15 +958,15 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None, - `m` is the amount of mass to be transported The formulation of the GW problem has been proposed in - :ref:`[12] <references-entropic-partial-gromov-wassertein>` and the - partial GW in :ref:`[29] <references-entropic-partial-gromov-wassertein>` + :ref:`[12] <references-entropic-partial-gromov-wasserstein>` and the + partial GW in :ref:`[29] <references-entropic-partial-gromov-wasserstein>` Parameters ---------- C1 : ndarray, shape (ns, ns) Metric cost matrix in the source space C2 : ndarray, shape (nt, nt) - Metric costfr matrix in the target space + Metric cost matrix in the target space p : ndarray, shape (ns,) Distribution in the source space q : ndarray, shape (nt,) @@ -977,7 +977,7 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None, Amount of mass to be transported (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`) G0 : ndarray, shape (ns, nt), optional - Initialisation of the transportation matrix + Initialization of the transportation matrix numItermax : int, optional Max number of iterations tol : float, optional @@ -1016,7 +1016,7 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None, log dictionary returned only if `log` is `True` - .. _references-entropic-partial-gromov-wassertein: + .. _references-entropic-partial-gromov-wasserstein: References ---------- .. 
[12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon, @@ -1107,8 +1107,8 @@ def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None, - `m` is the amount of mass to be transported The formulation of the GW problem has been proposed in - :ref:`[12] <references-entropic-partial-gromov-wassertein2>` and the - partial GW in :ref:`[29] <references-entropic-partial-gromov-wassertein2>` + :ref:`[12] <references-entropic-partial-gromov-wasserstein2>` and the + partial GW in :ref:`[29] <references-entropic-partial-gromov-wasserstein2>` Parameters @@ -1116,7 +1116,7 @@ def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None, C1 : ndarray, shape (ns, ns) Metric cost matrix in the source space C2 : ndarray, shape (nt, nt) - Metric costfr matrix in the target space + Metric cost matrix in the target space p : ndarray, shape (ns,) Distribution in the source space q : ndarray, shape (nt,) @@ -1127,7 +1127,7 @@ def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None, Amount of mass to be transported (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`) G0 : ndarray, shape (ns, nt), optional - Initialisation of the transportation matrix + Initialization of the transportation matrix numItermax : int, optional Max number of iterations tol : float, optional @@ -1159,7 +1159,7 @@ def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None, 1.87 - .. _references-entropic-partial-gromov-wassertein2: + .. _references-entropic-partial-gromov-wasserstein2: References ---------- .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon, @@ -3,7 +3,7 @@ Functions for plotting OT matrices .. warning:: Note that by default the module is not import in :mod:`ot`. 
In order to - use it you need to explicitely import :mod:`ot.plot` + use it you need to explicitly import :mod:`ot.plot` """ diff --git a/ot/regpath.py b/ot/regpath.py index e745288..8a9b6d8 100644 --- a/ot/regpath.py +++ b/ot/regpath.py @@ -399,7 +399,7 @@ def compute_next_removal(phi, delta, current_gamma): def complement_schur(M_current, b, d, id_pop): r""" This function computes the inverse of the design matrix in the \ - regularization path using the Schur complement. Two cases may arise: + regularization path using the Schur complement. Two cases may arise: Case 1: one variable is added to the active set diff --git a/ot/sliced.py b/ot/sliced.py index fa2141e..3a1644d 100644 --- a/ot/sliced.py +++ b/ot/sliced.py @@ -173,7 +173,7 @@ def max_sliced_wasserstein_distance(X_s, X_t, a=None, b=None, n_projections=50, where : - - :math:`\theta_\# \mu` stands for the pushforwars of the projection :math:`\mathbb{R}^d \ni X \mapsto \langle \theta, X \rangle` + - :math:`\theta_\# \mu` stands for the pushforwards of the projection :math:`\mathbb{R}^d \ni X \mapsto \langle \theta, X \rangle` Parameters diff --git a/ot/unbalanced.py b/ot/unbalanced.py index a71a0dd..9584d77 100644 --- a/ot/unbalanced.py +++ b/ot/unbalanced.py @@ -121,7 +121,7 @@ def sinkhorn_unbalanced(a, b, M, reg, reg_m, method='sinkhorn', numItermax=1000, ot.unbalanced.sinkhorn_stabilized_unbalanced: Unbalanced Stabilized sinkhorn :ref:`[9, 10] <references-sinkhorn-unbalanced>` ot.unbalanced.sinkhorn_reg_scaling_unbalanced: - Unbalanced Sinkhorn with epslilon scaling :ref:`[9, 10] <references-sinkhorn-unbalanced>` + Unbalanced Sinkhorn with epsilon scaling :ref:`[9, 10] <references-sinkhorn-unbalanced>` """ @@ -163,6 +163,7 @@ def sinkhorn_unbalanced2(a, b, M, reg, reg_m, method='sinkhorn', s.t. 
\gamma\geq 0 + where : - :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix @@ -240,7 +241,7 @@ def sinkhorn_unbalanced2(a, b, M, reg, reg_m, method='sinkhorn', -------- ot.unbalanced.sinkhorn_knopp : Unbalanced Classic Sinkhorn :ref:`[10] <references-sinkhorn-unbalanced2>` ot.unbalanced.sinkhorn_stabilized: Unbalanced Stabilized sinkhorn :ref:`[9, 10] <references-sinkhorn-unbalanced2>` - ot.unbalanced.sinkhorn_reg_scaling: Unbalanced Sinkhorn with epslilon scaling :ref:`[9, 10] <references-sinkhorn-unbalanced2>` + ot.unbalanced.sinkhorn_reg_scaling: Unbalanced Sinkhorn with epsilon scaling :ref:`[9, 10] <references-sinkhorn-unbalanced2>` """ b = list_to_array(b) @@ -492,7 +493,7 @@ def sinkhorn_stabilized_unbalanced(a, b, M, reg, reg_m, tau=1e5, numItermax=1000 reg_m: float Marginal relaxation term > 0 tau : float - thershold for max value in u or v for log scaling + threshold for max value in u or v for log scaling numItermax : int, optional Max number of iterations stopThr : float, optional @@ -699,7 +700,7 @@ def barycenter_unbalanced_stabilized(A, M, reg, reg_m, weights=None, tau=1e3, tau : float Stabilization threshold for log domain absorption. weights : array-like (n_hists,) optional - Weight of each distribution (barycentric coodinates) + Weight of each distribution (barycentric coordinates) If None, uniform weights are used. numItermax : int, optional Max number of iterations |