author    ncassereau-idris <84033440+ncassereau-idris@users.noreply.github.com> 2021-11-03 17:29:16 +0100
committer GitHub <noreply@github.com> 2021-11-03 17:29:16 +0100
commit    9c6ac880d426b7577918b0c77bd74b3b01930ef6 (patch)
tree      93b0899a0378a6fe8f063800091252d2c6ad9801
parent    e1b67c641da3b3e497db6811af2c200022b10302 (diff)
[MRG] Docs updates (#298)
* bregman docs
* sliced docs
* docs partial
* unbalanced docs
* stochastic docs
* plot docs
* datasets docs
* utils docs
* dr docs
* dr docs corrected
* smooth docs
* docs da
* pep8
* docs gromov
* more space after min and argmin
* docs lp
* bregman docs
* bregman docs mistake corrected
* pep8

Co-authored-by: Rémi Flamary <remi.flamary@gmail.com>
-rw-r--r--  ot/bregman.py      236
-rw-r--r--  ot/da.py           499
-rw-r--r--  ot/datasets.py      12
-rw-r--r--  ot/dr.py            40
-rw-r--r--  ot/gromov.py        35
-rw-r--r--  ot/lp/__init__.py  100
-rw-r--r--  ot/optim.py          8
-rwxr-xr-x  ot/partial.py      283
-rw-r--r--  ot/plot.py          10
-rw-r--r--  ot/sliced.py         7
-rw-r--r--  ot/smooth.py       179
-rw-r--r--  ot/stochastic.py   192
-rw-r--r--  ot/unbalanced.py   206
-rw-r--r--  ot/utils.py        118
14 files changed, 1048 insertions, 877 deletions
diff --git a/ot/bregman.py b/ot/bregman.py
index 786f151..cce52e2 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -33,7 +33,8 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
The function solves the following optimization problem:
.. math::
- \gamma = \mathop{\arg \min}_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg}\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg}\cdot\Omega(\gamma)
s.t. \ \gamma \mathbf{1} &= \mathbf{a}
@@ -45,9 +46,9 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
- :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
- :math:`\Omega` is the entropic regularization term
- :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target
- weights (histograms, both sum to 1)
+ weights (histograms, both sum to 1)
.. note:: This function is backend-compatible and will work on arrays
from all compatible backends.
@@ -70,7 +71,7 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
version of the sinkhorn :py:func:`ot.bregman.screenkhorn` aims at providing a
fast approximation of the Sinkhorn problem. For use of GPU and gradient
computation with small number of iterations we strongly recommend the
- :any:`ot.bregman.sinkhorn_log` solver that will no need to check for
+ :py:func:`ot.bregman.sinkhorn_log` solver that does not need to check for
numerical problems.
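For orientation, a minimal usage sketch of :py:func:`ot.sinkhorn` as documented in this hunk (toy histograms and cost matrix; the regularization value is arbitrary):

>>> import numpy as np
>>> import ot
>>> a = np.array([.5, .5])                # source histogram (sums to 1)
>>> b = np.array([.5, .5])                # target histogram (sums to 1)
>>> M = np.array([[0., 1.], [1., 0.]])    # cost matrix M
>>> G = ot.sinkhorn(a, b, M, reg=1.)      # entropic OT plan gamma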
@@ -189,7 +190,8 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000,
The function solves the following optimization problem:
.. math::
- W = \min_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg}\cdot\Omega(\gamma)
+ W = \min_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg}\cdot\Omega(\gamma)
s.t. \ \gamma \mathbf{1} &= \mathbf{a}
@@ -201,9 +203,9 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000,
- :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
- :math:`\Omega` is the entropic regularization term
- :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target
- weights (histograms, both sum to 1)
+ weights (histograms, both sum to 1)
.. note:: This function is backend-compatible and will work on arrays
from all compatible backends.
@@ -217,17 +219,17 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000,
By default and when using a regularization parameter that is not too small
the default sinkhorn solver should be enough. If you need to use a small
regularization to get sharper OT matrices, you should use the
- :any:`ot.bregman.sinkhorn_log` solver that will avoid numerical
+ :py:func:`ot.bregman.sinkhorn_log` solver that will avoid numerical
errors. This last solver can be very slow in practice and might not even
converge to a reasonable OT matrix in a finite time. This is why
- :any:`ot.bregman.sinkhorn_epsilon_scaling` that relies on iterating the value
+ :py:func:`ot.bregman.sinkhorn_epsilon_scaling` that relies on iterating the value
of the regularization (and using warm start) sometimes leads to better
solutions. Note that the greedy version of the sinkhorn
- :any:`ot.bregman.greenkhorn` can also lead to a speedup and the screening
- version of the sinkhorn :any:`ot.bregman.screenkhorn` aim a providing a
+ :py:func:`ot.bregman.greenkhorn` can also lead to a speedup and the screening
+ version of the sinkhorn :py:func:`ot.bregman.screenkhorn` aims at providing a
fast approximation of the Sinkhorn problem. For use of GPU and gradient
computation with small number of iterations we strongly recommend the
- :any:`ot.bregman.sinkhorn_log` solver that will no need to check for
+ :py:func:`ot.bregman.sinkhorn_log` solver that does not need to check for
numerical problems.
Parameters
@@ -301,15 +303,15 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000,
divergences. In The 22nd International Conference on Artificial
Intelligence and Statistics (pp. 2681-2690). PMLR.
+
See Also
--------
ot.lp.emd : Unregularized OT
ot.optim.cg : General regularized OT
ot.bregman.sinkhorn_knopp : Classic Sinkhorn :ref:`[2] <references-sinkhorn2>`
ot.bregman.greenkhorn : Greenkhorn :ref:`[21] <references-sinkhorn2>`
- ot.bregman.sinkhorn_stabilized: Stabilized sinkhorn :ref:`[9] <references-sinkhorn2>`
- :ref:`[10] <references-sinkhorn2>`
-
+ ot.bregman.sinkhorn_stabilized: Stabilized sinkhorn
+ :ref:`[9] <references-sinkhorn2>` :ref:`[10] <references-sinkhorn2>`
"""
M, a, b = list_to_array(M, a, b)
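Following the solver recommendations above, a sketch of computing the loss with the log-stabilized solver when the regularization is small (toy data; values are illustrative, not recommendations):

>>> import numpy as np
>>> import ot
>>> a = np.array([.5, .5])
>>> b = np.array([.5, .5])
>>> M = np.array([[0., 1.], [1., 0.]])
>>> W = ot.sinkhorn2(a, b, M, reg=1e-2, method='sinkhorn_log')  # loss with log-domain updates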
@@ -362,7 +364,8 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9,
The function solves the following optimization problem:
.. math::
- \gamma = \mathop{\arg \min}_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg}\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg}\cdot\Omega(\gamma)
s.t. \ \gamma \mathbf{1} &= \mathbf{a}
@@ -373,9 +376,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9,
- :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
- :math:`\Omega` is the entropic regularization term
- :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target
- weights (histograms, both sum to 1)
+ weights (histograms, both sum to 1)
The algorithm used for solving the problem is the Sinkhorn-Knopp
matrix scaling algorithm as proposed in :ref:`[2] <references-sinkhorn-knopp>`
@@ -543,7 +546,8 @@ def sinkhorn_log(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False,
The function solves the following optimization problem:
.. math::
- \gamma = \mathop{\arg \min}_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg}\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg}\cdot\Omega(\gamma)
s.t. \ \gamma \mathbf{1} &= \mathbf{a}
@@ -553,12 +557,13 @@ def sinkhorn_log(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False,
where :
- :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
- - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`\Omega` is the entropic regularization term
+ :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (histograms, both sum to 1)
The algorithm used for solving the problem is the Sinkhorn-Knopp matrix
- scaling algorithm :ref:`[2] <references-sinkhorn-knopp>` with the
- implementation from :ref:`[34] <references-sinkhorn-knopp>`
+ scaling algorithm :ref:`[2] <references-sinkhorn-log>` with the
+ implementation from :ref:`[34] <references-sinkhorn-log>`
Parameters
@@ -744,7 +749,8 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False,
The function solves the following optimization problem:
.. math::
- \gamma = \mathop{\arg \min}_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg}\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg}\cdot\Omega(\gamma)
s.t. \ \gamma \mathbf{1} &= \mathbf{a}
@@ -755,9 +761,9 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False,
- :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
- :math:`\Omega` is the entropic regularization term
- :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target
- weights (histograms, both sum to 1)
+ weights (histograms, both sum to 1)
Parameters
@@ -903,7 +909,8 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
The function solves the following optimization problem:
.. math::
- \gamma = \mathop{\arg \min}_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg}\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg}\cdot\Omega(\gamma)
s.t. \ \gamma \mathbf{1} &= \mathbf{a}
@@ -914,9 +921,9 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
- :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
- :math:`\Omega` is the entropic regularization term
- :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target
- weights (histograms, both sum to 1)
+ weights (histograms, both sum to 1)
The algorithm used for solving the problem is the Sinkhorn-Knopp matrix
@@ -1145,20 +1152,24 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4,
Solve the entropic regularization optimal transport problem with log
stabilization and epsilon scaling.
The function solves the following optimization problem:
+
.. math::
- \gamma = \mathop{\arg \min}_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg}\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg}\cdot\Omega(\gamma)
s.t. \ \gamma \mathbf{1} &= \mathbf{a}
\gamma^T \mathbf{1} &= \mathbf{b}
\gamma &\geq 0
+
where :
+
- :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
- :math:`\Omega` is the entropic regularization term
- :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights
- (histograms, both sum to 1)
+ :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (histograms, both sum to 1)
+
The algorithm used for solving the problem is the Sinkhorn-Knopp matrix
scaling algorithm as proposed in :ref:`[2] <references-sinkhorn-epsilon-scaling>`
but with the log stabilization
@@ -1340,17 +1351,17 @@ def barycenter(A, M, reg, weights=None, method="sinkhorn", numItermax=10000,
The function solves the following optimization problem:
.. math::
- \mathbf{a} = \mathop{\arg \min}_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
+ \mathbf{a} = \mathop{\arg \min}_\mathbf{a} \quad \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
where :
- - :math:`OT_{reg}(\cdot,\cdot)` is the entropic regularized Wasserstein
- distance (see :py:func:`ot.bregman.sinkhorn`)
- if `method` is `sinkhorn` or `sinkhorn_stabilized` or `sinkhorn_log`.
+ - :math:`W_{reg}(\cdot,\cdot)` is the entropic regularized Wasserstein
+ distance (see :py:func:`ot.bregman.sinkhorn`)
+ if `method` is `sinkhorn` or `sinkhorn_stabilized` or `sinkhorn_log`.
- :math:`\mathbf{a}_i` are training distributions in the columns of matrix
- :math:`\mathbf{A}`
+ :math:`\mathbf{A}`
- `reg` and :math:`\mathbf{M}` are respectively the regularization term and
- the cost matrix for OT
+ the cost matrix for OT
The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling
algorithm as proposed in :ref:`[3] <references-barycenter>`
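A minimal sketch of the barycenter computation documented here, assuming two 1D Gaussian histograms on a regular grid (regularization and weights chosen arbitrarily):

>>> import numpy as np
>>> import ot
>>> a1 = ot.datasets.make_1D_gauss(50, m=15, s=5)   # first input histogram
>>> a2 = ot.datasets.make_1D_gauss(50, m=35, s=5)   # second input histogram
>>> A = np.vstack([a1, a2]).T                       # histograms as columns, shape (dim, n_hists)
>>> M = ot.utils.dist0(50)                          # squared-distance cost on the 1D grid
>>> M /= M.max()
>>> bary = ot.bregman.barycenter(A, M, reg=1e-2, weights=np.array([.5, .5]))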
@@ -1424,16 +1435,16 @@ def barycenter_sinkhorn(A, M, reg, weights=None, numItermax=1000,
The function solves the following optimization problem:
.. math::
- \mathbf{a} = \mathop{\arg \min}_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
+ \mathbf{a} = \mathop{\arg \min}_\mathbf{a} \quad \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
where :
- :math:`W_{reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance
- (see :py:func:`ot.bregman.sinkhorn`)
+ (see :py:func:`ot.bregman.sinkhorn`)
- :math:`\mathbf{a}_i` are training distributions in the columns of matrix
- :math:`\mathbf{A}`
+ :math:`\mathbf{A}`
- `reg` and :math:`\mathbf{M}` are respectively the regularization term and
- the cost matrix for OT
+ the cost matrix for OT
The algorithm used for solving the problem is the Sinkhorn-Knopp matrix
scaling algorithm as proposed in :ref:`[3]<references-barycenter-sinkhorn>`.
@@ -1598,16 +1609,16 @@ def barycenter_stabilized(A, M, reg, tau=1e10, weights=None, numItermax=1000,
The function solves the following optimization problem:
.. math::
- \mathbf{a} = \mathop{\arg \min}_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
+ \mathbf{a} = \mathop{\arg \min}_\mathbf{a} \quad \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
where :
- :math:`W_{reg}(\cdot,\cdot)` is the entropic regularized Wasserstein
- distance (see :py:func:`ot.bregman.sinkhorn`)
+ distance (see :py:func:`ot.bregman.sinkhorn`)
- :math:`\mathbf{a}_i` are training distributions in the columns of matrix
- :math:`\mathbf{A}`
+ :math:`\mathbf{A}`
- `reg` and :math:`\mathbf{M}` are respectively the regularization term and
- the cost matrix for OT
+ the cost matrix for OT
The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling
algorithm as proposed in :ref:`[3] <references-barycenter-stabilized>`
@@ -1736,24 +1747,24 @@ def barycenter_debiased(A, M, reg, weights=None, method="sinkhorn", numItermax=1
The function solves the following optimization problem:
.. math::
- \mathbf{a} = arg\min_\mathbf{a} \sum_i S_{reg}(\mathbf{a},\mathbf{a}_i)
+ \mathbf{a} = \mathop{\arg \min}_\mathbf{a} \quad \sum_i S_{reg}(\mathbf{a},\mathbf{a}_i)
where :
- :math:`S_{reg}(\cdot,\cdot)` is the debiased Sinkhorn divergence
- (see :py:func:`ot.bregman.emirical_sinkhorn_divergence`)
+ (see :py:func:`ot.bregman.empirical_sinkhorn_divergence`)
- :math:`\mathbf{a}_i` are training distributions in the columns of matrix
- :math:`\mathbf{A}`
+ :math:`\mathbf{A}`
- `reg` and :math:`\mathbf{M}` are respectively the regularization term and
- the cost matrix for OT
+ the cost matrix for OT
The algorithm used for solving the problem is the debiased Sinkhorn
- algorithm as proposed in :ref:`[37] <references-sinkhorn-debiased>`
+ algorithm as proposed in :ref:`[37] <references-barycenter-debiased>`
Parameters
----------
A : array-like, shape (dim, n_hists)
- `n_hists` training distributions :math:`a_i` of size `dim`
+ `n_hists` training distributions :math:`\mathbf{a}_i` of size `dim`
M : array-like, shape (dim, dim)
loss matrix for OT
reg : float
@@ -1761,7 +1772,7 @@ def barycenter_debiased(A, M, reg, weights=None, method="sinkhorn", numItermax=1
method : str (optional)
method used for the solver either 'sinkhorn' or 'sinkhorn_log'
weights : array-like, shape (n_hists,)
- Weights of each histogram :math:`a_i` on the simplex (barycentric coodinates)
+ Weights of each histogram :math:`\mathbf{a}_i` on the simplex (barycentric coordinates)
numItermax : int, optional
Max number of iterations
stopThr : float, optional
@@ -1774,7 +1785,6 @@ def barycenter_debiased(A, M, reg, weights=None, method="sinkhorn", numItermax=1
if True, raises a warning if the algorithm doesn't converge.
-
Returns
-------
a : (dim,) array-like
@@ -1782,12 +1792,12 @@ def barycenter_debiased(A, M, reg, weights=None, method="sinkhorn", numItermax=1
log : dict
log dictionary return only if log==True in parameters
- .. _references-sinkhorn-debiased:
- References
- ----------
- .. [37] Janati, H., Cuturi, M., Gramfort, A. Proceedings of the 37th International
- Conference on Machine Learning, PMLR 119:4692-4701, 2020
+ .. _references-barycenter-debiased:
+ References
+ ----------
+ .. [37] Janati, H., Cuturi, M., Gramfort, A. Proceedings of the 37th International
+ Conference on Machine Learning, PMLR 119:4692-4701, 2020
"""
if method.lower() == 'sinkhorn':
@@ -1934,20 +1944,20 @@ def _barycenter_debiased_log(A, M, reg, weights=None, numItermax=1000,
def convolutional_barycenter2d(A, reg, weights=None, method="sinkhorn", numItermax=10000,
stopThr=1e-4, verbose=False, log=False,
warn=True, **kwargs):
- r"""Compute the entropic regularized wasserstein barycenter of distributions A
- where A is a collection of 2D images.
+ r"""Compute the entropic regularized wasserstein barycenter of distributions :math:`\mathbf{A}`
+ where :math:`\mathbf{A}` is a collection of 2D images.
The function solves the following optimization problem:
.. math::
- \mathbf{a} = \mathop{\arg \min}_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
+ \mathbf{a} = \mathop{\arg \min}_\mathbf{a} \quad \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
where :
- :math:`W_{reg}(\cdot,\cdot)` is the entropic regularized Wasserstein
- distance (see :py:func:`ot.bregman.sinkhorn`)
+ distance (see :py:func:`ot.bregman.sinkhorn`)
- :math:`\mathbf{a}_i` are training distributions (2D images) in the last two dimensions
- of matrix :math:`\mathbf{A}`
+ of matrix :math:`\mathbf{A}`
- `reg` is the regularization strength scalar value
The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm
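A toy sketch of the 2D convolutional barycenter documented here, assuming :math:`\mathbf{A}` is a stack of images of shape (n_hists, width, height), each normalized to sum to 1 (random images and regularization value are illustrative only):

>>> import numpy as np
>>> import ot
>>> rng = np.random.RandomState(0)
>>> A = rng.rand(2, 32, 32)                   # two toy 2D images
>>> A /= A.sum(axis=(1, 2), keepdims=True)    # normalize each image to a distribution
>>> bary = ot.bregman.convolutional_barycenter2d(A, reg=1e-2)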
@@ -2166,24 +2176,24 @@ def convolutional_barycenter2d_debiased(A, reg, weights=None, method="sinkhorn",
numItermax=10000, stopThr=1e-3,
verbose=False, log=False, warn=True,
**kwargs):
- r"""Compute the debiased sinkhorn barycenter of distributions A
- where A is a collection of 2D images.
+ r"""Compute the debiased sinkhorn barycenter of distributions :math:`\mathbf{A}`
+ where :math:`\mathbf{A}` is a collection of 2D images.
The function solves the following optimization problem:
.. math::
- \mathbf{a} = arg\min_\mathbf{a} \sum_i S_{reg}(\mathbf{a},\mathbf{a}_i)
+ \mathbf{a} = \mathop{\arg \min}_\mathbf{a} \quad \sum_i S_{reg}(\mathbf{a},\mathbf{a}_i)
where :
- :math:`S_{reg}(\cdot,\cdot)` is the debiased entropic regularized Wasserstein
- distance (see :py:func:`ot.bregman.sinkhorn_debiased`)
+ distance (see :py:func:`ot.bregman.barycenter_debiased`)
- :math:`\mathbf{a}_i` are training distributions (2D images) in the last two
- dimensions of matrix :math:`\mathbf{A}`
+ dimensions of matrix :math:`\mathbf{A}`
- `reg` is the regularization strength scalar value
The algorithm used for solving the problem is the debiased Sinkhorn scaling
- algorithm as proposed in :ref:`[37] <references-sinkhorn-debiased>`
+ algorithm as proposed in :ref:`[37] <references-convolutional-barycenter2d-debiased>`
Parameters
----------
@@ -2217,7 +2227,7 @@ def convolutional_barycenter2d_debiased(A, reg, weights=None, method="sinkhorn",
log dictionary return only if log==True in parameters
- .. _references-sinkhorn-debiased:
+ .. _references-convolutional-barycenter2d-debiased:
References
----------
@@ -2406,23 +2416,25 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000,
.. math::
- \mathbf{h} = \mathop{\arg \min}_\mathbf{h} (1- \alpha) W_{\mathbf{M}, \mathrm{reg}}(\mathbf{a},\mathbf{Dh})+\alpha W_{\mathbf{M_0},\mathrm{reg}_0}(\mathbf{h}_0,\mathbf{h})
+ \mathbf{h} = \mathop{\arg \min}_\mathbf{h} \quad
+ (1 - \alpha) W_{\mathbf{M}, \mathrm{reg}}(\mathbf{a}, \mathbf{Dh}) +
+ \alpha W_{\mathbf{M_0}, \mathrm{reg}_0}(\mathbf{h}_0, \mathbf{h})
where :
- :math:`W_{M,reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance
- with M loss matrix (see :py:func:`ot.bregman.sinkhorn`)
+ with :math:`\mathbf{M}` loss matrix (see :py:func:`ot.bregman.sinkhorn`)
- :math:`\mathbf{D}` is a dictionary of `n_atoms` atoms of dimension `dim_a`,
- its expected shape is `(dim_a, n_atoms)`
+ its expected shape is `(dim_a, n_atoms)`
- :math:`\mathbf{h}` is the estimated unmixing of dimension `n_atoms`
- :math:`\mathbf{a}` is an observed distribution of dimension `dim_a`
- :math:`\mathbf{h}_0` is a prior on :math:`\mathbf{h}` of dimension `dim_prior`
- `reg` and :math:`\mathbf{M}` are respectively the regularization term and the
- cost matrix (`dim_a`, `dim_a`) for OT data fitting
+ cost matrix (`dim_a`, `dim_a`) for OT data fitting
- `reg`:math:`_0` and :math:`\mathbf{M_0}` are respectively the regularization
- term and the cost matrix (`dim_prior`, `n_atoms`) regularization
- - :math:`\\alpha` weight data fitting and regularization
+ term and the cost matrix (`dim_prior`, `n_atoms`) regularization
+ - :math:`\alpha` weight data fitting and regularization
The optimization problem is solved following the algorithm described
in :ref:`[4] <references-unmix>`
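A hypothetical minimal setup for the unmixing problem documented here (random dictionary, grid costs, uniform prior; all parameter values arbitrary):

>>> import numpy as np
>>> import ot
>>> rng = np.random.RandomState(0)
>>> dim, n_atoms = 20, 3
>>> D = rng.rand(dim, n_atoms)
>>> D /= D.sum(axis=0)                             # dictionary atoms as columns (distributions)
>>> a = D.dot(np.array([.2, .3, .5]))              # observed mixture of the atoms
>>> M = ot.utils.dist0(dim); M /= M.max()          # (dim, dim) data-fitting cost
>>> M0 = ot.utils.dist0(n_atoms); M0 /= M0.max()   # (n_atoms, n_atoms) regularization cost
>>> h0 = np.ones(n_atoms) / n_atoms                # uniform prior on the unmixing
>>> h = ot.bregman.unmix(a, D, M, M0, h0, reg=1e-1, reg0=1e-1, alpha=.5)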
@@ -2535,7 +2547,7 @@ def jcpot_barycenter(Xs, Ys, Xt, reg, metric='sqeuclidean', numItermax=100,
.. math::
- \mathbf{h} = \mathop{\arg \min}_{\mathbf{h}} \sum_{k=1}^{K} \lambda_k
+ \mathbf{h} = \mathop{\arg \min}_{\mathbf{h}} \quad \sum_{k=1}^{K} \lambda_k
W_{reg}((\mathbf{D}_2^{(k)} \mathbf{h})^T, \mathbf{a})
s.t. \ \forall k, \mathbf{D}_1^{(k)} \gamma_k \mathbf{1}_n= \mathbf{h}
@@ -2544,15 +2556,15 @@ def jcpot_barycenter(Xs, Ys, Xt, reg, metric='sqeuclidean', numItermax=100,
- :math:`\lambda_k` is the weight of `k`-th source domain
- :math:`W_{reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance
- (see :py:func:`ot.bregman.sinkhorn`)
+ (see :py:func:`ot.bregman.sinkhorn`)
- :math:`\mathbf{D}_2^{(k)}` is a matrix of weights related to `k`-th source domain
- defined as in [p. 5, :ref:`27 <references-jcpot-barycenter>`], its expected shape
- is :math:`(n_k, C)` where :math:`n_k` is the number of elements in the `k`-th source
- domain and `C` is the number of classes
+ defined as in [p. 5, :ref:`27 <references-jcpot-barycenter>`], its expected shape
+ is :math:`(n_k, C)` where :math:`n_k` is the number of elements in the `k`-th source
+ domain and `C` is the number of classes
- :math:`\mathbf{h}` is a vector of estimated proportions in the target domain of size `C`
- :math:`\mathbf{a}` is a uniform vector of weights in the target domain of size `n`
- :math:`\mathbf{D}_1^{(k)}` is a matrix of class assignments defined as in
- [p. 5, :ref:`27 <references-jcpot-barycenter>`], its expected shape is :math:`(n_k, C)`
+ [p. 5, :ref:`27 <references-jcpot-barycenter>`], its expected shape is :math:`(n_k, C)`
The problem consists in solving a Wasserstein barycenter problem to estimate
the proportions :math:`\mathbf{h}` in the target domain.
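A hypothetical toy setup for this estimator, assuming it returns the estimated proportion vector as documented (random samples and labels; the regularization value is arbitrary):

>>> import numpy as np
>>> import ot
>>> rng = np.random.RandomState(0)
>>> Xs = [rng.randn(20, 2), rng.randn(30, 2)]             # two labelled source domains
>>> Ys = [rng.randint(0, 2, 20), rng.randint(0, 2, 30)]   # their class labels
>>> Xt = rng.randn(25, 2)                                 # unlabelled target samples
>>> h = ot.bregman.jcpot_barycenter(Xs, Ys, Xt, reg=1.)   # estimated target class proportions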
@@ -2714,18 +2726,19 @@ def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean',
The function solves the following optimization problem:
.. math::
- \gamma = \mathop{\arg \min}_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg} \cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg} \cdot\Omega(\gamma)
- s.t. \ \gamma \mathbf{1} &= a
+ s.t. \ \gamma \mathbf{1} &= \mathbf{a}
- \gamma^T \mathbf{1} &= b
+ \gamma^T \mathbf{1} &= \mathbf{b}
\gamma &\geq 0
where :
- :math:`\mathbf{M}` is the (`n_samples_a`, `n_samples_b`) metric cost matrix
- :math:`\Omega` is the entropic regularization term
- :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1)
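A minimal sketch of the empirical variant documented here, which builds the cost matrix from the samples internally and defaults to uniform weights (toy samples, arbitrary regularization):

>>> import numpy as np
>>> import ot
>>> rng = np.random.RandomState(0)
>>> X_s = rng.randn(10, 2)    # source samples
>>> X_t = rng.randn(12, 2)    # target samples
>>> G = ot.bregman.empirical_sinkhorn(X_s, X_t, reg=1.)   # OT plan, uniform weights by default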
@@ -2900,18 +2913,19 @@ def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean',
The function solves the following optimization problem:
.. math::
- W = \min_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg} \cdot\Omega(\gamma)
+ W = \min_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg} \cdot\Omega(\gamma)
- s.t. \ \gamma \mathbf{1} &= a
+ s.t. \ \gamma \mathbf{1} &= \mathbf{a}
- \gamma^T \mathbf{1} &= b
+ \gamma^T \mathbf{1} &= \mathbf{b}
\gamma &\geq 0
where :
- :math:`\mathbf{M}` is the (`n_samples_a`, `n_samples_b`) metric cost matrix
- :math:`\Omega` is the entropic regularization term
- :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1)
@@ -3055,18 +3069,21 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli
.. math::
- W &= \min_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg} \cdot\Omega(\gamma)
+ W &= \min_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg} \cdot\Omega(\gamma)
- W_a &= \min_{\gamma_a} <\gamma_a, \mathbf{M_a}>_F + \mathrm{reg} \cdot\Omega(\gamma_a)
+ W_a &= \min_{\gamma_a} \quad \langle \gamma_a, \mathbf{M_a} \rangle_F +
+ \mathrm{reg} \cdot\Omega(\gamma_a)
- W_b &= \min_{\gamma_b} <\gamma_b, \mathbf{M_b}>_F + \mathrm{reg} \cdot\Omega(\gamma_b)
+ W_b &= \min_{\gamma_b} \quad \langle \gamma_b, \mathbf{M_b} \rangle_F +
+ \mathrm{reg} \cdot\Omega(\gamma_b)
S &= W - \frac{W_a + W_b}{2}
.. math::
- s.t. \ \gamma \mathbf{1} &= a
+ s.t. \ \gamma \mathbf{1} &= \mathbf{a}
- \gamma^T \mathbf{1} &= b
+ \gamma^T \mathbf{1} &= \mathbf{b}
\gamma &\geq 0
@@ -3084,10 +3101,10 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli
where :
- :math:`\mathbf{M}` (resp. :math:`\mathbf{M_a}`, :math:`\mathbf{M_b}`)
- is the (`n_samples_a`, `n_samples_b`) metric cost matrix
- (resp (`n_samples_a, n_samples_a`) and (`n_samples_b`, `n_samples_b`))
+ is the (`n_samples_a`, `n_samples_b`) metric cost matrix
+ (resp (`n_samples_a, n_samples_a`) and (`n_samples_b`, `n_samples_b`))
- :math:`\Omega` is the entropic regularization term
- :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1)
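A sketch of the debiased divergence :math:`S = W - (W_a + W_b)/2` documented here, on toy samples (regularization value arbitrary):

>>> import numpy as np
>>> import ot
>>> rng = np.random.RandomState(0)
>>> X_s = rng.randn(10, 2)
>>> X_t = rng.randn(12, 2)
>>> S = ot.bregman.empirical_sinkhorn_divergence(X_s, X_t, reg=1.)  # scalar divergence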
@@ -3198,7 +3215,10 @@ def screenkhorn(a, b, M, reg, ns_budget=None, nt_budget=None, uniform=False,
.. math::
- (\mathbf{u}, \mathbf{v}) = \mathop{\arg \min}_{\mathbf{u}, \mathbf{v}} \ \mathbf{1}_{ns}^T \mathbf{B}(\mathbf{u}, \mathbf{v}) \mathbf{1}_{nt} - <\kappa \mathbf{u}, \mathbf{a}> - <\mathbf{v} / \kappa, \mathbf{b}>
+ (\mathbf{u}, \mathbf{v}) = \mathop{\arg \min}_{\mathbf{u}, \mathbf{v}} \quad
+ \mathbf{1}_{ns}^T \mathbf{B}(\mathbf{u}, \mathbf{v}) \mathbf{1}_{nt} -
+ \langle \kappa \mathbf{u}, \mathbf{a} \rangle -
+ \langle \frac{1}{\kappa} \mathbf{v}, \mathbf{b} \rangle
where:
@@ -3249,13 +3269,15 @@ def screenkhorn(a, b, M, reg, ns_budget=None, nt_budget=None, uniform=False,
If `True`, display information about the cardinalities of the active sets
and the parameters kappa and epsilon
- Dependency
- ----------
- To gain more efficiency, screenkhorn needs to call the "Bottleneck"
- package (https://pypi.org/project/Bottleneck/)
- in the screening pre-processing step. If Bottleneck isn't installed,
- the following error message appears:
- "Bottleneck module doesn't exist. Install it from https://pypi.org/project/Bottleneck/"
+
+ .. admonition:: Dependency
+
+ To gain more efficiency, :py:func:`ot.bregman.screenkhorn` needs to call the "Bottleneck"
+ package (https://pypi.org/project/Bottleneck/) in the screening pre-processing step.
+
+ If Bottleneck isn't installed, the following error message appears:
+
+ "Bottleneck module doesn't exist. Install it from https://pypi.org/project/Bottleneck/"
Returns
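A sketch of the screened solver documented above, assuming the Bottleneck dependency mentioned in the admonition is installed (uniform toy marginals; budgets and regularization are illustrative):

>>> import numpy as np
>>> import ot
>>> n = 100
>>> a = np.ones(n) / n
>>> b = np.ones(n) / n
>>> M = ot.utils.dist0(n); M /= M.max()
>>> # keep a budget of 50 active points on each side in the screened problem
>>> G = ot.bregman.screenkhorn(a, b, M, reg=1e-1, ns_budget=50, nt_budget=50)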
diff --git a/ot/da.py b/ot/da.py
index cdc747c..4fd97df 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -33,27 +33,29 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)
- + \eta \Omega_g(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg} \cdot \Omega_e(\gamma) + \eta \ \Omega_g(\gamma)
- s.t. \gamma 1 = a
+ s.t. \ \gamma \mathbf{1} = \mathbf{a}
+
+ \gamma^T \mathbf{1} = \mathbf{b}
+
+ \gamma \geq 0
- \gamma^T 1= b
- \gamma\geq 0
where :
- - M is the (ns,nt) metric cost matrix
+ - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix
- :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e
(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- :math:`\Omega_g` is the group lasso regularization term
:math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1`
- where :math:`\mathcal{I}_c` are the index of samples from class c
+ where :math:`\mathcal{I}_c` are the index of samples from class `c`
in the source domain.
- - a and b are source and target weights (sum to 1)
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1)
The algorithm used for solving the problem is the generalized conditional
- gradient as proposed in [5]_ [7]_
+ gradient as proposed in :ref:`[5, 7] <references-sinkhorn-lpl1-mm>`.
Parameters
@@ -84,19 +86,20 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : (ns, nt) ndarray
Optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
+ .. _references-sinkhorn-lpl1-mm:
References
----------
-
.. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
"Optimal Transport for Domain Adaptation," in IEEE
Transactions on Pattern Analysis and Machine Intelligence ,
vol.PP, no.99, pp.1-1
+
.. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
Generalized conditional gradient: analysis of convergence
and applications. arXiv preprint arXiv:1510.06567.
@@ -144,27 +147,29 @@ def sinkhorn_l1l2_gl(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)+
- \eta \Omega_g(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg} \cdot \Omega_e(\gamma) + \eta \ \Omega_g(\gamma)
+
+ s.t. \ \gamma \mathbf{1} = \mathbf{a}
- s.t. \gamma 1 = a
+ \gamma^T \mathbf{1} = \mathbf{b}
+
+ \gamma \geq 0
- \gamma^T 1= b
- \gamma\geq 0
where :
- - M is the (ns,nt) metric cost matrix
+ - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix
- :math:`\Omega_e` is the entropic regularization term
:math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- :math:`\Omega_g` is the group lasso regularization term
:math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^2`
where :math:`\mathcal{I}_c` are the index of samples from class
- c in the source domain.
- - a and b are source and target weights (sum to 1)
+ `c` in the source domain.
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1)
The algorithm used for solving the problem is the generalised conditional
- gradient as proposed in [5]_ [7]_
+ gradient as proposed in :ref:`[5, 7] <references-sinkhorn-l1l2-gl>`.
Parameters
@@ -195,18 +200,19 @@ def sinkhorn_l1l2_gl(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : (ns, nt) ndarray
Optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
+ .. _references-sinkhorn-l1l2-gl:
References
----------
-
.. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
"Optimal Transport for Domain Adaptation," in IEEE Transactions
on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
+
.. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
Generalized conditional gradient: analysis of convergence and
applications. arXiv preprint arXiv:1510.06567.
@@ -245,38 +251,40 @@ def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False,
verbose2=False, numItermax=100, numInnerItermax=10,
stopInnerThr=1e-6, stopThr=1e-5, log=False,
**kwargs):
- r"""Joint OT and linear mapping estimation as proposed in [8]
+ r"""Joint OT and linear mapping estimation as proposed in
+ :ref:`[8] <references-joint-OT-mapping-linear>`.
The function solves the following optimization problem:
.. math::
- \min_{\gamma,L}\quad \|L(X_s) -n_s\gamma X_t\|^2_F +
- \mu<\gamma,M>_F + \eta \|L -I\|^2_F
+ \min_{\gamma,L}\quad \|L(\mathbf{X_s}) - n_s\gamma \mathbf{X_t} \|^2_F +
+ \mu \langle \gamma, \mathbf{M} \rangle_F + \eta \|L - \mathbf{I}\|^2_F
+
+ s.t. \ \gamma \mathbf{1} = \mathbf{a}
- s.t. \gamma 1 = a
+ \gamma^T \mathbf{1} = \mathbf{b}
- \gamma^T 1= b
+ \gamma \geq 0
- \gamma\geq 0
where :
- - M is the (ns,nt) squared euclidean cost matrix between samples in
- Xs and Xt (scaled by ns)
- - :math:`L` is a dxd linear operator that approximates the barycentric
+ - :math:`\mathbf{M}` is the (`ns`, `nt`) squared euclidean cost matrix between samples in
+ :math:`\mathbf{X_s}` and :math:`\mathbf{X_t}` (scaled by :math:`n_s`)
+ - :math:`L` is a :math:`d\times d` linear operator that approximates the barycentric
mapping
- - :math:`I` is the identity matrix (neutral linear mapping)
- - a and b are uniform source and target weights
+ - :math:`\mathbf{I}` is the identity matrix (neutral linear mapping)
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are uniform source and target weights
The problem consists in solving jointly an optimal transport matrix
:math:`\gamma` and a linear mapping that fits the barycentric mapping
- :math:`n_s\gamma X_t`.
+ :math:`n_s\gamma \mathbf{X_t}`.
One can also estimate a mapping with constant bias (see supplementary
- material of [8]) using the bias optional argument.
+ material of :ref:`[8] <references-joint-OT-mapping-linear>`) using the bias optional argument.
The algorithm used for solving the problem is the block coordinate
- descent that alternates between updates of G (using conditionnal gradient)
- and the update of L using a classical least square solver.
+ descent that alternates between updates of :math:`\mathbf{G}` (using conditional gradient)
+ and the update of :math:`\mathbf{L}` using a classical least square solver.
Parameters
@@ -307,17 +315,17 @@ def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False,
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : (ns, nt) ndarray
Optimal transportation matrix for the given parameters
- L : (d x d) ndarray
- Linear mapping matrix (d+1 x d if bias)
+ L : (d, d) ndarray
+ Linear mapping matrix ((:math:`d+1`, `d`) if bias)
log : dict
log dictionary return only if log==True in parameters
+ .. _references-joint-OT-mapping-linear:
References
----------
-
.. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard,
"Mapping estimation for discrete optimal transport",
Neural Information Processing Systems (NIPS), 2016.
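A toy sketch of the joint plan-and-mapping estimation documented here, returning the plan :math:`\mathbf{G}` and the linear map :math:`\mathbf{L}` (synthetic linearly related domains; mu and eta values arbitrary):

>>> import numpy as np
>>> import ot
>>> rng = np.random.RandomState(0)
>>> xs = rng.randn(20, 2)                             # source samples
>>> xt = xs.dot(np.array([[1., .5], [0., 1.]])) + 1.  # target = sheared, shifted sources
>>> G, L = ot.da.joint_OT_mapping_linear(xs, xt, mu=1, eta=1e-3, bias=True)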
@@ -434,37 +442,41 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
numItermax=100, numInnerItermax=10,
stopInnerThr=1e-6, stopThr=1e-5, log=False,
**kwargs):
- r"""Joint OT and nonlinear mapping estimation with kernels as proposed in [8]
+ r"""Joint OT and nonlinear mapping estimation with kernels as proposed in
+ :ref:`[8] <references-joint-OT-mapping-kernel>`.
The function solves the following optimization problem:
.. math::
- \min_{\gamma,L\in\mathcal{H}}\quad \|L(X_s) -
- n_s\gamma X_t\|^2_F + \mu<\gamma,M>_F + \eta \|L\|^2_\mathcal{H}
+ \min_{\gamma, L\in\mathcal{H}}\quad \|L(\mathbf{X_s}) -
+ n_s\gamma \mathbf{X_t}\|^2_F + \mu \langle \gamma, \mathbf{M} \rangle_F +
+ \eta \|L\|^2_\mathcal{H}
+
+ s.t. \ \gamma \mathbf{1} = \mathbf{a}
+
+ \gamma^T \mathbf{1} = \mathbf{b}
- s.t. \gamma 1 = a
+ \gamma \geq 0
- \gamma^T 1= b
- \gamma\geq 0
where :
- - M is the (ns,nt) squared euclidean cost matrix between samples in
- Xs and Xt (scaled by ns)
- - :math:`L` is a ns x d linear operator on a kernel matrix that
+ - :math:`\mathbf{M}` is the (`ns`, `nt`) squared euclidean cost matrix between samples in
+ :math:`\mathbf{X_s}` and :math:`\mathbf{X_t}` (scaled by :math:`n_s`)
+ - :math:`L` is a :math:`n_s \times d` linear operator on a kernel matrix that
approximates the barycentric mapping
- - a and b are uniform source and target weights
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are uniform source and target weights
The problem consists in solving jointly an optimal transport matrix
:math:`\gamma` and the nonlinear mapping that fits the barycentric mapping
- :math:`n_s\gamma X_t`.
+ :math:`n_s\gamma \mathbf{X_t}`.
One can also estimate a mapping with constant bias (see supplementary
- material of [8]) using the bias optional argument.
+ material of :ref:`[8] <references-joint-OT-mapping-kernel>`) using the bias optional argument.
The algorithm used for solving the problem is the block coordinate
- descent that alternates between updates of G (using conditionnal gradient)
- and the update of L using a classical kernel least square solver.
+ descent that alternates between updates of :math:`\mathbf{G}` (using conditional gradient)
+ and the update of :math:`\mathbf{L}` using a classical kernel least square solver.
Parameters
@@ -478,7 +490,7 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
eta : float, optional
Regularization term for the linear mapping L (>0)
kerneltype : str,optional
- kernel used by calling function ot.utils.kernel (gaussian by default)
+ kernel used by calling function :py:func:`ot.utils.kernel` (gaussian by default)
sigma : float, optional
Gaussian kernel bandwidth.
bias : bool,optional
@@ -501,17 +513,17 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : (ns, nt) ndarray
Optimal transportation matrix for the given parameters
- L : (ns x d) ndarray
- Nonlinear mapping matrix (ns+1 x d if bias)
+ L : (ns, d) ndarray
+ Nonlinear mapping matrix ((:math:`n_s+1`, `d`) if bias)
log : dict
log dictionary return only if log==True in parameters
+ .. _references-joint-OT-mapping-kernel:
References
----------
-
.. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard,
"Mapping estimation for discrete optimal transport",
Neural Information Processing Systems (NIPS), 2016.
@@ -645,26 +657,27 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
def OT_mapping_linear(xs, xt, reg=1e-6, ws=None,
wt=None, bias=True, log=False):
- r""" return OT linear operator between samples
+ r"""Return OT linear operator between samples.
The function estimates the optimal linear operator that aligns the two
empirical distributions. This is equivalent to estimating the closed
- form mapping between two Gaussian distributions :math:`N(\mu_s,\Sigma_s)`
- and :math:`N(\mu_t,\Sigma_t)` as proposed in [14] and discussed in remark
- 2.29 in [15].
+ form mapping between two Gaussian distributions :math:`\mathcal{N}(\mu_s,\Sigma_s)`
+ and :math:`\mathcal{N}(\mu_t,\Sigma_t)` as proposed in
+ :ref:`[14] <references-OT-mapping-linear>` and discussed in remark 2.29 in
+ :ref:`[15] <references-OT-mapping-linear>`.
The linear operator from source to target :math:`M`
.. math::
- M(x)=Ax+b
+ M(\mathbf{x})= \mathbf{A} \mathbf{x} + \mathbf{b}
where :
.. math::
- A=\Sigma_s^{-1/2}(\Sigma_s^{1/2}\Sigma_t\Sigma_s^{1/2})^{1/2}
+ \mathbf{A} &= \Sigma_s^{-1/2} \left(\Sigma_s^{1/2}\Sigma_t\Sigma_s^{1/2} \right)^{1/2}
\Sigma_s^{-1/2}
- .. math::
- b=\mu_t-A\mu_s
+
+ \mathbf{b} &= \mu_t - \mathbf{A} \mu_s
Parameters
----------
@@ -673,35 +686,35 @@ def OT_mapping_linear(xs, xt, reg=1e-6, ws=None,
xt : np.ndarray (nt,d)
samples in the target domain
reg : float,optional
- regularization added to the diagonals of convariances (>0)
+ regularization added to the diagonals of covariances (>0)
ws : np.ndarray (ns,1), optional
weights for the source samples
wt : np.ndarray (ns,1), optional
weights for the target samples
bias: boolean, optional
- estimate bias b else b=0 (default:True)
+ estimate bias :math:`\mathbf{b}` else :math:`\mathbf{b} = 0` (default:True)
log : bool, optional
record log if True
Returns
-------
- A : (d x d) ndarray
+ A : (d, d) ndarray
Linear operator
- b : (1 x d) ndarray
+ b : (1, d) ndarray
bias
log : dict
log dictionary return only if log==True in parameters
+ .. _references-OT-mapping-linear:
References
----------
-
.. [14] Knott, M. and Smith, C. S. "On the optimal mapping of
distributions", Journal of Optimization Theory and Applications
Vol 43, 1984
- .. [15] Peyré, G., & Cuturi, M. (2017). "Computational Optimal
+ .. [15] Peyré, G., & Cuturi, M. (2017). "Computational Optimal
Transport", 2018.
@@ -754,24 +767,34 @@ def emd_laplace(a, b, xs, xt, M, sim='knn', sim_param=None, reg='pos', eta=1, al
r"""Solve the optimal transport problem (OT) with Laplacian regularization
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + eta\Omega_\alpha(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \eta \cdot \Omega_\alpha(\gamma)
- s.t.\ \gamma 1 = a
+ s.t. \ \gamma \mathbf{1} = \mathbf{a}
- \gamma^T 1= b
+ \gamma^T \mathbf{1} = \mathbf{b}
- \gamma\geq 0
+ \gamma \geq 0
where:
- - a and b are source and target weights (sum to 1)
- - xs and xt are source and target samples
- - M is the (ns,nt) metric cost matrix
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1)
+ - :math:`\mathbf{x_s}` and :math:`\mathbf{x_t}` are source and target samples
+ - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix
- :math:`\Omega_\alpha` is the Laplacian regularization term
- :math:`\Omega_\alpha = (1-\alpha)/n_s^2\sum_{i,j}S^s_{i,j}\|T(\mathbf{x}^s_i)-T(\mathbf{x}^s_j)\|^2+\alpha/n_t^2\sum_{i,j}S^t_{i,j}^'\|T(\mathbf{x}^t_i)-T(\mathbf{x}^t_j)\|^2`
- with :math:`S^s_{i,j}, S^t_{i,j}` denoting source and target similarity matrices and :math:`T(\cdot)` being a barycentric mapping
- The algorithm used for solving the problem is the conditional gradient algorithm as proposed in [5].
+ .. math::
+ \Omega_\alpha = \frac{1 - \alpha}{n_s^2} \sum_{i,j}
+ \mathbf{S^s}_{i,j} \|T(\mathbf{x}^s_i) - T(\mathbf{x}^s_j) \|^2 +
+ \frac{\alpha}{n_t^2} \sum_{i,j}
+ \mathbf{S^t}_{i,j} \|T(\mathbf{x}^t_i) - T(\mathbf{x}^t_j) \|^2
+
+
+ with :math:`\mathbf{S^s}_{i,j}, \mathbf{S^t}_{i,j}` denoting source and target similarity
+ matrices and :math:`T(\cdot)` being a barycentric mapping.
+
+ The algorithm used for solving the problem is the conditional gradient algorithm as proposed in
+ :ref:`[5] <references-emd-laplace>`.
Parameters
----------
@@ -811,22 +834,23 @@ def emd_laplace(a, b, xs, xt, M, sim='knn', sim_param=None, reg='pos', eta=1, al
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : (ns, nt) ndarray
Optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
+ .. _references-emd-laplace:
References
----------
-
.. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
"Optimal Transport for Domain Adaptation," in IEEE
- Transactions on Pattern Analysis and Machine Intelligence ,
+ Transactions on Pattern Analysis and Machine Intelligence,
vol.PP, no.99, pp.1-1
+
.. [30] R. Flamary, N. Courty, D. Tuia, A. Rakotomamonjy,
"Optimal transport with Laplacian regularization: Applications to domain adaptation and shape matching,"
- in NIPS Workshop on Optimal Transport and Machine Learning OTML, 2014.
+ in NIPS Workshop on Optimal Transport and Machine Learning OTML, 2014.
See Also
--------
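A toy sketch of the Laplacian-regularized solver documented in this hunk (random shifted clouds; similarity, eta and sim_param values are illustrative only):

>>> import numpy as np
>>> import ot
>>> rng = np.random.RandomState(0)
>>> ns, nt = 20, 20
>>> xs = rng.randn(ns, 2)
>>> xt = rng.randn(nt, 2) + 2.
>>> a, b = ot.unif(ns), ot.unif(nt)
>>> M = ot.dist(xs, xt)
>>> G = ot.da.emd_laplace(a, b, xs, xt, M, sim='knn', sim_param=3, eta=1.)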
@@ -882,7 +906,7 @@ def emd_laplace(a, b, xs, xt, M, sim='knn', sim_param=None, reg='pos', eta=1, al
def distribution_estimation_uniform(X):
- """estimates a uniform distribution from an array of samples X
+ """estimates a uniform distribution from an array of samples :math:`\mathbf{X}`
Parameters
----------
@@ -892,7 +916,7 @@ def distribution_estimation_uniform(X):
Returns
-------
mu : array-like, shape (n_samples,)
- The uniform distribution estimated from X
+ The uniform distribution estimated from :math:`\mathbf{X}`
"""
return unif(X.shape[0])
@@ -902,32 +926,32 @@ class BaseTransport(BaseEstimator):
"""Base class for OTDA objects
- Notes
- -----
- All estimators should specify all the parameters that can be set
- at the class level in their ``__init__`` as explicit keyword
- arguments (no ``*args`` or ``**kwargs``).
+ .. note::
+ All estimators should specify all the parameters that can be set
+ at the class level in their ``__init__`` as explicit keyword
+ arguments (no ``*args`` or ``**kwargs``).
- the fit method should:
+ The fit method should:
- estimate a cost matrix and store it in a `cost_` attribute
- - estimate a coupling matrix and store it in a `coupling_`
- attribute
+ - estimate a coupling matrix and store it in a `coupling_` attribute
- estimate distributions from source and target data and store them in
- mu_s and mu_t attributes
- - store Xs and Xt in attributes to be used later on in transform and
- inverse_transform methods
+ `mu_s` and `mu_t` attributes
+ - store `Xs` and `Xt` in attributes to be used later on in `transform` and
+ `inverse_transform` methods
+
+ `transform` method should always get as input a `Xs` parameter
- transform method should always get as input a Xs parameter
- inverse_transform method should always get as input a Xt parameter
+ `inverse_transform` method should always get as input a `Xt` parameter
- transform_labels method should always get as input a ys parameter
- inverse_transform_labels method should always get as input a yt parameter
+ `transform_labels` method should always get as input a `ys` parameter
+
+ `inverse_transform_labels` method should always get as input a `yt` parameter
"""
def fit(self, Xs=None, ys=None, Xt=None, yt=None):
"""Build a coupling matrix from source and target sets of samples
- (Xs, ys) and (Xt, yt)
+ :math:`(\mathbf{X_s}, \mathbf{y_s})` and :math:`(\mathbf{X_t}, \mathbf{y_t})`
Parameters
----------
@@ -938,8 +962,8 @@ class BaseTransport(BaseEstimator):
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
yt : array-like, shape (n_target_samples,)
- The class labels. If some target samples are unlabeled, fill the
- yt's elements with -1.
+ The class labels. If some target samples are unlabelled, fill the
+ :math:`\mathbf{y_t}`'s elements with -1.
Warning: Note that, due to this convention -1 cannot be used as a
class label
@@ -987,8 +1011,8 @@ class BaseTransport(BaseEstimator):
def fit_transform(self, Xs=None, ys=None, Xt=None, yt=None):
"""Build a coupling matrix from source and target sets of samples
- (Xs, ys) and (Xt, yt) and transports source samples Xs onto target
- ones Xt
+ :math:`(\mathbf{X_s}, \mathbf{y_s})` and :math:`(\mathbf{X_t}, \mathbf{y_t})`
+ and transports source samples :math:`\mathbf{X_s}` onto target ones :math:`\mathbf{X_t}`
Parameters
----------
@@ -999,8 +1023,8 @@ class BaseTransport(BaseEstimator):
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
yt : array-like, shape (n_target_samples,)
- The class labels. If some target samples are unlabeled, fill the
- yt's elements with -1.
+ The class labels. If some target samples are unlabelled, fill the
+ :math:`\mathbf{y_t}`'s elements with -1.
Warning: Note that, due to this convention -1 cannot be used as a
class label
@@ -1014,7 +1038,7 @@ class BaseTransport(BaseEstimator):
return self.fit(Xs, ys, Xt, yt).transform(Xs, ys, Xt, yt)
def transform(self, Xs=None, ys=None, Xt=None, yt=None, batch_size=128):
- """Transports source samples Xs onto target ones Xt
+ """Transports source samples :math:`\mathbf{X_s}` onto target ones :math:`\mathbf{X_t}`
Parameters
----------
@@ -1025,8 +1049,8 @@ class BaseTransport(BaseEstimator):
Xt : array-like, shape (n_target_samples, n_features)
The target input samples.
yt : array-like, shape (n_target_samples,)
- The class labels for target. If some target samples are unlabeled, fill the
- yt's elements with -1.
+ The class labels for target. If some target samples are unlabelled, fill the
+ :math:`\mathbf{y_t}`'s elements with -1.
Warning: Note that, due to this convention -1 cannot be used as a
class label
@@ -1081,7 +1105,8 @@ class BaseTransport(BaseEstimator):
return transp_Xs
def transform_labels(self, ys=None):
- """Propagate source labels ys to obtain estimated target labels as in [27]
+ """Propagate source labels :math:`\mathbf{y_s}` to obtain estimated target labels as in
+ :ref:`[27] <references-basetransport-transform-labels>`.
Parameters
----------
@@ -1093,9 +1118,10 @@ class BaseTransport(BaseEstimator):
transp_ys : array-like, shape (n_target_samples, nb_classes)
Estimated soft target labels.
+
+ .. _references-basetransport-transform-labels:
References
----------
-
.. [27] Ievgen Redko, Nicolas Courty, Rémi Flamary, Devis Tuia
"Optimal transport for multi-source domain adaptation under target shift",
International Conference on Artificial Intelligence and Statistics (AISTATS), 2019.
@@ -1126,7 +1152,7 @@ class BaseTransport(BaseEstimator):
def inverse_transform(self, Xs=None, ys=None, Xt=None, yt=None,
batch_size=128):
- """Transports target samples Xt onto source samples Xs
+ """Transports target samples :math:`\mathbf{X_t}` onto source samples :math:`\mathbf{X_s}`
Parameters
----------
@@ -1137,8 +1163,8 @@ class BaseTransport(BaseEstimator):
Xt : array-like, shape (n_target_samples, n_features)
The target input samples.
yt : array-like, shape (n_target_samples,)
- The target class labels. If some target samples are unlabeled, fill the
- yt's elements with -1.
+ The target class labels. If some target samples are unlabelled, fill the
+ :math:`\mathbf{y_t}`'s elements with -1.
Warning: Note that, due to this convention -1 cannot be used as a
class label
@@ -1192,7 +1218,8 @@ class BaseTransport(BaseEstimator):
return transp_Xt
def inverse_transform_labels(self, yt=None):
- """Propagate target labels yt to obtain estimated source labels ys
+ """Propagate target labels :math:`\mathbf{y_t}` to obtain estimated source labels
+ :math:`\mathbf{y_s}`
Parameters
----------
@@ -1232,35 +1259,37 @@ class LinearTransport(BaseTransport):
The function estimates the optimal linear operator that aligns the two
empirical distributions. This is equivalent to estimating the closed
- form mapping between two Gaussian distributions :math:`N(\mu_s,\Sigma_s)`
- and :math:`N(\mu_t,\Sigma_t)` as proposed in [14] and discussed in
- remark 2.29 in [15].
+ form mapping between two Gaussian distributions :math:`\mathcal{N}(\mu_s,\Sigma_s)`
+ and :math:`\mathcal{N}(\mu_t,\Sigma_t)` as proposed in
+ :ref:`[14] <references-lineartransport>` and discussed in remark 2.29 in
+ :ref:`[15] <references-lineartransport>`.
The linear operator from source to target :math:`M`
.. math::
- M(x)=Ax+b
+ M(\mathbf{x})= \mathbf{A} \mathbf{x} + \mathbf{b}
where :
.. math::
- A=\Sigma_s^{-1/2}(\Sigma_s^{1/2}\Sigma_t\Sigma_s^{1/2})^{1/2}
+ \mathbf{A} &= \Sigma_s^{-1/2} \left(\Sigma_s^{1/2}\Sigma_t\Sigma_s^{1/2} \right)^{1/2}
\Sigma_s^{-1/2}
- .. math::
- b=\mu_t-A\mu_s
+
+ \mathbf{b} &= \mu_t - \mathbf{A} \mu_s
Parameters
----------
reg : float,optional
- regularization added to the daigonals of convariances (>0)
+ regularization added to the diagonals of covariances (>0)
bias: boolean, optional
- estimate bias b else b=0 (default:True)
+ estimate bias :math:`\mathbf{b}` else :math:`\mathbf{b} = 0` (default:True)
log : bool, optional
record log if True
+
+ .. _references-lineartransport:
References
----------
-
.. [14] Knott, M. and Smith, C. S. "On the optimal mapping of
distributions", Journal of Optimization Theory and Applications
Vol 43, 1984
@@ -1279,7 +1308,7 @@ class LinearTransport(BaseTransport):
def fit(self, Xs=None, ys=None, Xt=None, yt=None):
"""Build a coupling matrix from source and target sets of samples
- (Xs, ys) and (Xt, yt)
+ :math:`(\mathbf{X_s}, \mathbf{y_s})` and :math:`(\mathbf{X_t}, \mathbf{y_t})`
Parameters
----------
@@ -1290,8 +1319,8 @@ class LinearTransport(BaseTransport):
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
yt : array-like, shape (n_target_samples,)
- The class labels. If some target samples are unlabeled, fill the
- yt's elements with -1.
+ The class labels. If some target samples are unlabelled, fill the
+ :math:`\mathbf{y_t}`'s elements with -1.
Warning: Note that, due to this convention -1 cannot be used as a
class label
@@ -1325,7 +1354,7 @@ class LinearTransport(BaseTransport):
return self
def transform(self, Xs=None, ys=None, Xt=None, yt=None, batch_size=128):
- """Transports source samples Xs onto target ones Xt
+ """Transports source samples :math:`\mathbf{X_s}` onto target ones :math:`\mathbf{X_t}`
Parameters
----------
@@ -1336,8 +1365,8 @@ class LinearTransport(BaseTransport):
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
yt : array-like, shape (n_target_samples,)
- The class labels. If some target samples are unlabeled, fill the
- yt's elements with -1.
+ The class labels. If some target samples are unlabelled, fill the
+ :math:`\mathbf{y_t}`'s elements with -1.
Warning: Note that, due to this convention -1 cannot be used as a
class label
@@ -1358,7 +1387,7 @@ class LinearTransport(BaseTransport):
def inverse_transform(self, Xs=None, ys=None, Xt=None, yt=None,
batch_size=128):
- """Transports target samples Xt onto target samples Xs
+ """Transports target samples :math:`\mathbf{X_t}` onto source samples :math:`\mathbf{X_s}`
Parameters
----------
@@ -1369,8 +1398,8 @@ class LinearTransport(BaseTransport):
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
yt : array-like, shape (n_target_samples,)
- The class labels. If some target samples are unlabeled, fill the
- yt's elements with -1.
+ The class labels. If some target samples are unlabelled, fill the
+ :math:`\mathbf{y_t}`'s elements with -1.
Warning: Note that, due to this convention -1 cannot be used as a
class label
@@ -1392,7 +1421,7 @@ class LinearTransport(BaseTransport):
class SinkhornTransport(BaseTransport):
- """Domain Adapatation OT method based on Sinkhorn Algorithm
+ """Domain Adaptation OT method based on Sinkhorn Algorithm
Parameters
----------
@@ -1400,7 +1429,7 @@ class SinkhornTransport(BaseTransport):
Entropic regularization parameter
max_iter : int, float, optional (default=1000)
The minimum number of iterations before stopping the optimization
- algorithm if no it has not converged
+ algorithm if it has not converged
tol : float, optional (default=10e-9)
The precision required to stop the optimization algorithm.
verbose : bool, optional (default=False)
@@ -1417,8 +1446,8 @@ class SinkhornTransport(BaseTransport):
out_of_sample_map : string, optional (default="ferradans")
The kind of out of sample mapping to apply to transport samples
from a domain into another one. Currently the only possible option is
- "ferradans" which uses the method proposed in [6].
- limit_max: float, optional (defaul=np.infty)
+ "ferradans" which uses the method proposed in :ref:`[6] <references-sinkhorntransport>`.
+ limit_max: float, optional (default=np.infty)
Controls the semi supervised mode. Transport between labeled source
and target samples of different classes will exhibit a cost defined
by this variable
@@ -1428,16 +1457,20 @@ class SinkhornTransport(BaseTransport):
coupling_ : array-like, shape (n_source_samples, n_target_samples)
The optimal coupling
log_ : dictionary
- The dictionary of log, empty dic if parameter log is not True
+ The dictionary of log, empty dict if parameter log is not True
+
+ .. _references-sinkhorntransport:
References
----------
.. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
"Optimal Transport for Domain Adaptation," in IEEE Transactions
on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
+
.. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal
Transport, Advances in Neural Information Processing Systems (NIPS)
26, 2013
+
.. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014).
Regularized discrete optimal transport. SIAM Journal on Imaging
Sciences, 7(3), 1853-1882.
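A minimal usage sketch of the estimator documented in this hunk, using the fit/transform interface described in the BaseTransport notes above (toy clouds; reg_e value arbitrary):

>>> import numpy as np
>>> import ot
>>> rng = np.random.RandomState(0)
>>> Xs = rng.randn(30, 2)
>>> Xt = rng.randn(40, 2) + 1.
>>> otda = ot.da.SinkhornTransport(reg_e=1e-1)
>>> otda = otda.fit(Xs=Xs, Xt=Xt)          # estimate the coupling
>>> Xs_mapped = otda.transform(Xs=Xs)      # barycentric mapping of the source samples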
@@ -1461,7 +1494,7 @@ class SinkhornTransport(BaseTransport):
def fit(self, Xs=None, ys=None, Xt=None, yt=None):
"""Build a coupling matrix from source and target sets of samples
- (Xs, ys) and (Xt, yt)
+ :math:`(\mathbf{X_s}, \mathbf{y_s})` and :math:`(\mathbf{X_t}, \mathbf{y_t})`
Parameters
----------
@@ -1472,8 +1505,8 @@ class SinkhornTransport(BaseTransport):
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
yt : array-like, shape (n_target_samples,)
- The class labels. If some target samples are unlabeled, fill the
- yt's elements with -1.
+ The class labels. If some target samples are unlabelled, fill the
+ :math:`\mathbf{y_t}`'s elements with -1.
Warning: Note that, due to this convention, -1 cannot be used as a
class label
@@ -1504,7 +1537,7 @@ class SinkhornTransport(BaseTransport):
class EMDTransport(BaseTransport):
- """Domain Adapatation OT method based on Earth Mover's Distance
+ """Domain Adaptation OT method based on Earth Mover's Distance
Parameters
----------
@@ -1520,7 +1553,7 @@ class EMDTransport(BaseTransport):
out_of_sample_map : string, optional (default="ferradans")
The kind of out-of-sample mapping used to transport samples
from one domain to another. Currently the only possible option is
- "ferradans" which uses the method proposed in [6].
+ "ferradans" which uses the method proposed in :ref:`[6] <references-emdtransport>`.
limit_max: float, optional (default=10)
Controls the semi-supervised mode. Transport between labeled source
and target samples of different classes will exhibit an infinite cost
@@ -1534,14 +1567,16 @@ class EMDTransport(BaseTransport):
coupling_ : array-like, shape (n_source_samples, n_target_samples)
The optimal coupling
+
+ .. _references-emdtransport:
References
----------
.. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
- "Optimal Transport for Domain Adaptation," in IEEE Transactions
- on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
+ "Optimal Transport for Domain Adaptation," in IEEE Transactions
+ on Pattern Analysis and Machine Intelligence, vol.PP, no.99, pp.1-1
.. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014).
- Regularized discrete optimal transport. SIAM Journal on Imaging
- Sciences, 7(3), 1853-1882.
+ Regularized discrete optimal transport. SIAM Journal on Imaging
+ Sciences, 7(3), 1853-1882.
"""
def __init__(self, metric="sqeuclidean", norm=None, log=False,
@@ -1558,7 +1593,7 @@ class EMDTransport(BaseTransport):
def fit(self, Xs, ys=None, Xt=None, yt=None):
"""Build a coupling matrix from source and target sets of samples
- (Xs, ys) and (Xt, yt)
+ :math:`(\mathbf{X_s}, \mathbf{y_s})` and :math:`(\mathbf{X_t}, \mathbf{y_t})`
Parameters
----------
@@ -1569,8 +1604,8 @@ class EMDTransport(BaseTransport):
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
yt : array-like, shape (n_target_samples,)
- The class labels. If some target samples are unlabeled, fill the
- yt's elements with -1.
+ The class labels. If some target samples are unlabelled, fill the
+ :math:`\mathbf{y_t}`'s elements with -1.
Warning: Note that, due to this convention, -1 cannot be used as a
class label
@@ -1597,8 +1632,7 @@ class EMDTransport(BaseTransport):
class SinkhornLpl1Transport(BaseTransport):
-
- """Domain Adapatation OT method based on sinkhorn algorithm +
+ r"""Domain Adaptation OT method based on sinkhorn algorithm +
LpL1 class regularization.
Parameters
@@ -1609,7 +1643,7 @@ class SinkhornLpl1Transport(BaseTransport):
Class regularization parameter
max_iter : int, float, optional (default=10)
The maximum number of iterations before stopping the optimization
- algorithm if no it has not converged
+ algorithm if it has not converged
max_inner_iter : int, float, optional (default=200)
The number of iterations in the inner loop
log : bool, optional (default=False)
@@ -1628,8 +1662,8 @@ class SinkhornLpl1Transport(BaseTransport):
out_of_sample_map : string, optional (default="ferradans")
The kind of out-of-sample mapping used to transport samples
from one domain to another. Currently the only possible option is
- "ferradans" which uses the method proposed in [6].
- limit_max: float, optional (defaul=np.infty)
+ "ferradans" which uses the method proposed in :ref:`[6] <references-sinkhornlpl1transport>`.
+ limit_max: float, optional (default=np.infty)
Controls the semi-supervised mode. Transport between labeled source
and target samples of different classes will exhibit a cost defined by
limit_max.
@@ -1639,16 +1673,19 @@ class SinkhornLpl1Transport(BaseTransport):
coupling_ : array-like, shape (n_source_samples, n_target_samples)
The optimal coupling
+
+ .. _references-sinkhornlpl1transport:
References
----------
-
.. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
"Optimal Transport for Domain Adaptation," in IEEE
Transactions on Pattern Analysis and Machine Intelligence,
vol.PP, no.99, pp.1-1
+
.. [2] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
Generalized conditional gradient: analysis of convergence
and applications. arXiv preprint arXiv:1510.06567.
+
.. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014).
Regularized discrete optimal transport. SIAM Journal on Imaging
Sciences, 7(3), 1853-1882.
@@ -1675,7 +1712,7 @@ class SinkhornLpl1Transport(BaseTransport):
def fit(self, Xs, ys=None, Xt=None, yt=None):
"""Build a coupling matrix from source and target sets of samples
- (Xs, ys) and (Xt, yt)
+ :math:`(\mathbf{X_s}, \mathbf{y_s})` and :math:`(\mathbf{X_t}, \mathbf{y_t})`
Parameters
----------
@@ -1686,8 +1723,8 @@ class SinkhornLpl1Transport(BaseTransport):
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
yt : array-like, shape (n_target_samples,)
- The class labels. If some target samples are unlabeled, fill the
- yt's elements with -1.
+ The class labels. If some target samples are unlabelled, fill the
+ :math:`\mathbf{y_t}`'s elements with -1.
Warning: Note that, due to this convention, -1 cannot be used as a
class label
@@ -1719,13 +1756,14 @@ class SinkhornLpl1Transport(BaseTransport):
class EMDLaplaceTransport(BaseTransport):
- """Domain Adapatation OT method based on Earth Mover's Distance with Laplacian regularization
+ """Domain Adaptation OT method based on Earth Mover's Distance with Laplacian regularization
Parameters
----------
reg_type : string, optional (default='pos')
Type of the regularization term: 'pos' and 'disp' for
- regularization term defined in [2] and [6], respectively.
+ regularization term defined in :ref:`[2] <references-emdlaplacetransport>` and
+ :ref:`[6] <references-emdlaplacetransport>`, respectively.
reg_lap : float, optional (default=1)
Laplacian regularization parameter
reg_src : float, optional (default=0.5)
@@ -1756,24 +1794,27 @@ class EMDLaplaceTransport(BaseTransport):
out_of_sample_map : string, optional (default="ferradans")
The kind of out-of-sample mapping used to transport samples
from one domain to another. Currently the only possible option is
- "ferradans" which uses the method proposed in [6].
+ "ferradans" which uses the method proposed in :ref:`[6] <references-emdlaplacetransport>`.
Attributes
----------
coupling_ : array-like, shape (n_source_samples, n_target_samples)
The optimal coupling
+
+ .. _references-emdlaplacetransport:
References
----------
.. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
"Optimal Transport for Domain Adaptation," in IEEE Transactions
on Pattern Analysis and Machine Intelligence, vol.PP, no.99, pp.1-1
+
.. [2] R. Flamary, N. Courty, D. Tuia, A. Rakotomamonjy,
"Optimal transport with Laplacian regularization: Applications to domain adaptation and shape matching,"
- in NIPS Workshop on Optimal Transport and Machine Learning OTML, 2014.
+ in NIPS Workshop on Optimal Transport and Machine Learning OTML, 2014.
+
.. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014).
- Regularized discrete optimal transport. SIAM Journal on Imaging
- Sciences, 7(3), 1853-1882.
+ Regularized discrete optimal transport. SIAM Journal on Imaging Sciences, 7(3), 1853-1882.
"""
def __init__(self, reg_type='pos', reg_lap=1., reg_src=1., metric="sqeuclidean",
@@ -1799,7 +1840,7 @@ class EMDLaplaceTransport(BaseTransport):
def fit(self, Xs, ys=None, Xt=None, yt=None):
"""Build a coupling matrix from source and target sets of samples
- (Xs, ys) and (Xt, yt)
+ :math:`(\mathbf{X_s}, \mathbf{y_s})` and :math:`(\mathbf{X_t}, \mathbf{y_t})`
Parameters
----------
@@ -1810,8 +1851,8 @@ class EMDLaplaceTransport(BaseTransport):
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
yt : array-like, shape (n_target_samples,)
- The class labels. If some target samples are unlabeled, fill the
- yt's elements with -1.
+ The class labels. If some target samples are unlabelled, fill the
+ :math:`\mathbf{y_t}`'s elements with -1.
Warning: Note that, due to this convention, -1 cannot be used as a
class label
@@ -1840,8 +1881,8 @@ class EMDLaplaceTransport(BaseTransport):
class SinkhornL1l2Transport(BaseTransport):
- """Domain Adapatation OT method based on sinkhorn algorithm +
- l1l2 class regularization.
+ """Domain Adaptation OT method based on sinkhorn algorithm +
+ L1L2 class regularization.
Parameters
----------
@@ -1851,7 +1892,7 @@ class SinkhornL1l2Transport(BaseTransport):
Class regularization parameter
max_iter : int, float, optional (default=10)
The maximum number of iterations before stopping the optimization
- algorithm if no it has not converged
+ algorithm if it has not converged
max_inner_iter : int, float, optional (default=200)
The number of iterations in the inner loop
tol : float, optional (default=10e-9)
@@ -1870,7 +1911,7 @@ class SinkhornL1l2Transport(BaseTransport):
out_of_sample_map : string, optional (default="ferradans")
The kind of out-of-sample mapping used to transport samples
from one domain to another. Currently the only possible option is
- "ferradans" which uses the method proposed in [6].
+ "ferradans" which uses the method proposed in :ref:`[6] <references-sinkhornl1l2transport>`.
limit_max: float, optional (default=10)
Controls the semi-supervised mode. Transport between labeled source
and target samples of different classes will exhibit an infinite cost
@@ -1881,18 +1922,21 @@ class SinkhornL1l2Transport(BaseTransport):
coupling_ : array-like, shape (n_source_samples, n_target_samples)
The optimal coupling
log_ : dictionary
- The dictionary of log, empty dic if parameter log is not True
+ The dictionary of log, empty dict if parameter log is not True
+
+ .. _references-sinkhornl1l2transport:
References
----------
-
.. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
"Optimal Transport for Domain Adaptation," in IEEE
Transactions on Pattern Analysis and Machine Intelligence,
vol.PP, no.99, pp.1-1
+
.. [2] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
Generalized conditional gradient: analysis of convergence
and applications. arXiv preprint arXiv:1510.06567.
+
.. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014).
Regularized discrete optimal transport. SIAM Journal on Imaging
Sciences, 7(3), 1853-1882.
@@ -1919,7 +1963,7 @@ class SinkhornL1l2Transport(BaseTransport):
def fit(self, Xs, ys=None, Xt=None, yt=None):
"""Build a coupling matrix from source and target sets of samples
- (Xs, ys) and (Xt, yt)
+ :math:`(\mathbf{X_s}, \mathbf{y_s})` and :math:`(\mathbf{X_t}, \mathbf{y_t})`
Parameters
----------
@@ -1930,8 +1974,8 @@ class SinkhornL1l2Transport(BaseTransport):
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
yt : array-like, shape (n_target_samples,)
- The class labels. If some target samples are unlabeled, fill the
- yt's elements with -1.
+ The class labels. If some target samples are unlabelled, fill the
+ :math:`\mathbf{y_t}`'s elements with -1.
Warning: Note that, due to this convention, -1 cannot be used as a
class label
@@ -1973,7 +2017,7 @@ class MappingTransport(BaseEstimator):
mu : float, optional (default=1)
Weight for the linear OT loss (>0)
eta : float, optional (default=0.001)
- Regularization term for the linear mapping L (>0)
+ Regularization term for the linear mapping `L` (>0)
bias : bool, optional (default=False)
Estimate linear mapping with constant bias
metric : string, optional (default="sqeuclidean")
@@ -2004,17 +2048,20 @@ class MappingTransport(BaseEstimator):
----------
coupling_ : array-like, shape (n_source_samples, n_target_samples)
The optimal coupling
- mapping_ : array-like, shape (n_features (+ 1), n_features)
- (if bias) for kernel == linear
+ mapping_ :
The associated mapping
- array-like, shape (n_source_samples (+ 1), n_features)
- (if bias) for kernel == gaussian
+
+ - array-like, shape (`n_features` (+ 1), `n_features`),
+ (if bias) for kernel == linear
+
+ - array-like, shape (`n_source_samples` (+ 1), `n_features`),
+ (if bias) for kernel == gaussian
log_ : dictionary
- The dictionary of log, empty dic if parameter log is not True
+ The dictionary of log, empty dict if parameter log is not True
+
References
----------
-
.. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard,
"Mapping estimation for discrete optimal transport",
Neural Information Processing Systems (NIPS), 2016.
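
To make the mapping_ shapes above concrete, a minimal sketch of the
estimator (parameter values are illustrative):

    import numpy as np
    import ot

    rng = np.random.RandomState(0)
    Xs = rng.randn(40, 2)
    Xt = rng.randn(40, 2) + 1

    # with kernel="gaussian" and bias=True, mapping_ has shape
    # (n_source_samples + 1, n_features)
    mt = ot.da.MappingTransport(kernel="gaussian", mu=1, eta=1e-3, bias=True)
    mt.fit(Xs=Xs, Xt=Xt)
    Xs_new_mapped = mt.transform(rng.randn(5, 2))  # out-of-sample mapping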
@@ -2042,7 +2089,8 @@ class MappingTransport(BaseEstimator):
def fit(self, Xs=None, ys=None, Xt=None, yt=None):
"""Builds an optimal coupling and estimates the associated mapping
- from source and target sets of samples (Xs, ys) and (Xt, yt)
+ from source and target sets of samples
+ :math:`(\mathbf{X_s}, \mathbf{y_s})` and :math:`(\mathbf{X_t}, \mathbf{y_t})`
Parameters
----------
@@ -2053,8 +2101,8 @@ class MappingTransport(BaseEstimator):
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
yt : array-like, shape (n_target_samples,)
- The class labels. If some target samples are unlabeled, fill the
- yt's elements with -1.
+ The class labels. If some target samples are unlabelled, fill the
+ :math:`\mathbf{y_t}`'s elements with -1.
Warning: Note that, due to this convention, -1 cannot be used as a
class label
@@ -2098,7 +2146,7 @@ class MappingTransport(BaseEstimator):
return self
def transform(self, Xs):
- """Transports source samples Xs onto target ones Xt
+ """Transports source samples :math:`\mathbf{X_s}` onto target ones :math:`\mathbf{X_t}`
Parameters
----------
@@ -2138,7 +2186,7 @@ class MappingTransport(BaseEstimator):
class UnbalancedSinkhornTransport(BaseTransport):
- """Domain Adapatation unbalanced OT method based on sinkhorn algorithm
+ """Domain Adaptation unbalanced OT method based on sinkhorn algorithm
Parameters
----------
@@ -2151,7 +2199,7 @@ class UnbalancedSinkhornTransport(BaseTransport):
'sinkhorn_epsilon_scaling', see those functions for specific parameters
max_iter : int, float, optional (default=10)
The maximum number of iterations before stopping the optimization
- algorithm if no it has not converged
+ algorithm if it has not converged
tol : float, optional (default=10e-9)
Stop threshold on error (inner sinkhorn solver) (>0)
verbose : bool, optional (default=False)
@@ -2168,7 +2216,7 @@ class UnbalancedSinkhornTransport(BaseTransport):
out_of_sample_map : string, optional (default="ferradans")
The kind of out-of-sample mapping used to transport samples
from one domain to another. Currently the only possible option is
- "ferradans" which uses the method proposed in [6].
+ "ferradans" which uses the method proposed in :ref:`[6] <references-unbalancedsinkhorntransport>`.
limit_max: float, optional (default=10)
Controls the semi-supervised mode. Transport between labeled source
and target samples of different classes will exhibit an infinite cost
@@ -2179,14 +2227,16 @@ class UnbalancedSinkhornTransport(BaseTransport):
coupling_ : array-like, shape (n_source_samples, n_target_samples)
The optimal coupling
log_ : dictionary
- The dictionary of log, empty dic if parameter log is not True
+ The dictionary of log, empty dict if parameter log is not True
+
+ .. _references-unbalancedsinkhorntransport:
References
----------
-
.. [1] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016).
- Scaling algorithms for unbalanced transport problems. arXiv preprint
- arXiv:1607.05816.
+ Scaling algorithms for unbalanced transport problems. arXiv preprint
+ arXiv:1607.05816.
+
.. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014).
Regularized discrete optimal transport. SIAM Journal on Imaging
Sciences, 7(3), 1853-1882.
@@ -2212,7 +2262,7 @@ class UnbalancedSinkhornTransport(BaseTransport):
def fit(self, Xs, ys=None, Xt=None, yt=None):
"""Build a coupling matrix from source and target sets of samples
- (Xs, ys) and (Xt, yt)
+ :math:`(\mathbf{X_s}, \mathbf{y_s})` and :math:`(\mathbf{X_t}, \mathbf{y_t})`
Parameters
----------
@@ -2223,8 +2273,8 @@ class UnbalancedSinkhornTransport(BaseTransport):
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
yt : array-like, shape (n_target_samples,)
- The class labels. If some target samples are unlabeled, fill the
- yt's elements with -1.
+ The class labels. If some target samples are unlabelled, fill the
+ :math:`\mathbf{y_t}`'s elements with -1.
Warning: Note that, due to this convention, -1 cannot be used as a
class label
@@ -2258,7 +2308,7 @@ class UnbalancedSinkhornTransport(BaseTransport):
class JCPOTTransport(BaseTransport):
- """Domain Adapatation OT method for multi-source target shift based on Wasserstein barycenter algorithm.
+ """Domain Adaptation OT method for multi-source target shift based on Wasserstein barycenter algorithm.
Parameters
----------
@@ -2266,7 +2316,7 @@ class JCPOTTransport(BaseTransport):
Entropic regularization parameter
max_iter : int, float, optional (default=10)
The maximum number of iterations before stopping the optimization
- algorithm if no it has not converged
+ algorithm if it has not converged
tol : float, optional (default=10e-9)
Stop threshold on error (inner sinkhorn solver) (>0)
verbose : bool, optional (default=False)
@@ -2283,7 +2333,7 @@ class JCPOTTransport(BaseTransport):
out_of_sample_map : string, optional (default="ferradans")
The kind of out-of-sample mapping used to transport samples
from one domain to another. Currently the only possible option is
- "ferradans" which uses the method proposed in [6].
+ "ferradans" which uses the method proposed in :ref:`[6] <references-jcpottransport>`.
Attributes
----------
@@ -2292,11 +2342,12 @@ class JCPOTTransport(BaseTransport):
proportions_ : array-like, shape (n_classes,)
Estimated class proportions in the target domain
log_ : dictionary
- The dictionary of log, empty dic if parameter log is not True
+ The dictionary of log, empty dict if parameter log is not True
+
+ .. _references-jcpottransport:
References
----------
-
.. [1] Ievgen Redko, Nicolas Courty, Rémi Flamary, Devis Tuia
"Optimal transport for multi-source domain adaptation under target shift",
International Conference on Artificial Intelligence and Statistics (AISTATS),
@@ -2323,7 +2374,7 @@ class JCPOTTransport(BaseTransport):
def fit(self, Xs, ys=None, Xt=None, yt=None):
"""Building coupling matrices from a list of source and target sets of samples
- (Xs, ys) and (Xt, yt)
+ :math:`(\mathbf{X_s}, \mathbf{y_s})` and :math:`(\mathbf{X_t}, \mathbf{y_t})`
Parameters
----------
@@ -2334,8 +2385,8 @@ class JCPOTTransport(BaseTransport):
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
yt : array-like, shape (n_target_samples,)
- The class labels. If some target samples are unlabeled, fill the
- yt's elements with -1.
+ The class labels. If some target samples are unlabelled, fill the
+ :math:`\mathbf{y_t}`'s elements with -1.
Warning: Note that, due to this convention, -1 cannot be used as a
class label
@@ -2368,7 +2419,7 @@ class JCPOTTransport(BaseTransport):
return self
def transform(self, Xs=None, ys=None, Xt=None, yt=None, batch_size=128):
- """Transports source samples Xs onto target ones Xt
+ """Transports source samples :math:`\mathbf{X_s}` onto target ones :math:`\mathbf{X_t}`
Parameters
----------
@@ -2379,8 +2430,8 @@ class JCPOTTransport(BaseTransport):
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
yt : array-like, shape (n_target_samples,)
- The class labels. If some target samples are unlabeled, fill the
- yt's elements with -1.
+ The class labels. If some target samples are unlabelled, fill the
+ :math:`\mathbf{y_t}`'s elements with -1.
Warning: Note that, due to this convention, -1 cannot be used as a
class label
@@ -2440,7 +2491,8 @@ class JCPOTTransport(BaseTransport):
return transp_Xs
def transform_labels(self, ys=None):
- """Propagate source labels ys to obtain target labels as in [27]
+ """Propagate source labels :math:`\mathbf{y_s}` to obtain target labels as in
+ :ref:`[27] <references-jcpottransport-transform-labels>`
Parameters
----------
@@ -2451,6 +2503,14 @@ class JCPOTTransport(BaseTransport):
-------
yt : array-like, shape (n_target_samples, nb_classes)
Estimated soft target labels.
+
+
+ .. _references-jcpottransport-transform-labels:
+ References
+ ----------
+ .. [27] Ievgen Redko, Nicolas Courty, Rémi Flamary, Devis Tuia
+ "Optimal transport for multi-source domain adaptation under target shift",
+ International Conference on Artificial Intelligence and Statistics (AISTATS), 2019.
"""
# check the necessary inputs parameters are here
@@ -2482,11 +2542,12 @@ class JCPOTTransport(BaseTransport):
return yt.T
def inverse_transform_labels(self, yt=None):
- """Propagate source labels ys to obtain target labels
+ """Propagate target labels :math:`\mathbf{y_t}` to obtain estimated source labels
+ :math:`\mathbf{y_s}`
Parameters
----------
- yt : array-like, shape (n_source_samples,)
+ yt : array-like, shape (n_target_samples,)
The target class labels
Returns
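
A minimal sketch of the JCPOT workflow documented above, assuming two
labelled source domains (sizes and labels are illustrative):

    import numpy as np
    import ot

    rng = np.random.RandomState(0)
    Xs1, ys1 = rng.randn(30, 2), rng.randint(0, 2, 30)
    Xs2, ys2 = rng.randn(20, 2) + 1, rng.randint(0, 2, 20)
    Xt = rng.randn(25, 2)

    jcpot = ot.da.JCPOTTransport(reg_e=1.0)
    jcpot.fit(Xs=[Xs1, Xs2], ys=[ys1, ys2], Xt=Xt)
    print(jcpot.proportions_)            # estimated target class proportions
    yt_soft = jcpot.transform_labels(ys=[ys1, ys2])  # soft labels as in [27]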
diff --git a/ot/datasets.py b/ot/datasets.py
index b86ef3b..ad6390c 100644
--- a/ot/datasets.py
+++ b/ot/datasets.py
@@ -13,7 +13,7 @@ from .utils import check_random_state, deprecated
def make_1D_gauss(n, m, s):
- """return a 1D histogram for a gaussian distribution (n bins, mean m and std s)
+ """return a 1D histogram for a gaussian distribution (`n` bins, mean `m` and std `s`)
Parameters
----------
@@ -26,7 +26,7 @@ def make_1D_gauss(n, m, s):
Returns
-------
- h : ndarray (n,)
+ h : ndarray (`n`,)
1D histogram for a gaussian distribution
"""
x = np.arange(n, dtype=np.float64)
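
A quick check of the normalization documented above (bin count, mean and
std are illustrative):

    import ot

    a = ot.datasets.make_1D_gauss(100, m=20, s=5)   # histogram on 100 bins
    b = ot.datasets.make_1D_gauss(100, m=60, s=10)
    assert abs(a.sum() - 1) < 1e-10 and abs(b.sum() - 1) < 1e-10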
@@ -41,7 +41,7 @@ def get_1D_gauss(n, m, sigma):
def make_2D_samples_gauss(n, m, sigma, random_state=None):
- """Return n samples drawn from 2D gaussian N(m,sigma)
+ """Return `n` samples drawn from 2D gaussian :math:`\mathcal{N}(m, \sigma)`
Parameters
----------
@@ -59,8 +59,8 @@ def make_2D_samples_gauss(n, m, sigma, random_state=None):
Returns
-------
- X : ndarray, shape (n, 2)
- n samples drawn from N(m, sigma).
+ X : ndarray, shape (`n`, 2)
+ `n` samples drawn from :math:`\mathcal{N}(m, \sigma)`.
"""
generator = check_random_state(random_state)
@@ -102,7 +102,7 @@ def make_data_classif(dataset, n, nz=.5, theta=0, p=.5, random_state=None, **kwa
Returns
-------
X : ndarray, shape (n, d)
- n observation of size d
+ `n` observations of size `d`
y : ndarray, shape (n,)
labels of the samples.
"""
diff --git a/ot/dr.py b/ot/dr.py
index 7469270..c2f51f8 100644
--- a/ot/dr.py
+++ b/ot/dr.py
@@ -22,7 +22,7 @@ from pymanopt.solvers import SteepestDescent, TrustRegions
def dist(x1, x2):
- """ Compute squared euclidean distance between samples (autograd)
+ r""" Compute squared euclidean distance between samples (autograd)
"""
x1p2 = np.sum(np.square(x1), 1)
x2p2 = np.sum(np.square(x2), 1)
@@ -30,7 +30,7 @@ def dist(x1, x2):
def sinkhorn(w1, w2, M, reg, k):
- """Sinkhorn algorithm with fixed number of iteration (autograd)
+ r"""Sinkhorn algorithm with fixed number of iteration (autograd)
"""
K = np.exp(-M / reg)
ui = np.ones((M.shape[0],))
@@ -43,14 +43,14 @@ def sinkhorn(w1, w2, M, reg, k):
def split_classes(X, y):
- """split samples in X by classes in y
+ r"""split samples in :math:`\mathbf{X}` by classes in :math:`\mathbf{y}`
"""
lstsclass = np.unique(y)
return [X[y == i, :].astype(np.float32) for i in lstsclass]
def fda(X, y, p=2, reg=1e-16):
- """Fisher Discriminant Analysis
+ r"""Fisher Discriminant Analysis
Parameters
----------
@@ -111,18 +111,19 @@ def fda(X, y, p=2, reg=1e-16):
def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None, normalize=False):
r"""
- Wasserstein Discriminant Analysis [11]_
+ Wasserstein Discriminant Analysis :ref:`[11] <references-wda>`
The function solves the following optimization problem:
.. math::
- P = \\text{arg}\min_P \\frac{\\sum_i W(PX^i,PX^i)}{\\sum_{i,j\\neq i} W(PX^i,PX^j)}
+ \mathbf{P} = \mathop{\arg \min}_\mathbf{P} \quad
+ \frac{\sum\limits_i W(P \mathbf{X}^i, P \mathbf{X}^i)}{\sum\limits_{i, j \neq i} W(P \mathbf{X}^i, P \mathbf{X}^j)}
where :
- - :math:`P` is a linear projection operator in the Stiefel(p,d) manifold
+ - :math:`P` is a linear projection operator in the Stiefel(`p`, `d`) manifold
- :math:`W` is entropic regularized Wasserstein distances
- - :math:`X^i` are samples in the dataset corresponding to class i
+ - :math:`\mathbf{X}^i` are samples in the dataset corresponding to class i
Parameters
----------
@@ -140,7 +141,7 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None, no
P0 : ndarray, shape (d, p)
Initial starting point for projection.
normalize : bool, optional
- Normalise the Wasserstaiun distane by the average distance on P0 (default : False)
+ Normalise the Wasserstein distance by the average distance on P0 (default: False)
verbose : int, optional
Print information along iterations.
@@ -151,6 +152,8 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None, no
proj : callable
Projection function including mean centering.
+
+ .. _references-wda:
References
----------
.. [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016).
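
A minimal sketch of the WDA call documented above (random data; note that
ot.dr needs the optional autograd and pymanopt dependencies):

    import numpy as np
    import ot.dr

    rng = np.random.RandomState(0)
    X = rng.randn(100, 10)               # 100 samples in dimension d = 10
    y = rng.randint(0, 3, 100)           # 3 classes

    P, proj = ot.dr.wda(X, y, p=2, reg=1, k=10, maxiter=50)
    Xp = proj(X)                         # samples in the learned p-dim subspace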
@@ -217,27 +220,28 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None, no
def projection_robust_wasserstein(X, Y, a, b, tau, U0=None, reg=0.1, k=2, stopThr=1e-3, maxiter=100, verbose=0):
r"""
- Projection Robust Wasserstein Distance [32]
+ Projection Robust Wasserstein Distance :ref:`[32] <references-projection-robust-wasserstein>`
The function solves the following optimization problem:
.. math::
- \max_{U \in St(d, k)} \min_{\pi \in \Pi(\mu,\nu)} \sum_{i,j} \pi_{i,j} \|U^T(x_i - y_j)\|^2 - reg * H(\pi)
+ \max_{U \in St(d, k)} \ \min_{\pi \in \Pi(\mu,\nu)} \quad \sum_{i,j} \pi_{i,j}
+ \|U^T(\mathbf{x}_i - \mathbf{y}_j)\|^2 - \mathrm{reg} \cdot H(\pi)
- - :math:`U` is a linear projection operator in the Stiefel(d, k) manifold
+ - :math:`U` is a linear projection operator in the Stiefel(`d`, `k`) manifold
- :math:`H(\pi)` is entropy regularizer
- - :math:`x_i`, :math:`y_j` are samples of measures \mu and \nu respectively
+ - :math:`\mathbf{x}_i`, :math:`\mathbf{y}_j` are samples of measures :math:`\mu` and :math:`\nu` respectively
Parameters
----------
X : ndarray, shape (n, d)
- Samples from measure \mu
+ Samples from measure :math:`\mu`
Y : ndarray, shape (n, d)
- Samples from measure \nu
+ Samples from measure :math:`\nu`
a : ndarray, shape (n, )
- weights for measure \mu
+ weights for measure :math:`\mu`
b : ndarray, shape (n, )
- weights for measure \nu
+ weights for measure :math:`\nu`
tau : float
stepsize for Riemannian Gradient Descent
U0 : ndarray, shape (d, p)
@@ -258,6 +262,8 @@ def projection_robust_wasserstein(X, Y, a, b, tau, U0=None, reg=0.1, k=2, stopTh
U : ndarray, shape (d, k)
Projection operator.
+
+ .. _references-projection-robust-wasserstein:
References
----------
.. [32] Huang, M. , Ma S. & Lai L. (2021).
diff --git a/ot/gromov.py b/ot/gromov.py
index a0fbf48..465693d 100644
--- a/ot/gromov.py
+++ b/ot/gromov.py
@@ -327,7 +327,8 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs
The function solves the following optimization problem:
.. math::
- \mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
+ \mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \quad \sum_{i,j,k,l}
+ L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
Where :
@@ -410,7 +411,8 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwarg
The function solves the following optimization problem:
.. math::
- GW = \min_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
+ GW = \min_\mathbf{T} \quad \sum_{i,j,k,l}
+ L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
Where :
@@ -487,8 +489,8 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5,
Computes the FGW transport between two graphs (see :ref:`[24] <references-fused-gromov-wasserstein>`)
.. math::
- \gamma = \mathop{\arg \min}_\gamma (1 - \alpha) <\gamma, \mathbf{M}>_F + \alpha \sum_{i,j,k,l}
- L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
+ \gamma = \mathop{\arg \min}_\gamma \quad (1 - \alpha) \langle \gamma, \mathbf{M} \rangle_F +
+ \alpha \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
s.t. \ \mathbf{\gamma} \mathbf{1} &= \mathbf{p}
@@ -569,7 +571,7 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5
Computes the FGW distance between two graphs see (see :ref:`[24] <references-fused-gromov-wasserstein2>`)
.. math::
- \min_\gamma (1 - \alpha) <\gamma, \mathbf{M}>_F + \alpha \sum_{i,j,k,l}
+ \min_\gamma \quad (1 - \alpha) \langle \gamma, \mathbf{M} \rangle_F + \alpha \sum_{i,j,k,l}
L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
s.t. \ \mathbf{\gamma} \mathbf{1} &= \mathbf{p}
@@ -591,9 +593,9 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5
M : array-like, shape (ns, nt)
Metric cost matrix between features across domains
C1 : array-like, shape (ns, ns)
- Metric cost matrix respresentative of the structure in the source space.
+ Metric cost matrix representative of the structure in the source space.
C2 : array-like, shape (nt, nt)
- Metric cost matrix espresentative of the structure in the target space.
+ Metric cost matrix representative of the structure in the target space.
p : array-like, shape (ns,)
Distribution in the source space.
q : array-like, shape (nt,)
@@ -612,8 +614,8 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5
Returns
-------
- gamma : array-like, shape (ns, nt)
- Optimal transportation matrix for the given parameters.
+ fgw-distance : float
+ Fused Gromov-Wasserstein distance for the given parameters.
log : dict
Log dictionary return only if log==True in parameters.
@@ -780,7 +782,8 @@ def pointwise_gromov_wasserstein(C1, C2, p, q, loss_fun,
The function solves the following optimization problem:
.. math::
- \mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
+ \mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \quad \sum_{i,j,k,l}
+ L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
@@ -901,7 +904,8 @@ def sampled_gromov_wasserstein(C1, C2, p, q, loss_fun,
The function solves the following optimization problem:
.. math::
- \mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
+ \mathbf{GW} = \mathop{\arg \min}_\mathbf{T} \quad \sum_{i,j,k,l}
+ L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l}
s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
@@ -1052,7 +1056,7 @@ def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
The function solves the following optimization problem:
.. math::
- \mathbf{GW} = \mathop{\arg\min}_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} - \epsilon(H(\mathbf{T}))
+ \mathbf{GW} = \mathop{\arg\min}_\mathbf{T} \quad \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} - \epsilon(H(\mathbf{T}))
s.t. \ \mathbf{T} \mathbf{1} &= \mathbf{p}
@@ -1157,7 +1161,8 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
The function solves the following optimization problem:
.. math::
- GW = \min_\mathbf{T} \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l}) \mathbf{T}_{i,j} \mathbf{T}_{k,l} - \epsilon(H(\mathbf{T}))
+ GW = \min_\mathbf{T} \quad \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l})
+ \mathbf{T}_{i,j} \mathbf{T}_{k,l} - \epsilon(H(\mathbf{T}))
Where :
@@ -1223,7 +1228,7 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
.. math::
- \mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)
+ \mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \quad \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)
Where :
@@ -1336,7 +1341,7 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
.. math::
- \mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)
+ \mathbf{C} = \mathop{\arg \min}_{\mathbf{C}\in \mathbb{R}^{N \times N}} \quad \sum_s \lambda_s \mathrm{GW}(\mathbf{C}, \mathbf{C}_s, \mathbf{p}, \mathbf{p}_s)
Where :
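
A minimal sketch of the GW solvers whose objectives are rewritten above,
between spaces of different dimensions (data is illustrative):

    import numpy as np
    import ot

    rng = np.random.RandomState(0)
    xs, xt = rng.randn(20, 2), rng.randn(30, 3)

    C1 = ot.dist(xs, xs)                 # intra-domain cost matrices
    C2 = ot.dist(xt, xt)
    C1 /= C1.max()
    C2 /= C2.max()
    p, q = ot.unif(20), ot.unif(30)

    T = ot.gromov.gromov_wasserstein(C1, C2, p, q, 'square_loss')    # coupling
    gw = ot.gromov.gromov_wasserstein2(C1, C2, p, q, 'square_loss')  # value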
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 2c18a88..5da897d 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -62,7 +62,7 @@ def center_ot_dual(alpha0, beta0, a=None, b=None):
is the following:
.. math::
- \alpha^T a= \beta^T b
+ \alpha^T \mathbf{a} = \beta^T \mathbf{b}
in addition to the OT problem constraints.
@@ -70,11 +70,11 @@ def center_ot_dual(alpha0, beta0, a=None, b=None):
a constant from both :math:`\alpha_0` and :math:`\beta_0`.
.. math::
- c=\frac{\beta0^T b-\alpha_0^T a}{1^Tb+1^Ta}
+ c &= \frac{\beta_0^T \mathbf{b} - \alpha_0^T \mathbf{a}}{\mathbf{1}^T \mathbf{b} + \mathbf{1}^T \mathbf{a}}
- \alpha=\alpha_0+c
+ \alpha &= \alpha_0 + c
- \beta=\beta0+c
+ \beta &= \beta_0 + c
Parameters
----------
@@ -117,7 +117,7 @@ def estimate_dual_null_weights(alpha0, beta0, a, b, M):
The feasible values are computed efficiently but rather coarsely.
.. warning::
- This function is necessary because the C++ solver in emd_c
+ This function is necessary because the C++ solver in `emd_c`
discards all samples in the distributions with
zero weights. This means that while the primal variable (transport
matrix) is exact, the solver only returns feasible dual potentials
@@ -126,26 +126,26 @@ def estimate_dual_null_weights(alpha0, beta0, a, b, M):
First we compute the constraint violations:
.. math::
- V=\alpha+\beta^T-M
+ \mathbf{V} = \alpha + \beta^T - \mathbf{M}
- Next we compute the max amount of violation per row (alpha) and
- columns (beta)
+ Next we compute the max amount of violation per row (:math:`\alpha`) and
+ per column (:math:`\beta`)
.. math::
- v^a_i=\max_j V_{i,j}
+ \mathbf{v^a}_i = \max_j \mathbf{V}_{i,j}
- v^b_j=\max_i V_{i,j}
+ \mathbf{v^b}_j = \max_i \mathbf{V}_{i,j}
Finally we update the dual potentials of the zero-weight samples if a
constraint is violated
.. math::
- \alpha_i = \alpha_i -v^a_i \quad \text{ if } a_i=0 \text{ and } v^a_i>0
+ \alpha_i = \alpha_i - \mathbf{v^a}_i \quad \text{ if } \mathbf{a}_i=0 \text{ and } \mathbf{v^a}_i>0
- \beta_j = \beta_j -v^b_j \quad \text{ if } b_j=0 \text{ and } v^b_j>0
+ \beta_j = \beta_j - \mathbf{v^b}_j \quad \text{ if } \mathbf{b}_j=0 \text{ and } \mathbf{v^b}_j > 0
In the end the dual potentials are centered using function
- :ref:`center_ot_dual`.
+ :py:func:`ot.lp.center_ot_dual`.
Note that all those updates do not change the objective value of the
solution but provide dual potentials that do not violate the constraints.
@@ -201,26 +201,28 @@ def emd(a, b, M, numItermax=100000, log=False, center_dual=True, numThreads=1):
r"""Solves the Earth Movers distance problem and returns the OT matrix
- .. math:: \gamma = arg\min_\gamma <\gamma,M>_F
+ .. math::
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F
+
+ s.t. \ \gamma \mathbf{1} = \mathbf{a}
- s.t. \gamma 1 = a
+ \gamma^T \mathbf{1} = \mathbf{b}
- \gamma^T 1= b
+ \gamma \geq 0
- \gamma\geq 0
where :
- - M is the metric cost matrix
- - a and b are the sample weights
+ - :math:`\mathbf{M}` is the metric cost matrix
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights
- .. warning:: Note that the M matrix in numpy needs to be a C-order
+ .. warning:: Note that the :math:`\mathbf{M}` matrix in numpy needs to be a C-order
numpy.array in float64 format. It will be converted if not in this
format.
.. note:: This function is backend-compatible and will work on arrays
from all compatible backends.
- Uses the algorithm proposed in [1]_
+ Uses the algorithm proposed in :ref:`[1] <references-emd>`.
Parameters
----------
@@ -267,17 +269,19 @@ def emd(a, b, M, numItermax=100000, log=False, center_dual=True, numThreads=1):
array([[0.5, 0. ],
[0. , 0.5]])
+
+ .. _references-emd:
References
----------
-
.. [1] Bonneel, N., Van De Panne, M., Paris, S., & Heidrich, W. (2011,
December). Displacement interpolation using Lagrangian mass transport.
In ACM Transactions on Graphics (TOG) (Vol. 30, No. 6, p. 158). ACM.
See Also
--------
- ot.bregman.sinkhorn : Entropic regularized OT ot.optim.cg : General
- regularized OT"""
+ ot.bregman.sinkhorn : Entropic regularized OT
+ ot.optim.cg : General regularized OT
+ """
# convert to numpy if list
a, b, M = list_to_array(a, b, M)
@@ -340,22 +344,23 @@ def emd2(a, b, M, processes=1,
r"""Solves the Earth Movers distance problem and returns the loss
.. math::
- \min_\gamma <\gamma,M>_F
+ \min_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F
- s.t. \gamma 1 = a
+ s.t. \ \gamma \mathbf{1} = \mathbf{a}
- \gamma^T 1= b
+ \gamma^T \mathbf{1} = \mathbf{b}
+
+ \gamma \geq 0
- \gamma\geq 0
where :
- - M is the metric cost matrix
- - a and b are the sample weights
+ - :math:`\mathbf{M}` is the metric cost matrix
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights
.. note:: This function is backend-compatible and will work on arrays
from all compatible backends.
- Uses the algorithm proposed in [1]_
+ Uses the algorithm proposed in :ref:`[1] <references-emd2>`.
Parameters
----------
@@ -405,9 +410,10 @@ def emd2(a, b, M, processes=1,
>>> ot.emd2(a,b,M)
0.0
+
+ .. _references-emd2:
References
----------
-
.. [1] Bonneel, N., Van De Panne, M., Paris, S., & Heidrich, W.
(2011, December). Displacement interpolation using Lagrangian mass
transport. In ACM Transactions on Graphics (TOG) (Vol. 30, No. 6, p.
@@ -416,7 +422,8 @@ def emd2(a, b, M, processes=1,
See Also
--------
ot.bregman.sinkhorn : Entropic regularized OT
- ot.optim.cg : General regularized OT"""
+ ot.optim.cg : General regularized OT
+ """
a, b, M = list_to_array(a, b, M)
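
Since the hunks above stress the centered dual potentials, a minimal
sketch showing where they are returned (toy data):

    import numpy as np
    import ot

    a = np.array([0.5, 0.5])
    b = np.array([0.5, 0.5])
    M = np.array([[0., 1.], [1., 0.]])

    G, log = ot.emd(a, b, M, log=True)
    # log['u'], log['v'] hold the centered dual potentials,
    # log['cost'] the EMD value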
@@ -508,29 +515,35 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
Solves the free support (locations of the barycenters are optimized, not the weights) Wasserstein barycenter problem (i.e. the weighted Frechet mean for the 2-Wasserstein distance), formally:
.. math::
- \min_X \sum_{i=1}^N w_i W_2^2(b, X, a_i, X_i)
+ \min_\mathbf{X} \quad \sum_{i=1}^N w_i W_2^2(\mathbf{b}, \mathbf{X}, \mathbf{a}_i, \mathbf{X}_i)
where :
- :math:`w \in (0, 1)^{N}` are the barycenter weights and sum to one
- - the :math:`a_i \in \mathbb{R}^{k_i}` are the empirical measures weights and sum to one for each :math:`i`
- - the :math:`X_i \in \mathbb{R}^{k_i, d}` are the empirical measures atoms locations
- - :math:`b \in \mathbb{R}^{k}` is the desired weights vector of the barycenter
+ - the :math:`\mathbf{a}_i \in \mathbb{R}^{k_i}` are the empirical measures weights and sum to one for each :math:`i`
+ - the :math:`\mathbf{X}_i \in \mathbb{R}^{k_i, d}` are the empirical measures atoms locations
+ - :math:`\mathbf{b} \in \mathbb{R}^{k}` is the desired weights vector of the barycenter
- This problem is considered in [1] (Algorithm 2). There are two differences with the following codes:
+ This problem is considered in :ref:`[1] <references-free-support-barycenter>` (Algorithm 2).
+ The code below differs from it in two ways:
- we do not optimize over the weights
- - we do not do line search for the locations updates, we use i.e. theta = 1 in [1] (Algorithm 2). This can be seen as a discrete implementation of the fixed-point algorithm of [2] proposed in the continuous setting.
+ - we do not do line search for the locations updates, i.e. we use :math:`\theta = 1` in
+ :ref:`[1] <references-free-support-barycenter>` (Algorithm 2). This can be seen as a discrete
+ implementation of the fixed-point algorithm of
+ :ref:`[2] <references-free-support-barycenter>` proposed in the continuous setting.
Parameters
----------
measures_locations : list of N (k_i,d) numpy.ndarray
- The discrete support of a measure supported on k_i locations of a d-dimensional space (k_i can be different for each element of the list)
+ The discrete support of a measure supported on :math:`k_i` locations of a `d`-dimensional space
+ (:math:`k_i` can be different for each element of the list)
measures_weights : list of N (k_i,) numpy.ndarray
- Numpy arrays where each numpy array has k_i non-negatives values summing to one representing the weights of each discrete input measure
+ Numpy arrays where each numpy array has :math:`k_i` non-negative values summing to one
+ representing the weights of each discrete input measure
X_init : (k,d) np.ndarray
- Initialization of the support locations (on k atoms) of the barycenter
+ Initialization of the support locations (on `k` atoms) of the barycenter
b : (k,) np.ndarray
Initialization of the weights of the barycenter (non-negatives, sum to 1)
weights : (N,) np.ndarray
@@ -554,9 +567,10 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
X : (k,d) np.ndarray
Support locations (on k atoms) of the barycenter
+
+ .. _references-free-support-barycenter:
References
----------
-
.. [1] Cuturi, Marco, and Arnaud Doucet. "Fast computation of Wasserstein barycenters." International Conference on Machine Learning. 2014.
.. [2] Álvarez-Esteban, Pedro C., et al. "A fixed-point approach to barycenters in Wasserstein space." Journal of Mathematical Analysis and Applications 441.2 (2016): 744-762.
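
A minimal sketch of the free support barycenter call documented above
(supports of different sizes k_i; illustrative data):

    import numpy as np
    import ot

    rng = np.random.RandomState(0)
    measures_locations = [rng.randn(7, 2), rng.randn(5, 2) + 1]
    measures_weights = [ot.unif(7), ot.unif(5)]
    X_init = rng.randn(4, 2)             # barycenter on k = 4 atoms

    X = ot.lp.free_support_barycenter(
        measures_locations, measures_weights, X_init)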
diff --git a/ot/optim.py b/ot/optim.py
index 6456c03..cc286b6 100644
--- a/ot/optim.py
+++ b/ot/optim.py
@@ -162,7 +162,8 @@ def cg(a, b, M, reg, f, df, G0=None, numItermax=200, numItermaxEmd=100000,
The function solves the following optimization problem:
.. math::
- \gamma = \mathop{\arg \min}_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg} \cdot f(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg} \cdot f(\gamma)
s.t. \ \gamma \mathbf{1} &= \mathbf{a}
@@ -309,7 +310,8 @@ def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10,
The function solves the following optimization problem:
.. math::
- \gamma = \mathop{\arg \min}_\gamma <\gamma, \mathbf{M}>_F + \mathrm{reg_1}\cdot\Omega(\gamma) + \mathrm{reg_2}\cdot f(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg_1}\cdot\Omega(\gamma) + \mathrm{reg_2}\cdot f(\gamma)
s.t. \ \gamma \mathbf{1} &= \mathbf{a}
@@ -452,7 +454,7 @@ def solve_1d_linesearch_quad(a, b, c):
.. math::
- \mathop{\arg \min}_{0 \leq x \leq 1} f(x) = ax^{2} + bx + c
+ \mathop{\arg \min}_{0 \leq x \leq 1} \quad f(x) = ax^{2} + bx + c
Parameters
----------
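
For the cg objective rewritten above, a minimal sketch with a squared
l2 regularizer (a standard choice; data is illustrative):

    import numpy as np
    import ot

    x = np.arange(10, dtype=float).reshape(-1, 1)
    a, b = ot.unif(10), ot.unif(10)
    M = ot.dist(x, x)
    M /= M.max()

    def f(G):
        return 0.5 * np.sum(G ** 2)      # f(gamma) = 0.5 * ||gamma||_F^2

    def df(G):
        return G                         # its gradient

    G = ot.optim.cg(a, b, M, reg=1e-1, f=f, df=df)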
diff --git a/ot/partial.py b/ot/partial.py
index 814d779..b7093e4 100755
--- a/ot/partial.py
+++ b/ot/partial.py
@@ -20,13 +20,16 @@ def partial_wasserstein_lagrange(a, b, M, reg_m=None, nb_dummies=1, log=False,
The function considers the following problem:
.. math::
- \gamma = \arg\min_\gamma <\gamma,(M-\lambda)>_F
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, (\mathbf{M} - \lambda) \rangle_F
- s.t.
- \gamma\geq 0 \\
- \gamma 1 \leq a\\
- \gamma^T 1 \leq b\\
- 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\}
+ .. math::
+ s.t. \ \gamma \mathbf{1} &\leq \mathbf{a}
+
+ \gamma^T \mathbf{1} &\leq \mathbf{b}
+
+ \gamma &\geq 0
+
+ \mathbf{1}^T \gamma^T \mathbf{1} = m &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\}
or equivalently (see Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X.
@@ -34,33 +37,32 @@ def partial_wasserstein_lagrange(a, b, M, reg_m=None, nb_dummies=1, log=False,
metrics. Foundations of Computational Mathematics, 18(1), 1-44.)
.. math::
- \gamma = \arg\min_\gamma <\gamma,M>_F + \sqrt(\lambda/2)
- (\|\gamma 1 - a\|_1 + \|\gamma^T 1 - b\|_1)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \sqrt{\frac{\lambda}{2}} (\|\gamma \mathbf{1} - \mathbf{a}\|_1 + \|\gamma^T \mathbf{1} - \mathbf{b}\|_1)
- s.t.
- \gamma\geq 0 \\
+ s.t. \ \gamma \geq 0
where :
- - M is the metric cost matrix
- - a and b are source and target unbalanced distributions
- - :math:`\lambda` is the lagragian cost. Tuning its value allows attaining
- a given mass to be transported m
+ - :math:`\mathbf{M}` is the metric cost matrix
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target unbalanced distributions
+ - :math:`\lambda` is the Lagrangian cost. Tuning its value allows attaining
+ a given amount of mass `m` to be transported
- The formulation of the problem has been proposed in [28]_
+ The formulation of the problem has been proposed in :ref:`[28] <references-partial-wasserstein-lagrange>`
Parameters
----------
a : np.ndarray (dim_a,)
- Unnormalized histogram of dimension dim_a
+ Unnormalized histogram of dimension `dim_a`
b : np.ndarray (dim_b,)
- Unnormalized histograms of dimension dim_b
+ Unnormalized histogram of dimension `dim_b`
M : np.ndarray (dim_a, dim_b)
cost matrix for the quadratic cost
reg_m : float, optional
- Lagragian cost
+ Lagrangian cost
nb_dummies : int, optional, default:1
number of reservoir points to be added (to avoid numerical
instabilities, increase its value if an error is raised)
@@ -69,6 +71,7 @@ def partial_wasserstein_lagrange(a, b, M, reg_m=None, nb_dummies=1, log=False,
**kwargs : dict
parameters can be directly passed to the emd solver
+
.. warning::
When dealing with a large number of points, the EMD solver may face
some instabilities, especially when the mass associated with the dummy
@@ -77,7 +80,7 @@ def partial_wasserstein_lagrange(a, b, M, reg_m=None, nb_dummies=1, log=False,
Returns
-------
- gamma : (dim_a x dim_b) ndarray
+ gamma : (dim_a, dim_b) ndarray
Optimal transportation matrix for the given parameters
log : dict
log dictionary returned only if `log` is `True`
@@ -97,9 +100,10 @@ def partial_wasserstein_lagrange(a, b, M, reg_m=None, nb_dummies=1, log=False,
array([[0.1, 0. ],
[0. , 0. ]])
+
+ .. _references-partial-wasserstein-lagrange:
References
----------
-
.. [28] Caffarelli, L. A., & McCann, R. J. (2010) Free boundaries in
optimal transport and Monge-Ampere obstacle problems. Annals of
mathematics, 673-730.
@@ -162,27 +166,30 @@ def partial_wasserstein(a, b, M, m=None, nb_dummies=1, log=False, **kwargs):
The function considers the following problem:
.. math::
- \gamma = \arg\min_\gamma <\gamma,M>_F
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F
+
+ .. math::
+ s.t. \ \gamma \mathbf{1} &\leq \mathbf{a}
+
+ \gamma^T \mathbf{1} &\leq \mathbf{b}
+
+ \gamma &\geq 0
- s.t.
- \gamma\geq 0 \\
- \gamma 1 \leq a\\
- \gamma^T 1 \leq b\\
- 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\}
+ \mathbf{1}^T \gamma^T \mathbf{1} = m &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\}
where :
- - M is the metric cost matrix
- - a and b are source and target unbalanced distributions
- - m is the amount of mass to be transported
+ - :math:`\mathbf{M}` is the metric cost matrix
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target unbalanced distributions
+ - `m` is the amount of mass to be transported
Parameters
----------
a : np.ndarray (dim_a,)
- Unnormalized histogram of dimension dim_a
+ Unnormalized histogram of dimension `dim_a`
b : np.ndarray (dim_b,)
- Unnormalized histograms of dimension dim_b
+ Unnormalized histogram of dimension `dim_b`
M : np.ndarray (dim_a, dim_b)
cost matrix for the quadratic cost
m : float, optional
@@ -205,7 +212,7 @@ def partial_wasserstein(a, b, M, m=None, nb_dummies=1, log=False, **kwargs):
Returns
-------
- :math:`gamma` : (dim_a x dim_b) ndarray
+ gamma : (dim_a, dim_b) ndarray
Optimal transportation matrix for the given parameters
log : dict
log dictionary returned only if `log` is `True`
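
A minimal sketch of the partial problem documented above, transporting
only part of the total mass (toy data):

    import numpy as np
    import ot

    a = np.array([0.4, 0.6])
    b = np.array([0.5, 0.5])
    M = np.array([[0., 1.], [1., 0.]])

    gamma = ot.partial.partial_wasserstein(a, b, M, m=0.5)
    # gamma.sum() == 0.5, the requested transported mass m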
@@ -278,27 +285,30 @@ def partial_wasserstein2(a, b, M, m=None, nb_dummies=1, log=False, **kwargs):
The function considers the following problem:
.. math::
- \gamma = \arg\min_\gamma <\gamma,M>_F
+ W = \min_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F
- s.t.
- \gamma\geq 0 \\
- \gamma 1 \leq a\\
- \gamma^T 1 \leq b\\
- 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\}
+ .. math::
+ s.t. \ \gamma \mathbf{1} &\leq \mathbf{a}
+
+ \gamma^T \mathbf{1} &\leq \mathbf{b}
+
+ \gamma &\geq 0
+
+ \mathbf{1}^T \gamma^T \mathbf{1} = m &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\}
where :
- - M is the metric cost matrix
- - a and b are source and target unbalanced distributions
- - m is the amount of mass to be transported
+ - :math:`\mathbf{M}` is the metric cost matrix
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target unbalanced distributions
+ - `m` is the amount of mass to be transported
Parameters
----------
a : np.ndarray (dim_a,)
- Unnormalized histogram of dimension dim_a
+ Unnormalized histogram of dimension `dim_a`
b : np.ndarray (dim_b,)
- Unnormalized histograms of dimension dim_b
+ Unnormalized histogram of dimension `dim_b`
M : np.ndarray (dim_a, dim_b)
cost matrix for the quadratic cost
m : float, optional
@@ -321,8 +331,8 @@ def partial_wasserstein2(a, b, M, m=None, nb_dummies=1, log=False, **kwargs):
Returns
-------
- :math:`gamma` : (dim_a x dim_b) ndarray
- Optimal transportation matrix for the given parameters
+ W : float
+ partial Wasserstein discrepancy
log : dict
log dictionary returned only if `log` is `True`
@@ -360,8 +370,8 @@ def partial_wasserstein2(a, b, M, m=None, nb_dummies=1, log=False, **kwargs):
def gwgrad_partial(C1, C2, T):
- """Compute the GW gradient. Note: we can not use the trick in [12]_ as
- the marginals may not sum to 1.
+ """Compute the GW gradient. Note: we can not use the trick in :ref:`[12] <references-gwgrad-partial>`
+ as the marginals may not sum to 1.
Parameters
----------
@@ -379,6 +389,8 @@ def gwgrad_partial(C1, C2, T):
numpy.array of shape (n_p+nb_dummies, n_u)
gradient
+
+ .. _references-gwgrad-partial:
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
@@ -425,22 +437,25 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None,
The function considers the following problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F
+
+ .. math::
+ s.t. \ \gamma \mathbf{1} &\leq \mathbf{a}
+
+ \gamma^T \mathbf{1} &\leq \mathbf{b}
- s.t. \gamma 1 \leq a \\
- \gamma^T 1 \leq b \\
- \gamma\geq 0 \\
- 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\} \\
+ \gamma &\geq 0
+
+ \mathbf{1}^T \gamma^T \mathbf{1} = m &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\}
where :
- - M is the metric cost matrix
- - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)
- =\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are the sample weights
- - m is the amount of mass to be transported
+ - :math:`\mathbf{M}` is the metric cost matrix
+ - :math:`\Omega` is the entropic regularization term, :math:`\Omega(\gamma) = \sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights
+ - `m` is the amount of mass to be transported
- The formulation of the problem has been proposed in [29]_
+ The formulation of the problem has been proposed in :ref:`[29] <references-partial-gromov-wasserstein>`
Parameters
@@ -454,7 +469,7 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None,
q : ndarray, shape (nt,)
Distribution in the target space
m : float, optional
- Amount of mass to be transported (default: min (|p|_1, |q|_1))
+ Amount of mass to be transported (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`)
nb_dummies : int, optional
Number of dummy points to add (avoid instabilities in the EMD solver)
G0 : ndarray, shape (ns, nt), optional
@@ -476,7 +491,7 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None,
Returns
-------
- gamma : (dim_a x dim_b) ndarray
+ gamma : (dim_a, dim_b) ndarray
Optimal transportation matrix for the given parameters
log : dict
log dictionary returned only if `log` is `True`
@@ -503,6 +518,8 @@ def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None,
[0. , 0. , 0.25, 0. ],
[0. , 0. , 0. , 0. ]])
+
+ .. _references-partial-gromov-wasserstein:
References
----------
.. [29] Chapel, L., Alaya, M., Gasso, G. (2020). "Partial Optimal
@@ -597,22 +614,25 @@ def partial_gromov_wasserstein2(C1, C2, p, q, m=None, nb_dummies=1, G0=None,
The function considers the following problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F
+ GW = \min_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F
+
+ .. math::
+ s.t. \ \gamma \mathbf{1} &\leq \mathbf{a}
+
+ \gamma^T \mathbf{1} &\leq \mathbf{b}
+
+ \gamma &\geq 0
- s.t. \gamma 1 \leq a \\
- \gamma^T 1 \leq b \\
- \gamma\geq 0 \\
- 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\} \\
+ \mathbf{1}^T \gamma^T \mathbf{1} = m &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\}
where :
- - M is the metric cost matrix
- - :math:`\Omega` is the entropic regularization term
- :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are the sample weights
- - m is the amount of mass to be transported
+ - :math:`\mathbf{M}` is the metric cost matrix
+ - :math:`\Omega` is the entropic regularization term, :math:`\Omega(\gamma) = \sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights
+ - `m` is the amount of mass to be transported
- The formulation of the problem has been proposed in [29]_
+ The formulation of the problem has been proposed in :ref:`[29] <references-partial-gromov-wasserstein2>`
Parameters
@@ -626,7 +646,7 @@ def partial_gromov_wasserstein2(C1, C2, p, q, m=None, nb_dummies=1, G0=None,
q : ndarray, shape (nt,)
Distribution in the target space
m : float, optional
- Amount of mass to be transported (default: min (|p|_1, |q|_1))
+ Amount of mass to be transported (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`)
nb_dummies : int, optional
Number of dummy points to add (avoid instabilities in the EMD solver)
G0 : ndarray, shape (ns, nt), optional
@@ -655,7 +675,7 @@ def partial_gromov_wasserstein2(C1, C2, p, q, m=None, nb_dummies=1, G0=None,
Returns
-------
- partial_gw_dist : (dim_a x dim_b) ndarray
+ partial_gw_dist : float
partial GW discrepancy
log : dict
log dictionary returned only if `log` is `True`
@@ -676,6 +696,8 @@ def partial_gromov_wasserstein2(C1, C2, p, q, m=None, nb_dummies=1, G0=None,
>>> np.round(partial_gromov_wasserstein2(C1, C2, a, b, m=0.25),2)
0.0
+
+ .. _references-partial-gromov-wasserstein2:
References
----------
.. [29] Chapel, L., Alaya, M., Gasso, G. (2020). "Partial Optimal
@@ -706,30 +728,29 @@ def entropic_partial_wasserstein(a, b, M, reg, m=None, numItermax=1000,
The function considers the following problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F + \mathrm{reg} \cdot\Omega(\gamma)
- s.t. \gamma 1 \leq a \\
- \gamma^T 1 \leq b \\
- \gamma\geq 0 \\
- 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\} \\
+ s.t. \gamma \mathbf{1} &\leq \mathbf{a} \\
+ \gamma^T \mathbf{1} &\leq \mathbf{b} \\
+ \gamma &\geq 0 \\
+ \mathbf{1}^T \gamma^T \mathbf{1} = m &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\} \\
where :
- - M is the metric cost matrix
- - :math:`\Omega` is the entropic regularization term
- :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are the sample weights
- - m is the amount of mass to be transported
+ - :math:`\mathbf{M}` is the metric cost matrix
+ - :math:`\Omega` is the entropic regularization term, :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights
+ - `m` is the amount of mass to be transported
- The formulation of the problem has been proposed in [3]_ (prop. 5)
+ The formulation of the problem has been proposed in :ref:`[3] <references-entropic-partial-wasserstein>` (prop. 5)
Parameters
----------
a : np.ndarray (dim_a,)
- Unnormalized histogram of dimension dim_a
+ Unnormalized histogram of dimension `dim_a`
b : np.ndarray (dim_b,)
- Unnormalized histograms of dimension dim_b
+ Unnormalized histogram of dimension `dim_b`
M : np.ndarray (dim_a, dim_b)
cost matrix
reg : float
@@ -748,7 +769,7 @@ def entropic_partial_wasserstein(a, b, M, reg, m=None, numItermax=1000,
Returns
-------
- gamma : (dim_a x dim_b) ndarray
+ gamma : (dim_a, dim_b) ndarray
Optimal transportation matrix for the given parameters
log : dict
log dictionary returned only if `log` is `True`
@@ -764,6 +785,8 @@ def entropic_partial_wasserstein(a, b, M, reg, m=None, numItermax=1000,
array([[0.06, 0.02],
[0.01, 0. ]])
+
+ .. _references-entropic-partial-wasserstein:
References
----------
.. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G.
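A minimal call sketch for `entropic_partial_wasserstein`, reusing the toy data of its doctest:

    import numpy as np
    import ot

    a = np.array([.1, .2])                     # unnormalized source histogram
    b = np.array([.1, .1])                     # unnormalized target histogram
    M = np.array([[0., 1.], [1., 0.]])         # ground cost
    # entropic smoothing reg, transporting only m = 0.1 units of mass
    gamma = ot.partial.entropic_partial_wasserstein(a, b, M, reg=1., m=0.1)
    # gamma sums to m and its marginals are dominated by a and b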
@@ -838,32 +861,34 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None,
numItermax=1000, tol=1e-7, log=False,
verbose=False):
r"""
- Returns the partial Gromov-Wasserstein transport between (C1,p) and (C2,q)
+ Returns the partial Gromov-Wasserstein transport between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`
The function solves the following optimization problem:
.. math::
- GW = \arg\min_{\gamma} \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})\cdot
- \gamma_{i,j}\cdot\gamma_{k,l} + reg\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_{\gamma} \quad \sum_{i,j,k,l}
+ L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l})\cdot
+ \gamma_{i,j}\cdot\gamma_{k,l} + \mathrm{reg} \cdot\Omega(\gamma)
+
+ .. math::
+ s.t. \ \gamma &\geq 0
+
+ \gamma \mathbf{1} &\leq \mathbf{a}
- s.t.
- \gamma\geq 0 \\
- \gamma 1 \leq a\\
- \gamma^T 1 \leq b\\
- 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\}
+ \gamma^T \mathbf{1} &\leq \mathbf{b}
+
+ \mathbf{1}^T \gamma^T \mathbf{1} = m &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\}
where :
- - C1 is the metric cost matrix in the source space
- - C2 is the metric cost matrix in the target space
- - p and q are the sample weights
- - L : quadratic loss function
- - :math:`\Omega` is the entropic regularization term
- :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - m is the amount of mass to be transported
+ - :math:`\mathbf{C_1}` is the metric cost matrix in the source space
+ - :math:`\mathbf{C_2}` is the metric cost matrix in the target space
+ - :math:`\mathbf{p}` and :math:`\mathbf{q}` are the sample weights
+ - `L`: quadratic loss function
+ - :math:`\Omega` is the entropic regularization term, :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - `m` is the amount of mass to be transported
- The formulation of the GW problem has been proposed in [12]_ and the
- partial GW in [29]_.
+ The formulation of the GW problem has been proposed in :ref:`[12] <references-entropic-partial-gromov-wasserstein>` and the partial GW in :ref:`[29] <references-entropic-partial-gromov-wasserstein>`.
Parameters
----------
@@ -878,7 +903,7 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None,
reg: float
entropic regularization parameter
m : float, optional
- Amount of mass to be transported (default: min (|p|_1, |q|_1))
+ Amount of mass to be transported (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`)
G0 : ndarray, shape (ns, nt), optional
Initialisation of the transportation matrix
numItermax : int, optional
@@ -913,17 +938,20 @@ def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None,
Returns
-------
- :math: `gamma` : (dim_a x dim_b) ndarray
+ gamma : (dim_a, dim_b) ndarray
Optimal transportation matrix for the given parameters
log : dict
log dictionary returned only if `log` is `True`
+
+ .. _references-entropic-partial-gromov-wasserstein:
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
- .. [29] Chapel, L., Alaya, M., Gasso, G. (2020). "Partial Optimal
+
+ .. [29] Chapel, L., Alaya, M., Gasso, G. (2020). "Partial Optimal
Transport with Applications on Positive-Unlabeled Learning".
NeurIPS.
@@ -977,33 +1005,33 @@ def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None,
numItermax=1000, tol=1e-7, log=False,
verbose=False):
r"""
- Returns the partial Gromov-Wasserstein discrepancy between (C1,p) and
- (C2,q)
+ Returns the partial Gromov-Wasserstein discrepancy between :math:`(\mathbf{C_1}, \mathbf{p})` and :math:`(\mathbf{C_2}, \mathbf{q})`
The function solves the following optimization problem:
.. math::
- GW = \arg\min_{\gamma} \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})\cdot
- \gamma_{i,j}\cdot\gamma_{k,l} + reg\cdot\Omega(\gamma)
+ GW = \min_{\gamma} \quad \sum_{i,j,k,l} L(\mathbf{C_1}_{i,k}, \mathbf{C_2}_{j,l})\cdot
+ \gamma_{i,j}\cdot\gamma_{k,l} + \mathrm{reg} \cdot\Omega(\gamma)
+
+ .. math::
+ s.t. \ \gamma &\geq 0
+
+ \gamma \mathbf{1} &\leq \mathbf{a}
+
+ \gamma^T \mathbf{1} &\leq \mathbf{b}
- s.t.
- \gamma\geq 0 \\
- \gamma 1 \leq a\\
- \gamma^T 1 \leq b\\
- 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\}
+ \mathbf{1}^T \gamma^T \mathbf{1} = m &\leq \min\{\|\mathbf{a}\|_1, \|\mathbf{b}\|_1\}
where :
- - C1 is the metric cost matrix in the source space
- - C2 is the metric cost matrix in the target space
- - p and q are the sample weights
- - L : quadratic loss function
- - :math:`\Omega` is the entropic regularization term
- :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - m is the amount of mass to be transported
+ - :math:`\mathbf{C_1}` is the metric cost matrix in the source space
+ - :math:`\mathbf{C_2}` is the metric cost matrix in the target space
+ - :math:`\mathbf{p}` and :math:`\mathbf{q}` are the sample weights
+ - `L` : quadratic loss function
+ - :math:`\Omega` is the entropic regularization term, :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - `m` is the amount of mass to be transported
- The formulation of the GW problem has been proposed in [12]_ and the
- partial GW in [29]_.
+ The formulation of the GW problem has been proposed in :ref:`[12] <references-entropic-partial-gromov-wasserstein2>` and the partial GW in :ref:`[29] <references-entropic-partial-gromov-wasserstein2>`.
Parameters
@@ -1019,7 +1047,7 @@ def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None,
reg: float
entropic regularization parameter
m : float, optional
- Amount of mass to be transported (default: min (|p|_1, |q|_1))
+ Amount of mass to be transported (default: :math:`\min\{\|\mathbf{p}\|_1, \|\mathbf{q}\|_1\}`)
G0 : ndarray, shape (ns, nt), optional
Initialisation of the transportation matrix
numItermax : int, optional
@@ -1052,11 +1080,14 @@ def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None,
>>> np.round(entropic_partial_gromov_wasserstein2(C1, C2, a, b,50), 2)
1.87
+
+ .. _references-entropic-partial-gromov-wasserstein2:
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
"Gromov-Wasserstein averaging of kernel and distance matrices."
International Conference on Machine Learning (ICML). 2016.
+
.. [29] Chapel, L., Alaya, M., Gasso, G. (2020). "Partial Optimal
Transport with Applications on Positive-Unlabeled Learning".
NeurIPS.
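Both entropic partial GW solvers share their signature; a sketch with illustrative sizes (`reg=50` mirrors the doctest above):

    import numpy as np
    from scipy.spatial.distance import cdist
    import ot

    rng = np.random.RandomState(0)
    xs, xt = rng.randn(20, 2), rng.randn(30, 2)
    C1, C2 = cdist(xs, xs), cdist(xt, xt)      # structure matrices
    p, q = ot.unif(20), ot.unif(30)
    # transport plan moving half of the total mass
    T = ot.partial.entropic_partial_gromov_wasserstein(C1, C2, p, q, reg=50, m=0.5)
    # scalar discrepancy for the same problem
    d = ot.partial.entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg=50, m=0.5)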
diff --git a/ot/plot.py b/ot/plot.py
index ad436b4..3e3bed7 100644
--- a/ot/plot.py
+++ b/ot/plot.py
@@ -18,10 +18,10 @@ from matplotlib import gridspec
def plot1D_mat(a, b, M, title=''):
- """ Plot matrix M with the source and target 1D distribution
+ """ Plot matrix :math:`\mathbf{M}` with the source and target 1D distribution
- Creates a subplot with the source distribution a on the left and
- target distribution b on the tot. The matrix M is shown in between.
+ Creates a subplot with the source distribution :math:`\mathbf{a}` on the left and
+ target distribution :math:`\mathbf{b}` on the top. The matrix :math:`\mathbf{M}` is shown in between.
Parameters
@@ -61,10 +61,10 @@ def plot1D_mat(a, b, M, title=''):
def plot2D_samples_mat(xs, xt, G, thr=1e-8, **kwargs):
- """ Plot matrix M in 2D with lines using alpha values
+ """ Plot matrix :math:`\mathbf{G}` in 2D with lines using alpha values
Plot lines between source and target 2D samples with a color
- proportional to the value of the matrix G between samples.
+ proportional to the value of the matrix :math:`\mathbf{G}` between samples.
Parameters
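The two plotting helpers are typically combined with a solver; a sketch (requires matplotlib; the sample data is illustrative):

    import numpy as np
    import matplotlib.pyplot as plt
    import ot
    import ot.plot                              # imported explicitly, needs matplotlib

    rng = np.random.RandomState(42)
    xs = rng.randn(30, 2)                       # source samples
    xt = rng.randn(30, 2) + np.array([4., 4.])  # shifted target samples
    G = ot.emd(ot.unif(30), ot.unif(30), ot.dist(xs, xt))  # exact OT plan
    ot.plot.plot2D_samples_mat(xs, xt, G, c=[.5, .5, .5])  # one line per G[i, j] > thr
    plt.scatter(xs[:, 0], xs[:, 1], label='source')
    plt.scatter(xt[:, 0], xt[:, 1], label='target')
    plt.legend()
    plt.show()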
diff --git a/ot/sliced.py b/ot/sliced.py
index d3dc3f2..7c09111 100644
--- a/ot/sliced.py
+++ b/ot/sliced.py
@@ -17,7 +17,7 @@ from .utils import list_to_array
def get_random_projections(d, n_projections, seed=None, backend=None, type_as=None):
r"""
- Generates n_projections samples from the uniform on the unit sphere of dimension d-1: :math:`\mathcal{U}(\mathcal{S}^{d-1})`
+ Generates n_projections samples from the uniform distribution on the unit sphere of dimension :math:`d-1`: :math:`\mathcal{U}(\mathcal{S}^{d-1})`
Parameters
----------
@@ -67,11 +67,12 @@ def sliced_wasserstein_distance(X_s, X_t, a=None, b=None, n_projections=50, p=2,
Computes a Monte-Carlo approximation of the p-Sliced Wasserstein distance
.. math::
- \mathcal{SWD}_p(\mu, \nu) = \underset{\theta \sim \mathcal{U}(\mathbb{S}^{d-1})}{\mathbb{E}}[\mathcal{W}_p^p(\theta_\# \mu, \theta_\# \nu)]^{\frac{1}{p}}
+ \mathcal{SWD}_p(\mu, \nu) = \underset{\theta \sim \mathcal{U}(\mathbb{S}^{d-1})}{\mathbb{E}}\left(\mathcal{W}_p^p(\theta_\# \mu, \theta_\# \nu)\right)^{\frac{1}{p}}
+
where :
- - :math:`\theta_\# \mu` stands for the pushforwars of the projection :math:`\mathbb{R}^d \ni X \mapsto \langle \theta, X \rangle`
+ - :math:`\theta_\# \mu` stands for the pushforwards of the projection :math:`X \in \mathbb{R}^d \mapsto \langle \theta, X \rangle`
Parameters
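A sketch of the Monte-Carlo estimator; increasing `n_projections` reduces the variance of the estimate:

    import numpy as np
    import ot

    rng = np.random.RandomState(0)
    X_s = rng.randn(100, 3)                     # source samples in R^3
    X_t = rng.randn(100, 3) + 1.                # shifted target samples
    # average W_p^p over 200 random 1D projections, then take the 1/p power
    swd = ot.sliced.sliced_wasserstein_distance(X_s, X_t, n_projections=200, seed=0)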
diff --git a/ot/smooth.py b/ot/smooth.py
index ea26bae..6855005 100644
--- a/ot/smooth.py
+++ b/ot/smooth.py
@@ -47,15 +47,24 @@ from scipy.optimize import minimize
def projection_simplex(V, z=1, axis=None):
- """ Projection of x onto the simplex, scaled by z
+ r""" Projection of :math:`\mathbf{V}` onto the simplex, scaled by `z`
- P(x; z) = argmin_{y >= 0, sum(y) = z} ||y - x||^2
+ .. math::
+ P\left(\mathbf{V}, z\right) = \mathop{\arg \min}_{\substack{\mathbf{y} \geq 0 \\ \sum_i \mathbf{y}_i = z}} \quad \|\mathbf{y} - \mathbf{V}\|^2
+
+ Parameters
+ ----------
+ V: ndarray, rank 2
z: float or array
- If array, len(z) must be compatible with V
+ If array, len(z) must be compatible with :math:`\mathbf{V}`
axis: None or int
- - axis=None: project V by P(V.ravel(); z)
- - axis=1: project each V[i] by P(V[i]; z[i])
- - axis=0: project each V[:, j] by P(V[:, j]; z[j])
+ - axis=None: project :math:`\mathbf{V}` by :math:`P(\mathbf{V}.\mathrm{ravel}(), z)`
+ - axis=1: project each :math:`\mathbf{V}_i` by :math:`P(\mathbf{V}_i, z_i)`
+ - axis=0: project each :math:`\mathbf{V}_{:, j}` by :math:`P(\mathbf{V}_{:, j}, z_j)`
+
+ Returns
+ -------
+ projection : ndarray, same shape as :math:`\mathbf{V}`
"""
if axis == 1:
n_features = V.shape[1]
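The three `axis` modes are easiest to see on a small array; a sketch:

    import numpy as np
    from ot.smooth import projection_simplex

    V = np.array([[1.0, 2.0, -1.0],
                  [0.5, 0.5,  0.5]])
    P1 = projection_simplex(V, z=1, axis=1)           # each row -> simplex of mass 1
    P0 = projection_simplex(V, z=np.ones(3), axis=0)  # column j -> mass z[j]
    # rows of P1 are non-negative and each sums to 1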
@@ -77,12 +86,12 @@ def projection_simplex(V, z=1, axis=None):
class Regularization(object):
- """Base class for Regularization objects
+ r"""Base class for Regularization objects
Notes
-----
- This class is not intended for direct use but as aparent for true
- regularizatiojn implementation.
+ This class is not intended for direct use but as a parent class for
+ concrete regularization implementations.
"""
def __init__(self, gamma=1.0):
@@ -98,40 +107,48 @@ class Regularization(object):
self.gamma = gamma
def delta_Omega(self, X):
- """
- Compute delta_Omega(X[:, j]) for each X[:, j].
- delta_Omega(x) = sup_{y >= 0} y^T x - Omega(y).
+ r"""
+ Compute :math:`\delta_\Omega(\mathbf{X}_{:, j})` for each :math:`\mathbf{X}_{:, j}`.
+
+ .. math::
+ \delta_\Omega(\mathbf{x}) = \sup_{\mathbf{y} \geq 0} \
+ \mathbf{y}^T \mathbf{x} - \Omega(\mathbf{y})
Parameters
----------
- X: array, shape = len(a) x len(b)
+ X: array, shape = (len(a), len(b))
Input array.
Returns
-------
- v: array, len(b)
- Values: v[j] = delta_Omega(X[:, j])
- G: array, len(a) x len(b)
- Gradients: G[:, j] = nabla delta_Omega(X[:, j])
+ v: array, (len(b), )
+ Values: :math:`\mathbf{v}_j = \delta_\Omega(\mathbf{X}_{:, j})`
+ G: array, (len(a), len(b))
+ Gradients: :math:`\mathbf{G}_{:, j} = \nabla \delta_\Omega(\mathbf{X}_{:, j})`
"""
raise NotImplementedError
def max_Omega(self, X, b):
- """
- Compute max_Omega_j(X[:, j]) for each X[:, j].
- max_Omega_j(x) = sup_{y >= 0, sum(y) = 1} y^T x - Omega(b[j] y) / b[j].
+ r"""
+ Compute :math:`\mathrm{max}_{\Omega, j}(\mathbf{X}_{:, j})` for each :math:`\mathbf{X}_{:, j}`.
+
+ .. math::
+ \mathrm{max}_{\Omega, j}(\mathbf{x}) =
+ \sup_{\substack{\mathbf{y} \geq 0 \\ \sum_i \mathbf{y}_i = 1}}
+ \mathbf{y}^T \mathbf{x} - \frac{1}{\mathbf{b}_j} \Omega(\mathbf{b}_j \mathbf{y})
Parameters
----------
- X: array, shape = len(a) x len(b)
+ X: array, shape = (len(a), len(b))
Input array.
+ b: array, shape = (len(b), )
Returns
-------
- v: array, len(b)
- Values: v[j] = max_Omega_j(X[:, j])
- G: array, len(a) x len(b)
- Gradients: G[:, j] = nabla max_Omega_j(X[:, j])
+ v: array, (len(b), )
+ Values: :math:`\mathbf{v}_j = \mathrm{max}_{\Omega, j}(\mathbf{X}_{:, j})`
+ G: array, (len(a), len(b))
+ Gradients: :math:`\mathbf{G}_{:, j} = \nabla \mathrm{max}_{\Omega, j}(\mathbf{X}_{:, j})`
"""
raise NotImplementedError
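Concrete subclasses in this module (`NegEntropy`, `SquaredL2`) implement both operators; a sketch with the squared-L2 case:

    import numpy as np
    from ot.smooth import SquaredL2

    regul = SquaredL2(gamma=1.0)
    X = np.array([[0.2, -0.1],
                  [0.4,  0.3]])
    v, G = regul.delta_Omega(X)     # conjugate values v[j], gradients G[:, j]
    b = np.array([0.5, 0.5])
    w, H = regul.max_Omega(X, b)    # per-column constrained maxima and gradients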
@@ -192,7 +209,7 @@ class SquaredL2(Regularization):
def dual_obj_grad(alpha, beta, a, b, C, regul):
- """
+ r"""
Compute objective value and gradients of dual objective.
Parameters
@@ -203,19 +220,19 @@ def dual_obj_grad(alpha, beta, a, b, C, regul):
a: array, shape = len(a)
b: array, shape = len(b)
Input histograms (should be non-negative and sum to 1).
- C: array, shape = len(a) x len(b)
+ C: array, shape = (len(a), len(b))
Ground cost matrix.
regul: Regularization object
- Should implement a delta_Omega(X) method.
+ Should implement a `delta_Omega(X)` method.
Returns
-------
obj: float
Objective value (higher is better).
grad_alpha: array, shape = len(a)
- Gradient w.r.t. alpha.
+ Gradient w.r.t. `alpha`.
grad_beta: array, shape = len(b)
- Gradient w.r.t. beta.
+ Gradient w.r.t. `beta`.
"""
obj = np.dot(alpha, a) + np.dot(beta, b)
grad_alpha = a.copy()
@@ -242,13 +259,13 @@ def solve_dual(a, b, C, regul, method="L-BFGS-B", tol=1e-3, max_iter=500,
Parameters
----------
- a: array, shape = len(a)
- b: array, shape = len(b)
+ a: array, shape = (len(a), )
+ b: array, shape = (len(b), )
Input histograms (should be non-negative and sum to 1).
- C: array, shape = len(a) x len(b)
+ C: array, shape = (len(a), len(b))
Ground cost matrix.
regul: Regularization object
- Should implement a delta_Omega(X) method.
+ Should implement a `delta_Omega(X)` method.
method: str
Solver to be used (passed to `scipy.optimize.minimize`).
tol: float
@@ -258,8 +275,8 @@ def solve_dual(a, b, C, regul, method="L-BFGS-B", tol=1e-3, max_iter=500,
Returns
-------
- alpha: array, shape = len(a)
- beta: array, shape = len(b)
+ alpha: array, shape = (len(a), )
+ beta: array, shape = (len(b), )
Dual potentials.
"""
@@ -302,10 +319,10 @@ def semi_dual_obj_grad(alpha, a, b, C, regul):
a: array, shape = len(a)
b: array, shape = len(b)
Input histograms (should be non-negative and sum to 1).
- C: array, shape = len(a) x len(b)
+ C: array, shape = (len(a), len(b))
Ground cost matrix.
regul: Regularization object
- Should implement a max_Omega(X) method.
+ Should implement a `max_Omega(X)` method.
Returns
-------
@@ -337,13 +354,13 @@ def solve_semi_dual(a, b, C, regul, method="L-BFGS-B", tol=1e-3, max_iter=500,
Parameters
----------
- a: array, shape = len(a)
- b: array, shape = len(b)
+ a: array, shape = (len(a), )
+ b: array, shape = (len(b), )
Input histograms (should be non-negative and sum to 1).
- C: array, shape = len(a) x len(b)
+ C: array, shape = (len(a), len(b))
Ground cost matrix.
regul: Regularization object
- Should implement a max_Omega(X) method.
+ Should implement a `max_Omega(X)` method.
method: str
Solver to be used (passed to `scipy.optimize.minimize`).
tol: float
@@ -353,7 +370,7 @@ def solve_semi_dual(a, b, C, regul, method="L-BFGS-B", tol=1e-3, max_iter=500,
Returns
-------
- alpha: array, shape = len(a)
+ alpha: array, shape = (len(a), )
Semi-dual potentials.
"""
@@ -371,7 +388,7 @@ def solve_semi_dual(a, b, C, regul, method="L-BFGS-B", tol=1e-3, max_iter=500,
def get_plan_from_dual(alpha, beta, C, regul):
- """
+ r"""
Retrieve optimal transportation plan from optimal dual potentials.
Parameters
@@ -379,14 +396,14 @@ def get_plan_from_dual(alpha, beta, C, regul):
alpha: array, shape = len(a)
beta: array, shape = len(b)
Optimal dual potentials.
- C: array, shape = len(a) x len(b)
+ C: array, shape = (len(a), len(b))
Ground cost matrix.
regul: Regularization object
- Should implement a delta_Omega(X) method.
+ Should implement a `delta_Omega(X)` method.
Returns
-------
- T: array, shape = len(a) x len(b)
+ T: array, shape = (len(a), len(b))
Optimal transportation plan.
"""
X = alpha[:, np.newaxis] + beta - C
@@ -394,7 +411,7 @@ def get_plan_from_dual(alpha, beta, C, regul):
def get_plan_from_semi_dual(alpha, b, C, regul):
- """
+ r"""
Retrieve optimal transportation plan from optimal semi-dual potentials.
Parameters
@@ -403,14 +420,14 @@ def get_plan_from_semi_dual(alpha, b, C, regul):
Optimal semi-dual potentials.
b: array, shape = len(b)
Second input histogram (should be non-negative and sum to 1).
- C: array, shape = len(a) x len(b)
+ C: array, shape = (len(a), len(b))
Ground cost matrix.
regul: Regularization object
- Should implement a delta_Omega(X) method.
+ Should implement a `delta_Omega(X)` method.
Returns
-------
- T: array, shape = len(a) x len(b)
+ T: array, shape = (len(a), len(b))
Optimal transportation plan.
"""
X = alpha[:, np.newaxis] - C
@@ -422,19 +439,21 @@ def smooth_ot_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=1e-9,
r"""
Solve the regularized OT problem in the dual and return the OT matrix
- The function solves the smooth relaxed dual formulation (7) in [17]_ :
+ The function solves the smooth relaxed dual formulation (7) in
+ :ref:`[17] <references-smooth-ot-dual>`:
.. math::
- \max_{\alpha,\beta}\quad a^T\alpha+b^T\beta-\sum_j\delta_\Omega(\alpha+\beta_j-\mathbf{m}_j)
+ \max_{\alpha,\beta}\quad \mathbf{a}^T\alpha + \mathbf{b}^T\beta -
+ \sum_j \delta_\Omega \left(\alpha+\beta_j-\mathbf{m}_j \right)
where :
- - :math:`\mathbf{m}_j` is the jth column of the cost matrix
+ - :math:`\mathbf{m}_j` is the j-th column of the cost matrix
- :math:`\delta_\Omega` is the convex conjugate of the regularization term :math:`\Omega`
- - a and b are source and target weights (sum to 1)
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1)
The OT matrix can be reconstructed from the gradient of :math:`\delta_\Omega`
- (See [17]_ Proposition 1).
+ (See :ref:`[17] <references-smooth-ot-dual>` Proposition 1).
The optimization algorithm uses gradient descent (L-BFGS by default).
@@ -444,15 +463,19 @@ def smooth_ot_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=1e-9,
samples weights in the source domain
b : np.ndarray (nt,) or np.ndarray (nt,nbb)
samples in the target domain, compute sinkhorn with multiple targets
- and fixed M if b is a matrix (return OT loss + dual variables in log)
+ and fixed :math:`\mathbf{M}` if :math:`\mathbf{b}` is a matrix
+ (return OT loss + dual variables in log)
M : np.ndarray (ns,nt)
loss matrix
reg : float
Regularization term >0
reg_type : str
- Regularization type, can be the following (default ='l2'):
- - 'kl' : Kullback Leibler (~ Neg-entropy used in sinkhorn [2]_)
- - 'l2' : Squared Euclidean regularization
+ Regularization type, can be the following (default ='l2'):
+
+ - 'kl' : Kullback Leibler (~ Neg-entropy used in sinkhorn
+ :ref:`[2] <references-smooth-ot-dual>`)
+
+ - 'l2' : Squared Euclidean regularization
method : str
Solver to use for scipy.optimize.minimize
numItermax : int, optional
@@ -467,15 +490,15 @@ def smooth_ot_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=1e-9,
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : (ns, nt) ndarray
Optimal transportation matrix for the given parameters
log : dict
log dictionary returned only if `log` is `True`
+ .. _references-smooth-ot-dual:
References
----------
-
.. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
.. [17] Blondel, M., Seguy, V., & Rolet, A. (2018). Smooth and Sparse Optimal Transport. Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics (AISTATS).
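A sketch of the dual solver on a tiny 1D problem:

    import numpy as np
    import ot

    a, b = ot.unif(3), ot.unif(3)
    M = ot.dist(np.arange(3.).reshape(-1, 1), np.arange(3.).reshape(-1, 1))
    # squared-L2 regularized plan, obtained by L-BFGS on the dual
    Gl2 = ot.smooth.smooth_ot_dual(a, b, M, reg=0.1, reg_type='l2')
    # 'kl' gives an entropic (Sinkhorn-like) plan instead
    Gkl = ot.smooth.smooth_ot_dual(a, b, M, reg=0.1, reg_type='kl')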
@@ -514,21 +537,23 @@ def smooth_ot_semi_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=
r"""
Solve the regularized OT problem in the semi-dual and return the OT matrix
- The function solves the smooth relaxed dual formulation (10) in [17]_ :
+ The function solves the smooth relaxed dual formulation (10) in
+ :ref:`[17] <references-smooth-ot-semi-dual>`:
.. math::
- \max_{\alpha}\quad a^T\alpha-OT_\Omega^*(\alpha,b)
+ \max_{\alpha}\quad \mathbf{a}^T\alpha- \mathrm{OT}_\Omega^*(\alpha, \mathbf{b})
where :
.. math::
- OT_\Omega^*(\alpha,b)=\sum_j b_j
+ \mathrm{OT}_\Omega^*(\alpha, \mathbf{b}) = \sum_j \mathbf{b}_j \, \mathrm{max}_{\Omega, j}(\alpha - \mathbf{m}_j)
- - :math:`\mathbf{m}_j` is the jth column of the cost matrix
- - :math:`OT_\Omega^*(\alpha,b)` is defined in Eq. (9) in [17]
- - a and b are source and target weights (sum to 1)
+ - :math:`\mathbf{m}_j` is the j-th column of the cost matrix
+ - :math:`\mathrm{OT}_\Omega^*(\alpha,b)` is defined in Eq. (9) in
+ :ref:`[17] <references-smooth-ot-semi-dual>`
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1)
- The OT matrix can is reconstructed using [17]_ Proposition 2.
+ The OT matrix can be reconstructed using :ref:`[17] <references-smooth-ot-semi-dual>` Proposition 2.
The optimization algorithm uses gradient descent (L-BFGS by default).
@@ -538,15 +563,19 @@ def smooth_ot_semi_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=
samples weights in the source domain
b : np.ndarray (nt,) or np.ndarray (nt,nbb)
samples in the target domain, compute sinkhorn with multiple targets
- and fixed M if b is a matrix (return OT loss + dual variables in log)
+ and fixed :math:`\mathbf{M}` if :math:`\mathbf{b}` is a matrix
+ (return OT loss + dual variables in log)
M : np.ndarray (ns,nt)
loss matrix
reg : float
Regularization term >0
reg_type : str
- Regularization type, can be the following (default ='l2'):
- - 'kl' : Kullback Leibler (~ Neg-entropy used in sinkhorn [2]_)
- - 'l2' : Squared Euclidean regularization
+ Regularization type, can be the following (default ='l2'):
+
+ - 'kl' : Kullback Leibler (~ Neg-entropy used in sinkhorn
+ :ref:`[2] <references-smooth-ot-semi-dual>`)
+
+ - 'l2' : Squared Euclidean regularization
method : str
Solver to use for scipy.optimize.minimize
numItermax : int, optional
@@ -561,15 +590,15 @@ def smooth_ot_semi_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : (ns, nt) ndarray
Optimal transportation matrix for the given parameters
log : dict
log dictionary returned only if `log` is `True`
+ .. _references-smooth-ot-semi-dual:
References
----------
-
.. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
.. [17] Blondel, M., Seguy, V., & Rolet, A. (2018). Smooth and Sparse Optimal Transport. Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics (AISTATS).
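The semi-dual solver takes the same arguments; only the first potential is optimized, the second being eliminated through `max_Omega`; a sketch:

    import numpy as np
    import ot

    a, b = ot.unif(3), ot.unif(3)
    M = ot.dist(np.arange(3.).reshape(-1, 1), np.arange(3.).reshape(-1, 1))
    G = ot.smooth.smooth_ot_semi_dual(a, b, M, reg=0.1, reg_type='l2')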
diff --git a/ot/stochastic.py b/ot/stochastic.py
index 13ed9cc..693675f 100644
--- a/ot/stochastic.py
+++ b/ot/stochastic.py
@@ -18,22 +18,25 @@ import numpy as np
def coordinate_grad_semi_dual(b, M, reg, beta, i):
r'''
- Compute the coordinate gradient update for regularized discrete distributions for (i, :)
+ Compute the coordinate gradient update for regularized discrete distributions for :math:`(i, :)`
The function computes the gradient of the semi dual problem:
.. math::
- \max_v \sum_i (\sum_j v_j * b_j - reg * log(\sum_j exp((v_j - M_{i,j})/reg) * b_j)) * a_i
+ \max_\mathbf{v} \ \sum_i \mathbf{a}_i \left[ \sum_j \mathbf{v}_j \mathbf{b}_j - \mathrm{reg}
+ \cdot \log \left( \sum_j \mathbf{b}_j
+ \exp \left( \frac{\mathbf{v}_j - \mathbf{M}_{i,j}}{\mathrm{reg}}
+ \right) \right) \right]
Where :
- - M is the (ns,nt) metric cost matrix
- - v is a dual variable in R^J
+ - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix
+ - :math:`\mathbf{v}` is a dual variable in :math:`\mathbb{R}^{nt}`
- reg is the regularization term
- - a and b are source and target weights (sum to 1)
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1)
The algorithm used for solving the problem is the ASGD & SAG algorithms
- as proposed in [18]_ [alg.1 & alg.2]
+ as proposed in :ref:`[18] <references-coordinate-grad-semi-dual>` [alg.1 & alg.2]
Parameters
@@ -47,7 +50,7 @@ def coordinate_grad_semi_dual(b, M, reg, beta, i):
v : ndarray, shape (nt,)
Dual variable.
i : int
- Picked number i.
+ Picked number `i`.
Returns
-------
@@ -74,12 +77,10 @@ def coordinate_grad_semi_dual(b, M, reg, beta, i):
[4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]])
+ .. _references-coordinate-grad-semi-dual:
References
----------
- [Genevay et al., 2016] :
- Stochastic Optimization for Large-scale Optimal Transport,
- Advances in Neural Information Processing Systems (2016),
- arXiv preprint arxiv:1605.08527.
+ .. [18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. (2016) Stochastic Optimization for Large-scale Optimal Transport. Advances in Neural Information Processing Systems (2016).
'''
r = M[i, :] - beta
exp_beta = np.exp(-r / reg) * b
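A sketch of one stochastic update, with shapes matching the doctest above:

    import numpy as np
    import ot

    rng = np.random.RandomState(7)
    n_s, n_t = 7, 4
    b = ot.unif(n_t)
    M = rng.rand(n_s, n_t)
    v = np.zeros(n_t)               # current dual iterate
    # gradient of the semi-dual objective restricted to source point i = 3
    g = ot.stochastic.coordinate_grad_semi_dual(b, M, 1., v, 3)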
@@ -88,29 +89,29 @@ def coordinate_grad_semi_dual(b, M, reg, beta, i):
def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None):
- r'''
- Compute the SAG algorithm to solve the regularized discrete measures
- optimal transport max problem
+ r"""
+ Solve the regularized discrete measures optimal transport max problem with the SAG algorithm
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg} \cdot\Omega(\gamma)
- s.t. \gamma 1 = a
+ s.t. \ \gamma \mathbf{1} = \mathbf{a}
- \gamma^T 1 = b
+ \gamma^T \mathbf{1} = \mathbf{b}
\gamma \geq 0
Where :
- - M is the (ns,nt) metric cost matrix
+ - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix
- :math:`\Omega` is the entropic regularization term with :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are source and target weights (sum to 1)
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1)
The algorithm used for solving the problem is the SAG algorithm
- as proposed in [18]_ [alg.1]
+ as proposed in :ref:`[18] <references-sag-entropic-transport>` [alg.1]
Parameters
@@ -131,7 +132,7 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None):
Returns
-------
- v : ndarray, shape (nt,)
+ v : ndarray, shape (`nt`,)
Dual variable.
Examples
@@ -154,14 +155,12 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None):
[2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01],
[4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]])
+
+ .. _references-sag-entropic-transport:
References
----------
-
- [Genevay et al., 2016] :
- Stochastic Optimization for Large-scale Optimal Transport,
- Advances in Neural Information Processing Systems (2016),
- arXiv preprint arxiv:1605.08527.
- '''
+ .. [18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. (2016) Stochastic Optimization for Large-scale Optimal Transport. Advances in Neural Information Processing Systems (2016).
+ """
if lr is None:
lr = 1. / max(a / reg)
@@ -187,22 +186,23 @@ def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None):
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg}\cdot\Omega(\gamma)
- s.t. \gamma 1 = a
+ s.t. \gamma \mathbf{1} = \mathbf{a}
- \gamma^T 1= b
+ \gamma^T \mathbf{1} = \mathbf{b}
\gamma \geq 0
Where :
- - M is the (ns,nt) metric cost matrix
+ - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix
- :math:`\Omega` is the entropic regularization term with :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are source and target weights (sum to 1)
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1)
The algorithm used for solving the problem is the ASGD algorithm
- as proposed in [18]_ [alg.2]
+ as proposed in :ref:`[18] <references-averaged-sgd-entropic-transport>` [alg.2]
Parameters
@@ -220,7 +220,7 @@ def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None):
Returns
-------
- ave_v : ndarray, shape (nt,)
+ ave_v : ndarray, shape (`nt`,)
dual variable
Examples
@@ -243,13 +243,11 @@ def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None):
[2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01],
[4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]])
+
+ .. _references-averaged-sgd-entropic-transport:
References
----------
-
- [Genevay et al., 2016] :
- Stochastic Optimization for Large-scale Optimal Transport,
- Advances in Neural Information Processing Systems (2016),
- arXiv preprint arxiv:1605.08527.
+ .. [18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. (2016) Stochastic Optimization for Large-scale Optimal Transport. Advances in Neural Information Processing Systems (2016).
'''
if lr is None:
@@ -271,20 +269,21 @@ def c_transform_entropic(b, M, reg, beta):
r'''
The goal is to recover u from the c-transform.
- The function computes the c_transform of a dual variable from the other
+ The function computes the c-transform of a dual variable from the other
dual variable:
.. math::
- u = v^{c,reg} = -reg \sum_j exp((v - M)/reg) b_j
+ \mathbf{u}_i = \mathbf{v}_i^{c,reg} = - \mathrm{reg} \log \left( \sum_j \mathbf{b}_j
+ \exp\left( \frac{\mathbf{v}_j - \mathbf{M}_{i,j}}{\mathrm{reg}} \right) \right)
Where :
- - M is the (ns,nt) metric cost matrix
- - u, v are dual variables in R^IxR^J
+ - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix
+ - :math:`\mathbf{u}`, :math:`\mathbf{v}` are dual variables in :math:`\mathbb{R}^{ns} \times \mathbb{R}^{nt}`
- reg is the regularization term
It is used to recover an optimal u from optimal v solving the semi dual
- problem, see Proposition 2.1 of [18]_
+ problem, see Proposition 2.1 of :ref:`[18] <references-c-transform-entropic>`
Parameters
@@ -300,7 +299,7 @@ def c_transform_entropic(b, M, reg, beta):
Returns
-------
- u : ndarray, shape (ns,)
+ u : ndarray, shape (`ns`,)
Dual variable.
Examples
@@ -323,13 +322,11 @@ def c_transform_entropic(b, M, reg, beta):
[2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01],
[4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]])
+
+ .. _references-c-transform-entropic:
References
----------
-
- [Genevay et al., 2016] :
- Stochastic Optimization for Large-scale Optimal Transport,
- Advances in Neural Information Processing Systems (2016),
- arXiv preprint arxiv:1605.08527.
+ .. [18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. (2016) Stochastic Optimization for Large-scale Optimal Transport. Advances in Neural Information Processing Systems (2016).
'''
n_source = np.shape(M)[0]
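In plain numpy, the (log-domain) c-transform above reads as follows; this is a sketch of the formula, not the library's numerically stabilized loop:

    import numpy as np

    def c_transform(b, M, reg, v):
        # u_i = -reg * log(sum_j b_j * exp((v_j - M_ij) / reg))
        return -reg * np.log(np.sum(b * np.exp((v - M) / reg), axis=1))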
@@ -345,27 +342,28 @@ def c_transform_entropic(b, M, reg, beta):
def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
log=False):
r'''
- Compute the transportation matrix to solve the regularized discrete
- measures optimal transport max problem
+ Compute the transportation matrix to solve the regularized discrete measures optimal transport max problem
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg} \cdot\Omega(\gamma)
- s.t. \gamma 1 = a
+ s.t. \ \gamma \mathbf{1} = \mathbf{a}
- \gamma^T 1= b
+ \gamma^T \mathbf{1} = \mathbf{b}
\gamma \geq 0
Where :
- - M is the (ns,nt) metric cost matrix
+ - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix
- :math:`\Omega` is the entropic regularization term with :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are source and target weights (sum to 1)
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1)
+
The algorithm used for solving the problem is the SAG or ASGD algorithms
- as proposed in [18]_
+ as proposed in :ref:`[18] <references-solve-semi-dual-entropic>`
Parameters
@@ -419,13 +417,11 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
[2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01],
[4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]])
+
+ .. _references-solve-semi-dual-entropic:
References
----------
-
- [Genevay et al., 2016] :
- Stochastic Optimization for Large-scale Optimal Transport,
- Advances in Neural Information Processing Systems (2016),
- arXiv preprint arxiv:1605.08527.
+ .. [18] Genevay, A., Cuturi, M., Peyré, G. & Bach, F. (2016) Stochastic Optimization for Large-scale Optimal Transport. Advances in Neural Information Processing Systems (2016).
'''
if method.lower() == "sag":
@@ -459,26 +455,30 @@ def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
r'''
Computes the partial gradient of the dual optimal transport problem.
- For each (i,j) in a batch of coordinates, the partial gradients are :
+ For each :math:`(i,j)` in a batch of coordinates, the partial gradients are :
.. math::
- \partial_{u_i} F = u_i * b_s/l_{v} - \sum_{j \in B_v} exp((u_i + v_j - M_{i,j})/reg) * a_i * b_j
+ \partial_{\mathbf{u}_i} F = \frac{b_s}{l_v} \mathbf{u}_i -
+ \sum_{j \in B_v} \mathbf{a}_i \mathbf{b}_j
+ \exp\left( \frac{\mathbf{u}_i + \mathbf{v}_j - \mathbf{M}_{i,j}}{\mathrm{reg}} \right)
- \partial_{v_j} F = v_j * b_s/l_{u} - \sum_{i \in B_u} exp((u_i + v_j - M_{i,j})/reg) * a_i * b_j
+ \partial_{\mathbf{v}_j} F = \frac{b_s}{l_u} \mathbf{v}_j -
+ \sum_{i \in B_u} \mathbf{a}_i \mathbf{b}_j
+ \exp\left( \frac{\mathbf{u}_i + \mathbf{v}_j - \mathbf{M}_{i,j}}{\mathrm{reg}} \right)
Where :
- - M is the (ns,nt) metric cost matrix
- - u, v are dual variables in R^ixR^J
+ - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix
+ - :math:`\mathbf{u}`, :math:`\mathbf{v}` are dual variables in :math:`\mathbb{R}^{ns} \times \mathbb{R}^{nt}`
- reg is the regularization term
- :math:`B_u` and :math:`B_v` are lists of indices
- - :math:`b_s` is the size of the batchs :math:`B_u` and :math:`B_v`
- - :math:`l_u` and :math:`l_v` are the lenghts of :math:`B_u` and :math:`B_v`
- - a and b are source and target weights (sum to 1)
+ - :math:`b_s` is the size of the batches :math:`B_u` and :math:`B_v`
+ - :math:`l_u` and :math:`l_v` are the lengths of :math:`B_u` and :math:`B_v`
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1)
The algorithm used for solving the dual problem is the SGD algorithm
- as proposed in [19]_ [alg.1]
+ as proposed in :ref:`[19] <references-batch-grad-dual>` [alg.1]
Parameters
@@ -504,7 +504,7 @@ def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
Returns
-------
- grad : ndarray, shape (ns,)
+ grad : ndarray, shape (`ns`,)
partial grad F
Examples
@@ -533,12 +533,11 @@ def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
[5.06266486e-02, 2.16230494e-03, 2.26215141e-03, 6.81514609e-04],
[6.06713990e-02, 3.98139808e-02, 5.46829338e-02, 8.62371424e-06]])
+
+ .. _references-batch-grad-dual:
References
----------
-
- [Seguy et al., 2018] :
- International Conference on Learning Representation (2018),
- arXiv preprint arxiv:1711.02283.
+ .. [19] Seguy, V., Bhushan Damodaran, B., Flamary, R., Courty, N., Rolet, A. & Blondel, M. Large-scale Optimal Transport and Mapping Estimation. International Conference on Learning Representations (2018)
'''
G = - (np.exp((alpha[batch_alpha, None] + beta[None, batch_beta] -
M[batch_alpha, :][:, batch_beta]) / reg) *
@@ -555,25 +554,25 @@ def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
r'''
- Compute the sgd algorithm to solve the regularized discrete measures
- optimal transport dual problem
+ Solve the regularized discrete measures optimal transport dual problem with the SGD algorithm
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg} \cdot\Omega(\gamma)
- s.t. \gamma 1 = a
+ s.t. \ \gamma \mathbf{1} = \mathbf{a}
- \gamma^T 1= b
+ \gamma^T \mathbf{1} = \mathbf{b}
\gamma \geq 0
Where :
- - M is the (ns,nt) metric cost matrix
+ - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix
- :math:`\Omega` is the entropic regularization term with :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are source and target weights (sum to 1)
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1)
Parameters
----------
@@ -632,9 +631,7 @@ def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
References
----------
- [Seguy et al., 2018] :
- International Conference on Learning Representation (2018),
- arXiv preprint arxiv:1711.02283.
+ .. [19] Seguy, V., Bhushan Damodaran, B., Flamary, R., Courty, N., Rolet, A. & Blondel, M. Large-scale Optimal Transport and Mapping Estimation. International Conference on Learning Representations (2018)
'''
n_source = np.shape(M)[0]
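A sketch of the mini-batch SGD solver on random data (learning rate and batch size are illustrative):

    import numpy as np
    import ot

    rng = np.random.RandomState(0)
    a, b = ot.unif(7), ot.unif(7)
    M = rng.rand(7, 7)
    # plain SGD on the dual potentials, mini-batches of size 3
    alpha, beta = ot.stochastic.sgd_entropic_regularization(a, b, M, reg=1.,
                                                            batch_size=3,
                                                            numItermax=10000,
                                                            lr=0.1)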
@@ -657,25 +654,25 @@ def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1,
log=False):
r'''
- Compute the transportation matrix to solve the regularized discrete measures
- optimal transport dual problem
+ Compute the transportation matrix to solve the regularized discrete measures optimal transport dual problem
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+ \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F +
+ \mathrm{reg} \cdot\Omega(\gamma)
- s.t. \gamma 1 = a
+ s.t. \ \gamma \mathbf{1} = \mathbf{a}
- \gamma^T 1= b
+ \gamma^T \mathbf{1} = \mathbf{b}
\gamma \geq 0
Where :
- - M is the (ns,nt) metric cost matrix
- - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are source and target weights (sum to 1)
+ - :math:`\mathbf{M}` is the (`ns`, `nt`) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term with :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target weights (sum to 1)
Parameters
----------
@@ -736,10 +733,7 @@ def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1,
References
----------
-
- [Seguy et al., 2018] :
- International Conference on Learning Representation (2018),
- arXiv preprint arxiv:1711.02283.
+ .. [19] Seguy, V., Bhushan Damodaran, B., Flamary, R., Courty, N., Rolet, A. & Blondel, M. Large-scale Optimal Transport and Mapping Estimation. International Conference on Learning Representations (2018)
'''
opt_alpha, opt_beta = sgd_entropic_regularization(a, b, M, reg, batch_size,
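`solve_dual_entropic` wraps the SGD run above and rebuilds the plan from the optimal potentials; a sketch:

    import numpy as np
    import ot

    rng = np.random.RandomState(0)
    a, b = ot.unif(7), ot.unif(7)
    M = rng.rand(7, 7)
    pi = ot.stochastic.solve_dual_entropic(a, b, M, reg=1., batch_size=3)
    # pi approximately satisfies the marginals: pi @ ones ~ a, pi.T @ ones ~ b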
diff --git a/ot/unbalanced.py b/ot/unbalanced.py
index 6a61aa1..15e180b 100644
--- a/ot/unbalanced.py
+++ b/ot/unbalanced.py
@@ -23,29 +23,31 @@ def sinkhorn_unbalanced(a, b, M, reg, reg_m, method='sinkhorn', numItermax=1000,
The function solves the following optimization problem:
.. math::
- W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + reg_m KL(\gamma 1, a) + reg_m KL(\gamma^T 1, b)
+ W = \min_\gamma \ \langle \gamma, \mathbf{M} \rangle_F + \mathrm{reg}\cdot\Omega(\gamma) +
+ \mathrm{reg_m} \cdot \mathrm{KL}(\gamma \mathbf{1}, \mathbf{a}) +
+ \mathrm{reg_m} \cdot \mathrm{KL}(\gamma^T \mathbf{1}, \mathbf{b})
s.t.
- \gamma\geq 0
+ \gamma \geq 0
+
where :
- - M is the (dim_a, dim_b) metric cost matrix
- - :math:`\Omega` is the entropic regularization
- term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are source and target unbalanced distributions
+ - :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term, :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target unbalanced distributions
- KL is the Kullback-Leibler divergence
The algorithm used for solving the problem is the generalized
- Sinkhorn-Knopp matrix scaling algorithm as proposed in [10, 23]_
+ Sinkhorn-Knopp matrix scaling algorithm as proposed in :ref:`[10, 25] <references-sinkhorn-unbalanced>`
Parameters
----------
a : np.ndarray (dim_a,)
- Unnormalized histogram of dimension dim_a
+ Unnormalized histogram of dimension `dim_a`
b : np.ndarray (dim_b,) or np.ndarray (dim_b, n_hists)
- One or multiple unnormalized histograms of dimension dim_b
- If many, compute all the OT distances (a, b_i)
+ One or multiple unnormalized histograms of dimension `dim_b`.
+ If many, compute all the OT distances :math:`(\mathbf{a}, \mathbf{b}_i)_i`
M : np.ndarray (dim_a, dim_b)
loss matrix
reg : float
@@ -68,14 +70,14 @@ def sinkhorn_unbalanced(a, b, M, reg, reg_m, method='sinkhorn', numItermax=1000,
Returns
-------
if n_hists == 1:
- gamma : (dim_a x dim_b) ndarray
+ - gamma : (dim_a, dim_b) ndarray
Optimal transportation matrix for the given parameters
- log : dict
+ - log : dict
log dictionary returned only if `log` is `True`
else:
- ot_distance : (n_hists,) ndarray
- the OT distance between `a` and each of the histograms `b_i`
- log : dict
+ - ot_distance : (n_hists,) ndarray
+ the OT distance between :math:`\mathbf{a}` and each of the histograms :math:`\mathbf{b}_i`
+ - log : dict
log dictionary returned only if `log` is `True`
Examples
@@ -90,9 +92,9 @@ def sinkhorn_unbalanced(a, b, M, reg, reg_m, method='sinkhorn', numItermax=1000,
[0.18807035, 0.51122823]])
+ .. _references-sinkhorn-unbalanced:
References
----------
-
.. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal
Transport, Advances in Neural Information Processing Systems
(NIPS) 26, 2013
@@ -111,11 +113,11 @@ def sinkhorn_unbalanced(a, b, M, reg, reg_m, method='sinkhorn', numItermax=1000,
See Also
--------
- ot.unbalanced.sinkhorn_knopp_unbalanced : Unbalanced Classic Sinkhorn [10]
+ ot.unbalanced.sinkhorn_knopp_unbalanced : Unbalanced Classic Sinkhorn :ref:`[10] <references-sinkhorn-unbalanced>`
ot.unbalanced.sinkhorn_stabilized_unbalanced:
- Unbalanced Stabilized sinkhorn [9][10]
+ Unbalanced Stabilized sinkhorn :ref:`[9, 10] <references-sinkhorn-unbalanced>`
ot.unbalanced.sinkhorn_reg_scaling_unbalanced:
- Unbalanced Sinkhorn with epslilon scaling [9][10]
+ Unbalanced Sinkhorn with epsilon scaling :ref:`[9, 10] <references-sinkhorn-unbalanced>`
"""
@@ -151,29 +153,30 @@ def sinkhorn_unbalanced2(a, b, M, reg, reg_m, method='sinkhorn',
The function solves the following optimization problem:
.. math::
- W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + reg_m KL(\gamma 1, a) + reg_m KL(\gamma^T 1, b)
+ W = \min_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F + \mathrm{reg}\cdot\Omega(\gamma) +
+ \mathrm{reg_m} \cdot \mathrm{KL}(\gamma \mathbf{1}, \mathbf{a}) +
+ \mathrm{reg_m} \cdot \mathrm{KL}(\gamma^T \mathbf{1}, \mathbf{b})
s.t.
\gamma\geq 0
where :
- - M is the (dim_a, dim_b) metric cost matrix
- - :math:`\Omega` is the entropic regularization term
- :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are source and target unbalanced distributions
+ - :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term, :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target unbalanced distributions
- KL is the Kullback-Leibler divergence
The algorithm used for solving the problem is the generalized
- Sinkhorn-Knopp matrix scaling algorithm as proposed in [10, 23]_
+ Sinkhorn-Knopp matrix scaling algorithm as proposed in :ref:`[10, 25] <references-sinkhorn-unbalanced2>`
Parameters
----------
a : np.ndarray (dim_a,)
- Unnormalized histogram of dimension dim_a
+ Unnormalized histogram of dimension `dim_a`
b : np.ndarray (dim_b,) or np.ndarray (dim_b, n_hists)
- One or multiple unnormalized histograms of dimension dim_b
- If many, compute all the OT distances (a, b_i)
+ One or multiple unnormalized histograms of dimension `dim_b`.
+ If many, compute all the OT distances :math:`(\mathbf{a}, \mathbf{b}_i)_i`
M : np.ndarray (dim_a, dim_b)
loss matrix
reg : float
@@ -196,7 +199,7 @@ def sinkhorn_unbalanced2(a, b, M, reg, reg_m, method='sinkhorn',
Returns
-------
ot_distance : (n_hists,) ndarray
- the OT distance between `a` and each of the histograms `b_i`
+ the OT distance between :math:`\mathbf{a}` and each of the histograms :math:`\mathbf{b}_i`
log : dict
log dictionary returned only if `log` is `True`
@@ -211,10 +214,9 @@ def sinkhorn_unbalanced2(a, b, M, reg, reg_m, method='sinkhorn',
array([0.31912866])
-
+ .. _references-sinkhorn-unbalanced2:
References
----------
-
.. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal
Transport, Advances in Neural Information Processing Systems
(NIPS) 26, 2013
@@ -232,9 +234,9 @@ def sinkhorn_unbalanced2(a, b, M, reg, reg_m, method='sinkhorn',
See Also
--------
- ot.unbalanced.sinkhorn_knopp : Unbalanced Classic Sinkhorn [10]
- ot.unbalanced.sinkhorn_stabilized: Unbalanced Stabilized sinkhorn [9][10]
- ot.unbalanced.sinkhorn_reg_scaling: Unbalanced Sinkhorn with epslilon scaling [9][10]
+ ot.unbalanced.sinkhorn_knopp : Unbalanced Classic Sinkhorn :ref:`[10] <references-sinkhorn-unbalanced2>`
+ ot.unbalanced.sinkhorn_stabilized: Unbalanced Stabilized sinkhorn :ref:`[9, 10] <references-sinkhorn-unbalanced2>`
+ ot.unbalanced.sinkhorn_reg_scaling: Unbalanced Sinkhorn with epsilon scaling :ref:`[9, 10] <references-sinkhorn-unbalanced2>`
"""
b = np.asarray(b, dtype=np.float64)
@@ -270,26 +272,29 @@ def sinkhorn_knopp_unbalanced(a, b, M, reg, reg_m, numItermax=1000,
The function solves the following optimization problem:
.. math::
- W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + \reg_m KL(\gamma 1, a) + \reg_m KL(\gamma^T 1, b)
+ W = \min_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F + \mathrm{reg}\cdot\Omega(\gamma) +
+ \mathrm{reg_m} \cdot \mathrm{KL}(\gamma \mathbf{1}, \mathbf{a}) +
+ \mathrm{reg_m} \cdot \mathrm{KL}(\gamma^T \mathbf{1}, \mathbf{b})
s.t.
- \gamma\geq 0
+ \gamma \geq 0
+
where :
- - M is the (dim_a, dim_b) metric cost matrix
- - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are source and target unbalanced distributions
+ - :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term, :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target unbalanced distributions
- KL is the Kullback-Leibler divergence
- The algorithm used for solving the problem is the generalized Sinkhorn-Knopp matrix scaling algorithm as proposed in [10, 23]_
+ The algorithm used for solving the problem is the generalized Sinkhorn-Knopp matrix scaling algorithm as proposed in :ref:`[10, 25] <references-sinkhorn-knopp-unbalanced>`
Parameters
----------
a : np.ndarray (dim_a,)
- Unnormalized histogram of dimension dim_a
+ Unnormalized histogram of dimension `dim_a`
b : np.ndarray (dim_b,) or np.ndarray (dim_b, n_hists)
- One or multiple unnormalized histograms of dimension dim_b
+ One or multiple unnormalized histograms of dimension `dim_b`
If many, compute all the OT distances (a, b_i)
M : np.ndarray (dim_a, dim_b)
loss matrix
@@ -310,15 +315,16 @@ def sinkhorn_knopp_unbalanced(a, b, M, reg, reg_m, numItermax=1000,
Returns
-------
if n_hists == 1:
- gamma : (dim_a x dim_b) ndarray
+ - gamma : (dim_a, dim_b) ndarray
Optimal transportation matrix for the given parameters
- log : dict
+ - log : dict
log dictionary returned only if `log` is `True`
else:
- ot_distance : (n_hists,) ndarray
- the OT distance between `a` and each of the histograms `b_i`
- log : dict
+ - ot_distance : (n_hists,) ndarray
+ the OT distance between :math:`\mathbf{a}` and each of the histograms :math:`\mathbf{b}_i`
+ - log : dict
log dictionary returned only if `log` is `True`
+
Examples
--------
@@ -330,9 +336,10 @@ def sinkhorn_knopp_unbalanced(a, b, M, reg, reg_m, numItermax=1000,
array([[0.51122823, 0.18807035],
[0.18807035, 0.51122823]])
+
+ .. _references-sinkhorn-knopp-unbalanced:
References
----------
-
.. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016).
Scaling algorithms for unbalanced transport problems. arXiv preprint
arXiv:1607.05816.
@@ -445,32 +452,34 @@ def sinkhorn_stabilized_unbalanced(a, b, M, reg, reg_m, tau=1e5, numItermax=1000
problem and return the loss
The function solves the following optimization problem using log-domain
- stabilization as proposed in [10]:
+ stabilization as proposed in :ref:`[10] <references-sinkhorn-stabilized-unbalanced>`:
.. math::
- W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + reg_m KL(\gamma 1, a) + reg_m KL(\gamma^T 1, b)
+ W = \min_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F + \mathrm{reg}\cdot\Omega(\gamma) +
+ \mathrm{reg_m} \cdot \mathrm{KL}(\gamma \mathbf{1}, \mathbf{a}) +
+ \mathrm{reg_m} \cdot \mathrm{KL}(\gamma^T \mathbf{1}, \mathbf{b})
s.t.
- \gamma\geq 0
+ \gamma \geq 0
+
where :
- - M is the (dim_a, dim_b) metric cost matrix
- - :math:`\Omega` is the entropic regularization
- term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are source and target unbalanced distributions
+ - :math:`\mathbf{M}` is the (`dim_a`, `dim_b`) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term, :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`\mathbf{a}` and :math:`\mathbf{b}` are source and target unbalanced distributions
- KL is the Kullback-Leibler divergence
The algorithm used for solving the problem is the generalized
- Sinkhorn-Knopp matrix scaling algorithm as proposed in [10, 23]_
+ Sinkhorn-Knopp matrix scaling algorithm as proposed in :ref:`[10, 25] <references-sinkhorn-stabilized-unbalanced>`
Parameters
----------
a : np.ndarray (dim_a,)
- Unnormalized histogram of dimension dim_a
+ Unnormalized histogram of dimension `dim_a`
b : np.ndarray (dim_b,) or np.ndarray (dim_b, n_hists)
- One or multiple unnormalized histograms of dimension dim_b
- If many, compute all the OT distances (a, b_i)
+ One or multiple unnormalized histograms of dimension `dim_b`.
+ If many, compute all the OT distances :math:`(\mathbf{a}, \mathbf{b}_i)_i`
M : np.ndarray (dim_a, dim_b)
loss matrix
reg : float
@@ -492,14 +501,14 @@ def sinkhorn_stabilized_unbalanced(a, b, M, reg, reg_m, tau=1e5, numItermax=1000
Returns
-------
if n_hists == 1:
- gamma : (dim_a x dim_b) ndarray
+ - gamma : (dim_a, dim_b) ndarray
Optimal transportation matrix for the given parameters
- log : dict
+ - log : dict
log dictionary returned only if `log` is `True`
else:
- ot_distance : (n_hists,) ndarray
- the OT distance between `a` and each of the histograms `b_i`
- log : dict
+ - ot_distance : (n_hists,) ndarray
+ the OT distance between :math:`\mathbf{a}` and each of the histograms :math:`\mathbf{b}_i`
+ - log : dict
log dictionary returned only if `log` is `True`
Examples
--------
@@ -512,9 +521,10 @@ def sinkhorn_stabilized_unbalanced(a, b, M, reg, reg_m, tau=1e5, numItermax=1000
array([[0.51122823, 0.18807035],
[0.18807035, 0.51122823]])
+
+ .. _references-sinkhorn-stabilized-unbalanced:
References
----------
-
.. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016).
Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
@@ -654,29 +664,27 @@ def sinkhorn_stabilized_unbalanced(a, b, M, reg, reg_m, tau=1e5, numItermax=1000
def barycenter_unbalanced_stabilized(A, M, reg, reg_m, weights=None, tau=1e3,
numItermax=1000, stopThr=1e-6,
verbose=False, log=False):
- r"""Compute the entropic unbalanced wasserstein barycenter of A with stabilization.
+ r"""Compute the entropic unbalanced wasserstein barycenter of :math:`\mathbf{A}` with stabilization.
The function solves the following optimization problem:
.. math::
- \mathbf{a} = arg\min_\mathbf{a} \sum_i Wu_{reg}(\mathbf{a},\mathbf{a}_i)
+ \mathbf{a} = \mathop{\arg \min}_\mathbf{a} \quad \sum_i W_{u_{reg}}(\mathbf{a},\mathbf{a}_i)
where :
- - :math:`Wu_{reg}(\cdot,\cdot)` is the unbalanced entropic regularized
- Wasserstein distance (see ot.unbalanced.sinkhorn_unbalanced)
- - :math:`\mathbf{a}_i` are training distributions in the columns of
- matrix :math:`\mathbf{A}`
- - reg and :math:`\mathbf{M}` are respectively the regularization term and
- the cost matrix for OT
+ - :math:`W_{u_{reg}}(\cdot,\cdot)` is the unbalanced entropic regularized Wasserstein distance (see :py:func:`ot.unbalanced.sinkhorn_unbalanced`)
+ - :math:`\mathbf{a}_i` are training distributions in the columns of matrix :math:`\mathbf{A}`
+ - reg and :math:`\mathbf{M}` are respectively the regularization term and the cost matrix for OT
- reg_m is the marginal relaxation hyperparameter
- The algorithm used for solving the problem is the generalized
- Sinkhorn-Knopp matrix scaling algorithm as proposed in [10]_
+
+ The algorithm used for solving the problem is the generalized
+ Sinkhorn-Knopp matrix scaling algorithm as proposed in :ref:`[10] <references-barycenter-unbalanced-stabilized>`
Parameters
----------
A : np.ndarray (dim, n_hists)
- `n_hists` training distributions a_i of dimension dim
+ `n_hists` training distributions :math:`\mathbf{a}_i` of dimension `dim`
M : np.ndarray (dim, dim)
ground metric matrix for OT.
reg : float
@@ -706,9 +714,9 @@ def barycenter_unbalanced_stabilized(A, M, reg, reg_m, weights=None, tau=1e3,
log dictionary returned only if `log` is `True`
+ .. _references-barycenter-unbalanced-stabilized:
References
----------
-
.. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré,
G. (2015). Iterative Bregman projections for regularized transportation
problems. SIAM Journal on Scientific Computing, 37(2), A1111-A1138.
@@ -806,29 +814,27 @@ def barycenter_unbalanced_stabilized(A, M, reg, reg_m, weights=None, tau=1e3,
def barycenter_unbalanced_sinkhorn(A, M, reg, reg_m, weights=None,
numItermax=1000, stopThr=1e-6,
verbose=False, log=False):
- r"""Compute the entropic unbalanced wasserstein barycenter of A.
+ r"""Compute the entropic unbalanced wasserstein barycenter of :math:`\mathbf{A}`.
- The function solves the following optimization problem with a
+ The function solves the following optimization problem with respect to :math:`\mathbf{a}`:
.. math::
- \mathbf{a} = arg\min_\mathbf{a} \sum_i Wu_{reg}(\mathbf{a},\mathbf{a}_i)
+ \mathbf{a} = \mathop{\arg \min}_\mathbf{a} \quad \sum_i W_{u_{reg}}(\mathbf{a},\mathbf{a}_i)
where :
- - :math:`Wu_{reg}(\cdot,\cdot)` is the unbalanced entropic regularized
- Wasserstein distance (see ot.unbalanced.sinkhorn_unbalanced)
- - :math:`\mathbf{a}_i` are training distributions in the columns of matrix
- :math:`\mathbf{A}`
- - reg and :math:`\mathbf{M}` are respectively the regularization term and
- the cost matrix for OT
+ - :math:`W_{u_{reg}}(\cdot,\cdot)` is the unbalanced entropic regularized Wasserstein distance (see :py:func:`ot.unbalanced.sinkhorn_unbalanced`)
+ - :math:`\mathbf{a}_i` are training distributions in the columns of matrix :math:`\mathbf{A}`
+ - reg and :math:`\mathbf{M}` are respectively the regularization term and the cost matrix for OT
- reg_m is the marginal relaxation hyperparameter
+
The algorithm used for solving the problem is the generalized
- Sinkhorn-Knopp matrix scaling algorithm as proposed in [10]_
+ Sinkhorn-Knopp matrix scaling algorithm as proposed in :ref:`[10] <references-barycenter-unbalanced-sinkhorn>`
Parameters
----------
A : np.ndarray (dim, n_hists)
- `n_hists` training distributions a_i of dimension dim
+ `n_hists` training distributions :math:`\mathbf{a}_i` of dimension `dim`
M : np.ndarray (dim, dim)
ground metric matrix for OT.
reg : float
@@ -856,9 +862,9 @@ def barycenter_unbalanced_sinkhorn(A, M, reg, reg_m, weights=None,
log dictionary returned only if `log` is `True`
+ .. _references-barycenter-unbalanced-sinkhorn:
References
----------
-
.. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G.
(2015). Iterative Bregman projections for regularized transportation
problems. SIAM Journal on Scientific Computing, 37(2), A1111-A1138.
@@ -936,29 +942,27 @@ def barycenter_unbalanced_sinkhorn(A, M, reg, reg_m, weights=None,
def barycenter_unbalanced(A, M, reg, reg_m, method="sinkhorn", weights=None,
numItermax=1000, stopThr=1e-6,
verbose=False, log=False, **kwargs):
- r"""Compute the entropic unbalanced wasserstein barycenter of A.
+ r"""Compute the entropic unbalanced wasserstein barycenter of :math:`\mathbf{A}`.
- The function solves the following optimization problem with a
+ The function solves the following optimization problem with respect to :math:`\mathbf{a}`:
.. math::
- \mathbf{a} = arg\min_\mathbf{a} \sum_i Wu_{reg}(\mathbf{a},\mathbf{a}_i)
+ \mathbf{a} = \mathop{\arg \min}_\mathbf{a} \quad \sum_i W_{u_{reg}}(\mathbf{a},\mathbf{a}_i)
where :
- - :math:`Wu_{reg}(\cdot,\cdot)` is the unbalanced entropic regularized
- Wasserstein distance (see ot.unbalanced.sinkhorn_unbalanced)
- - :math:`\mathbf{a}_i` are training distributions in the columns of matrix
- :math:`\mathbf{A}`
- - reg and :math:`\mathbf{M}` are respectively the regularization term and
- the cost matrix for OT
+ - :math:`W_{u_{reg}}(\cdot,\cdot)` is the unbalanced entropic regularized Wasserstein distance (see :py:func:`ot.unbalanced.sinkhorn_unbalanced`)
+ - :math:`\mathbf{a}_i` are training distributions in the columns of matrix :math:`\mathbf{A}`
+ - reg and :math:`\mathbf{M}` are respectively the regularization term and the cost matrix for OT
- reg_m is the marginal relaxation hyperparameter
+
The algorithm used for solving the problem is the generalized
- Sinkhorn-Knopp matrix scaling algorithm as proposed in [10]_
+ Sinkhorn-Knopp matrix scaling algorithm as proposed in :ref:`[10] <references-barycenter-unbalanced>`
Parameters
----------
A : np.ndarray (dim, n_hists)
- `n_hists` training distributions a_i of dimension dim
+ `n_hists` training distributions :math:`\mathbf{a}_i` of dimension `dim`
M : np.ndarray (dim, dim)
ground metric matrix for OT.
reg : float
@@ -986,9 +990,9 @@ def barycenter_unbalanced(A, M, reg, reg_m, method="sinkhorn", weights=None,
log dictionary return only if log==True in parameters
+ .. _references-barycenter-unbalanced:
References
----------
-
.. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G.
(2015). Iterative Bregman projections for regularized transportation
problems. SIAM Journal on Scientific Computing, 37(2), A1111-A1138.
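To make the documented signature concrete, here is a minimal usage sketch of the unbalanced barycenter (illustrative values only; the histograms and hyperparameters are assumptions, not part of the patch):

import numpy as np
from ot.unbalanced import barycenter_unbalanced
from ot.utils import dist0

n = 50
x = np.arange(n, dtype=np.float64)

# two bump-shaped histograms as the columns of A (dim, n_hists)
a1 = np.exp(-0.5 * ((x - 15.0) / 4.0) ** 2)
a2 = np.exp(-0.5 * ((x - 35.0) / 4.0) ** 2)
A = np.vstack((a1, a2)).T
A /= A.sum(axis=0)                  # normalize each column

M = dist0(n)                        # (n, n) squared-distance ground cost
M /= M.max()

reg = 1e-2                          # entropic regularization
reg_m = 10.0                        # marginal relaxation
bary = barycenter_unbalanced(A, M, reg, reg_m, method="sinkhorn")
print(bary.shape)                   # (n,)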
diff --git a/ot/utils.py b/ot/utils.py
index 0608aee..c878563 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -21,26 +21,26 @@ __time_tic_toc = time.time()
def tic():
- """ Python implementation of Matlab tic() function """
+ r""" Python implementation of Matlab tic() function """
global __time_tic_toc
__time_tic_toc = time.time()
def toc(message='Elapsed time : {} s'):
- """ Python implementation of Matlab toc() function """
+ r""" Python implementation of Matlab toc() function """
t = time.time()
print(message.format(t - __time_tic_toc))
return t - __time_tic_toc
def toq():
- """ Python implementation of Julia toc() function """
+ r""" Python implementation of Julia toc() function """
t = time.time()
return t - __time_tic_toc
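A quick usage sketch of the three timers above: tic() stores a global start time, toc() prints and returns the elapsed seconds, toq() only returns it.

from ot.utils import tic, toc, toq

tic()
_ = sum(i * i for i in range(100000))   # some work to time
elapsed = toc('Elapsed time : {} s')    # prints, then returns the float
silent = toq()                          # returns the elapsed time, no printing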
def kernel(x1, x2, method='gaussian', sigma=1, **kwargs):
- """Compute kernel matrix"""
+ r"""Compute kernel matrix"""
nx = get_backend(x1, x2)
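A small sketch of kernel() usage, assuming the usual Gaussian form :math:`K_{ij} = \exp(-\|x1_i - x2_j\|^2 / (2 \sigma^2))` built on the squared-Euclidean cost used elsewhere in this module:

import numpy as np
from ot.utils import kernel

rng = np.random.RandomState(0)
x1 = rng.randn(5, 2)
x2 = rng.randn(4, 2)
K = kernel(x1, x2, method='gaussian', sigma=1.0)
print(K.shape)   # (5, 4), entries in (0, 1]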
@@ -50,13 +50,13 @@ def kernel(x1, x2, method='gaussian', sigma=1, **kwargs):
def laplacian(x):
- """Compute Laplacian matrix"""
+ r"""Compute Laplacian matrix"""
L = np.diag(np.sum(x, axis=0)) - x
return L
def list_to_array(*lst):
- """ Convert a list if in numpy format """
+ r""" Convert a list if in numpy format """
if len(lst) > 1:
return [np.array(a) if isinstance(a, list) else a for a in lst]
else:
@@ -64,17 +64,18 @@ def list_to_array(*lst):
def proj_simplex(v, z=1):
- r""" compute the closest point (orthogonal projection) on the
- generalized (n-1)-simplex of a vector v wrt. to the Euclidean
+ r"""Compute the closest point (orthogonal projection) on the
+ generalized `(n-1)`-simplex of a vector :math:`\mathbf{v}` wrt. the Euclidean
distance, thus solving:
+
.. math::
- \mathcal{P}(w) \in arg\min_\gamma || \gamma - v ||_2
+ \mathcal{P}(\mathbf{v}) \in \mathop{\arg \min}_\gamma \quad \| \gamma - \mathbf{v} \|_2
- s.t. \gamma^T 1= z
+ s.t. \ \gamma^T \mathbf{1} = z
- \gamma\geq 0
+ \gamma \geq 0
- If v is a 2d array, compute all the projections wrt. axis 0
+ If :math:`\mathbf{v}` is a 2d array, compute all the projections wrt. axis 0
.. note:: This function is backend-compatible and will work on arrays
from all compatible backends.
@@ -87,7 +88,7 @@ def proj_simplex(v, z=1):
Returns
-------
- h : ndarray, shape (n,d)
+ h : ndarray, shape (`n`, `d`)
Array of projections on the simplex
"""
nx = get_backend(v)
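A minimal sketch checking the two constraints of the projection defined above:

import numpy as np
from ot.utils import proj_simplex

v = np.array([0.5, -0.2, 1.3])
h = proj_simplex(v)               # default z=1
assert np.all(h >= 0)             # gamma >= 0
assert np.isclose(h.sum(), 1.0)   # gamma^T 1 = z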
@@ -116,26 +117,24 @@ def proj_simplex(v, z=1):
def unif(n):
- """ return a uniform histogram of length n (simplex)
+ r"""
+ Return a uniform histogram of length `n` (simplex).
Parameters
----------
-
n : int
number of bins in the histogram
Returns
-------
- h : np.array (n,)
- histogram of length n such that h_i=1/n for all i
-
-
+ h : np.array (`n`,)
+ histogram of length `n` such that :math:`\forall i, \mathbf{h}_i = \frac{1}{n}`
"""
return np.ones((n,)) / n
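For instance, in the doctest style used elsewhere in this file:

>>> import ot
>>> ot.unif(4)
array([0.25, 0.25, 0.25, 0.25])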
def clean_zeros(a, b, M):
- """ Remove all components with zeros weights in a and b
+ r""" Remove all components with zeros weights in :math:`\mathbf{a}` and :math:`\mathbf{b}`
"""
M2 = M[a > 0, :][:, b > 0].copy()  # copy force c style matrix (for emd)
a2 = a[a > 0]
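A sketch of clean_zeros, assuming it returns the cleaned (a, b, M) triplet as the snippet above suggests: zero-mass bins disappear from both histograms and from the matching rows/columns of the cost matrix.

import numpy as np
from ot.utils import clean_zeros

a = np.array([0.5, 0.0, 0.5])
b = np.array([0.0, 1.0])
M = np.arange(6, dtype=np.float64).reshape(3, 2)
a2, b2, M2 = clean_zeros(a, b, M)
print(a2, b2, M2.shape)   # [0.5 0.5] [1.] (2, 1)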
@@ -144,8 +143,8 @@ def clean_zeros(a, b, M):
def euclidean_distances(X, Y, squared=False):
- """
- Considering the rows of X (and Y=X) as vectors, compute the
+ r"""
+ Considering the rows of :math:`\mathbf{X}` (and :math:`\mathbf{Y} = \mathbf{X}`) as vectors, compute the
distance matrix between each pair of vectors.
.. note:: This function is backend-compatible and will work on arrays
@@ -153,14 +152,14 @@ def euclidean_distances(X, Y, squared=False):
Parameters
----------
- X : {array-like}, shape (n_samples_1, n_features)
- Y : {array-like}, shape (n_samples_2, n_features)
+ X : array-like, shape (n_samples_1, n_features)
+ Y : array-like, shape (n_samples_2, n_features)
squared : boolean, optional
Return squared Euclidean distances.
Returns
-------
- distances : {array}, shape (n_samples_1, n_samples_2)
+ distances : array-like, shape (`n_samples_1`, `n_samples_2`)
"""
nx = get_backend(X, Y)
@@ -184,7 +183,7 @@ def euclidean_distances(X, Y, squared=False):
def dist(x1, x2=None, metric='sqeuclidean', p=2):
- """Compute distance between samples in x1 and x2
+ r"""Compute distance between samples in :math:`\mathbf{x_1}` and :math:`\mathbf{x_2}`
.. note:: This function is backend-compatible and will work on arrays
from all compatible backends.
@@ -193,9 +192,9 @@ def dist(x1, x2=None, metric='sqeuclidean', p=2):
----------
x1 : array-like, shape (n1,d)
- matrix with n1 samples of size d
+ matrix with `n1` samples of size `d`
x2 : array-like, shape (n2,d), optional
- matrix with n2 samples of size d (if None then x2=x1)
+ matrix with `n2` samples of size `d` (if None then :math:`\mathbf{x_2} = \mathbf{x_1}`)
metric : str | callable, optional
'sqeuclidean' or 'euclidean' on all backends. On numpy the function also
accepts the metrics supported by scipy.spatial.distance.cdist: 'braycurtis',
@@ -208,7 +207,7 @@ def dist(x1, x2=None, metric='sqeuclidean', p=2):
Returns
-------
- M : array-like, shape (n1, n2)
+ M : array-like, shape (`n1`, `n2`)
distance matrix computed with given metric
"""
@@ -226,7 +225,7 @@ def dist(x1, x2=None, metric='sqeuclidean', p=2):
def dist0(n, method='lin_square'):
- """Compute standard cost matrices of size (n, n) for OT problems
+ r"""Compute standard cost matrices of size (`n`, `n`) for OT problems
Parameters
----------
@@ -235,11 +234,11 @@ def dist0(n, method='lin_square'):
method : str, optional
Type of loss matrix chosen from:
- * 'lin_square' : linear sampling between 0 and n-1, quadratic loss
+ * 'lin_square' : linear sampling between 0 and `n-1`, quadratic loss
Returns
-------
- M : ndarray, shape (n1,n2)
+ M : ndarray, shape (`n`, `n`)
Distance matrix computed with given metric.
"""
res = 0
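On a small grid, with 'lin_square' the entries are :math:`(i - j)^2`:

from ot.utils import dist0

M = dist0(5)
print(M[0])   # [ 0.  1.  4.  9. 16.]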
@@ -250,7 +249,7 @@ def dist0(n, method='lin_square'):
def cost_normalization(C, norm=None):
- """ Apply normalization to the loss matrix
+ r""" Apply normalization to the loss matrix
Parameters
----------
@@ -262,7 +261,7 @@ def cost_normalization(C, norm=None):
Returns
-------
- C : ndarray, shape (n1, n2)
+ C : ndarray, shape (`n1`, `n2`)
The input cost matrix normalized according to given norm.
"""
@@ -284,23 +283,23 @@ def cost_normalization(C, norm=None):
def dots(*args):
- """ dots function for multiple matrix multiply """
+ r""" dots function for multiple matrix multiply """
return reduce(np.dot, args)
def label_normalization(y, start=0):
- """ Transform labels to start at a given value
+ r""" Transform labels to start at a given value
Parameters
----------
y : array-like, shape (n, )
The vector of labels to be normalized.
start : int
- Desired value for the smallest label in y (default=0)
+ Desired value for the smallest label in :math:`\mathbf{y}` (default=0)
Returns
-------
- y : array-like, shape (n1, )
+ y : array-like, shape (`n`, )
The input vector of labels normalized according to given start value.
"""
@@ -311,14 +310,14 @@ def label_normalization(y, start=0):
def parmap(f, X, nprocs="default"):
- """ paralell map for multiprocessing.
+ r""" parallel map for multiprocessing.
The function has been deprecated and only performs a regular map.
"""
return list(map(f, X))
def check_params(**kwargs):
- """check_params: check whether some parameters are missing
+ r"""check_params: check whether some parameters are missing
"""
missing_params = []
@@ -339,14 +338,14 @@ def check_params(**kwargs):
def check_random_state(seed):
- """Turn seed into a np.random.RandomState instance
+ r"""Turn `seed` into a np.random.RandomState instance
Parameters
----------
seed : None | int | instance of RandomState
- If seed is None, return the RandomState singleton used by np.random.
- If seed is an int, return a new RandomState instance seeded with seed.
- If seed is already a RandomState instance, return it.
+ If `seed` is None, return the RandomState singleton used by np.random.
+ If `seed` is an int, return a new RandomState instance seeded with `seed`.
+ If `seed` is already a RandomState instance, return it.
Otherwise raise ValueError.
"""
if seed is None or seed is np.random:
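The three accepted seed types all yield a RandomState usable interchangeably downstream, as in this sketch:

import numpy as np
from ot.utils import check_random_state

rng_a = check_random_state(None)                      # np.random singleton
rng_b = check_random_state(42)                        # fresh seeded instance
rng_c = check_random_state(np.random.RandomState(0))  # returned unchanged
print(rng_b.rand(2))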
@@ -360,18 +359,21 @@ def check_random_state(seed):
class deprecated(object):
- """Decorator to mark a function or class as deprecated.
+ r"""Decorator to mark a function or class as deprecated.
deprecated class from scikit-learn package
https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/deprecation.py
Issue a warning when the function is called/the class is instantiated and
adds a warning to the docstring.
The optional extra argument will be appended to the deprecation message
- and the docstring. Note: to use this with the default value for extra, put
- in an empty of parentheses:
- >>> from ot.deprecation import deprecated # doctest: +SKIP
- >>> @deprecated() # doctest: +SKIP
- ... def some_function(): pass # doctest: +SKIP
+ and the docstring.
+
+ .. note::
+ To use this with the default value for extra, use empty parentheses:
+
+ >>> from ot.deprecation import deprecated # doctest: +SKIP
+ >>> @deprecated() # doctest: +SKIP
+ ... def some_function(): pass # doctest: +SKIP
Parameters
----------
@@ -386,7 +388,7 @@ class deprecated(object):
self.extra = extra
def __call__(self, obj):
- """Call method
+ r"""Call method
Parameters
----------
obj : object
@@ -417,7 +419,7 @@ class deprecated(object):
return cls
def _decorate_fun(self, fun):
- """Decorate function fun"""
+ r"""Decorate function fun"""
msg = "Function %s is deprecated" % fun.__name__
if self.extra:
@@ -443,7 +445,7 @@ class deprecated(object):
def _is_deprecated(func):
- """Helper to check if func is wraped by our deprecated decorator"""
+ r"""Helper to check if func is wraped by our deprecated decorator"""
if sys.version_info < (3, 5):
raise NotImplementedError("This is only available for python3.5 "
"or above")
@@ -457,7 +459,7 @@ def _is_deprecated(func):
class BaseEstimator(object):
- """Base class for most objects in POT
+ r"""Base class for most objects in POT
Code adapted from sklearn BaseEstimator class
@@ -470,7 +472,7 @@ class BaseEstimator(object):
@classmethod
def _get_param_names(cls):
- """Get parameter names for the estimator"""
+ r"""Get parameter names for the estimator"""
# fetch the constructor or the original constructor before
# deprecation wrapping if any
@@ -497,7 +499,7 @@ class BaseEstimator(object):
return sorted([p.name for p in parameters])
def get_params(self, deep=True):
- """Get parameters for this estimator.
+ r"""Get parameters for this estimator.
Parameters
----------
@@ -534,7 +536,7 @@ class BaseEstimator(object):
return out
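A hedged sketch of this sklearn-style parameter API (get_params above, set_params documented just below); `DemoMapping` is a hypothetical subclass, only BaseEstimator comes from ot.utils:

from ot.utils import BaseEstimator

class DemoMapping(BaseEstimator):
    def __init__(self, reg=1.0, metric='sqeuclidean'):
        self.reg = reg
        self.metric = metric

est = DemoMapping()
print(est.get_params())   # {'metric': 'sqeuclidean', 'reg': 1.0}
est.set_params(reg=0.1)   # accepts any parameter named in __init__
print(est.reg)            # 0.1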
def set_params(self, **params):
- """Set the parameters of this estimator.
+ r"""Set the parameters of this estimator.
The method works on simple estimators as well as on nested objects
(such as pipelines). The latter have parameters of the form
@@ -574,7 +576,7 @@ class BaseEstimator(object):
class UndefinedParameter(Exception):
- """
+ r"""
Exception raised when an undefined parameter is called
"""