diff options
author | Rémi Flamary <remi.flamary@gmail.com> | 2019-06-06 17:22:05 +0200 |
---|---|---|
committer | Rémi Flamary <remi.flamary@gmail.com> | 2019-06-06 17:22:05 +0200 |
commit | 1171f7e39742c207dad6ab5fd15f59ed62f8f4a5 (patch) | |
tree | 349903e661fa9ae42ed1179221f4b9f585948c7e /ot | |
parent | 0fc6938dc15e8888b0a73fa4b6a421f39f0e0697 (diff) |
start documentation ot
Diffstat (limited to 'ot')
-rw-r--r-- | ot/__init__.py | 16 | ||||
-rw-r--r-- | ot/da.py | 8 | ||||
-rw-r--r-- | ot/gpu/__init__.py | 6 | ||||
-rw-r--r-- | ot/lp/__init__.py | 47 | ||||
-rw-r--r-- | ot/lp/emd_wrap.pyx | 11 |
5 files changed, 60 insertions, 28 deletions
diff --git a/ot/__init__.py b/ot/__init__.py index b74b924..6d6dc75 100644 --- a/ot/__init__.py +++ b/ot/__init__.py @@ -1,6 +1,20 @@ -"""Python Optimal Transport toolbox +""" + +This is the main module of the POT toolbox. It provides easy access to +a number of functions described below. + +### FAQ + +#### How to compute the Wasserstein distance ? +.. warning:: + The list of automatically imported sub-modules is as follows: + :py:mod:`ot.lp`, :py:mod:`ot.bregman`, :py:mod:`ot.optim` + :py:mod:`ot.utils`, :py:mod:`ot.datasets`, + :py:mod:`ot.gromov`, :py:mod:`ot.smooth` + :py:mod:`ot.stochastic` + The other sub-modules are not imported due to additional dependencies. """ @@ -41,15 +41,15 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, where : - M is the (ns,nt) metric cost matrix - - :math:`\Omega_e` is the entropic regularization term - :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - :math:`\Omega_g` is the group lasso regulaization term + - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e + (\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`\Omega_g` is the group lasso regularization term :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` where :math:`\mathcal{I}_c` are the index of samples from class c in the source domain. - a and b are source and target weights (sum to 1) - The algorithm used for solving the problem is the generalised conditional + The algorithm used for solving the problem is the generalized conditional gradient as proposed in [5]_ [7]_ diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py index deda6b1..6a2afcf 100644 --- a/ot/gpu/__init__.py +++ b/ot/gpu/__init__.py @@ -5,11 +5,15 @@ This module provides GPU implementation for several OT solvers and utility functions. The GPU backend in handled by `cupy <https://cupy.chainer.org/>`_. +.. warning:: + Note that by default the module is not import in :mod:`ot`. in order to + use it you need to import :mod:`ot.gpu` . + By default, the functions in this module accept and return numpy arrays in order to proide drop-in replacement for the other POT function but the transfer between CPU en GPU comes with a significant overhead. -In order to get the best erformances, we recommend to give only cupy +In order to get the best performances, we recommend to give only cupy arrays to the functions and desactivate the conversion to numpy of the result of the function with parameter ``to_numpy=False``. diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 02cbd8c..ed6fa52 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -1,6 +1,9 @@ # -*- coding: utf-8 -*- """ Solvers for the original linear program OT problem + + + """ # Author: Remi Flamary <remi.flamary@unice.fr> @@ -37,26 +40,30 @@ def emd(a, b, M, numItermax=100000, log=False): - M is the metric cost matrix - a and b are the sample weights + .. warning:: + Note that the M matrix needs to be a C-order numpy.array in float64 + format. + Uses the algorithm proposed in [1]_ Parameters ---------- - a : (ns,) ndarray, float64 - Source histogram (uniform weigth if empty list) - b : (nt,) ndarray, float64 - Target histogram (uniform weigth if empty list) - M : (ns,nt) ndarray, float64 - loss matrix + a : (ns,) numpy.ndarray, float64 + Source histogram (uniform weight if empty list) + b : (nt,) numpy.ndarray, float64 + Target histogram (uniform weight if empty list) + M : (ns,nt) numpy.ndarray, float64 + Loss matrix (c-order array with type float64) numItermax : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. - log: boolean, optional (default=False) + log: bool, optional (default=False) If True, returns a dictionary containing the cost and dual variables. Otherwise returns only the optimal transportation matrix. Returns ------- - gamma: (ns x nt) ndarray + gamma: (ns x nt) numpy.ndarray Optimal transportation matrix for the given parameters log: dict If input log is true, a dictionary containing the cost and dual @@ -128,16 +135,20 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), - M is the metric cost matrix - a and b are the sample weights + .. warning:: + Note that the M matrix needs to be a C-order numpy.array in float64 + format. + Uses the algorithm proposed in [1]_ Parameters ---------- - a : (ns,) ndarray, float64 - Source histogram (uniform weigth if empty list) - b : (nt,) ndarray, float64 - Target histogram (uniform weigth if empty list) - M : (ns,nt) ndarray, float64 - loss matrix + a : (ns,) numpy.ndarray, float64 + Source histogram (uniform weight if empty list) + b : (nt,) numpy.ndarray, float64 + Target histogram (uniform weight if empty list) + M : (ns,nt) numpy.ndarray, float64 + Loss matrix (c-order array with type float64) numItermax : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. @@ -151,7 +162,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), ------- gamma: (ns x nt) ndarray Optimal transportation matrix for the given parameters - log: dict + log: dictnp If input log is true, a dictionary containing the cost and dual variables and exit status @@ -231,9 +242,9 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None Parameters ---------- - measures_locations : list of (k_i,d) np.ndarray + measures_locations : list of (k_i,d) numpy.ndarray The discrete support of a measure supported on k_i locations of a d-dimensional space (k_i can be different for each element of the list) - measures_weights : list of (k_i,) np.ndarray + measures_weights : list of (k_i,) numpy.ndarray Numpy arrays where each numpy array has k_i non-negatives values summing to one representing the weights of each discrete input measure X_init : (k,d) np.ndarray @@ -246,7 +257,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None numItermax : int, optional Max number of iterations stopThr : float, optional - Stop threshol on error (>0) + Stop threshold on error (>0) verbose : bool, optional Print information along iterations log : bool, optional diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 83ee6aa..edb5f7c 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -55,13 +55,16 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod - M is the metric cost matrix - a and b are the sample weights + .. warning:: + Note that the M matrix needs to be a C-order :py.cls:`numpy.array` + Parameters ---------- - a : (ns,) ndarray, float64 + a : (ns,) numpy.ndarray, float64 source histogram - b : (nt,) ndarray, float64 + b : (nt,) numpy.ndarray, float64 target histogram - M : (ns,nt) ndarray, float64 + M : (ns,nt) numpy.ndarray, float64 loss matrix max_iter : int The maximum number of iterations before stopping the optimization @@ -70,7 +73,7 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod Returns ------- - gamma: (ns x nt) ndarray + gamma: (ns x nt) numpy.ndarray Optimal transportation matrix for the given parameters """ |