summaryrefslogtreecommitdiff
path: root/ot
diff options
context:
space:
mode:
authorRémi Flamary <remi.flamary@gmail.com>2019-06-06 17:22:05 +0200
committerRémi Flamary <remi.flamary@gmail.com>2019-06-06 17:22:05 +0200
commit1171f7e39742c207dad6ab5fd15f59ed62f8f4a5 (patch)
tree349903e661fa9ae42ed1179221f4b9f585948c7e /ot
parent0fc6938dc15e8888b0a73fa4b6a421f39f0e0697 (diff)
start documentation ot
Diffstat (limited to 'ot')
-rw-r--r--ot/__init__.py16
-rw-r--r--ot/da.py8
-rw-r--r--ot/gpu/__init__.py6
-rw-r--r--ot/lp/__init__.py47
-rw-r--r--ot/lp/emd_wrap.pyx11
5 files changed, 60 insertions, 28 deletions
diff --git a/ot/__init__.py b/ot/__init__.py
index b74b924..6d6dc75 100644
--- a/ot/__init__.py
+++ b/ot/__init__.py
@@ -1,6 +1,20 @@
-"""Python Optimal Transport toolbox
+"""
+
+This is the main module of the POT toolbox. It provides easy access to
+a number of functions described below.
+
+### FAQ
+
+#### How to compute the Wasserstein distance ?
+.. warning::
+ The list of automatically imported sub-modules is as follows:
+ :py:mod:`ot.lp`, :py:mod:`ot.bregman`, :py:mod:`ot.optim`
+ :py:mod:`ot.utils`, :py:mod:`ot.datasets`,
+ :py:mod:`ot.gromov`, :py:mod:`ot.smooth`
+ :py:mod:`ot.stochastic`
+ The other sub-modules are not imported due to additional dependencies.
"""
diff --git a/ot/da.py b/ot/da.py
index 479e698..83f9027 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -41,15 +41,15 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
where :
- M is the (ns,nt) metric cost matrix
- - :math:`\Omega_e` is the entropic regularization term
- :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - :math:`\Omega_g` is the group lasso regulaization term
+ - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e
+ (\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`\Omega_g` is the group lasso regularization term
:math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1`
where :math:`\mathcal{I}_c` are the index of samples from class c
in the source domain.
- a and b are source and target weights (sum to 1)
- The algorithm used for solving the problem is the generalised conditional
+ The algorithm used for solving the problem is the generalized conditional
gradient as proposed in [5]_ [7]_
diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py
index deda6b1..6a2afcf 100644
--- a/ot/gpu/__init__.py
+++ b/ot/gpu/__init__.py
@@ -5,11 +5,15 @@ This module provides GPU implementation for several OT solvers and utility
functions. The GPU backend in handled by `cupy
<https://cupy.chainer.org/>`_.
+.. warning::
+ Note that by default the module is not import in :mod:`ot`. in order to
+ use it you need to import :mod:`ot.gpu` .
+
By default, the functions in this module accept and return numpy arrays
in order to proide drop-in replacement for the other POT function but
the transfer between CPU en GPU comes with a significant overhead.
-In order to get the best erformances, we recommend to give only cupy
+In order to get the best performances, we recommend to give only cupy
arrays to the functions and desactivate the conversion to numpy of the
result of the function with parameter ``to_numpy=False``.
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 02cbd8c..ed6fa52 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -1,6 +1,9 @@
# -*- coding: utf-8 -*-
"""
Solvers for the original linear program OT problem
+
+
+
"""
# Author: Remi Flamary <remi.flamary@unice.fr>
@@ -37,26 +40,30 @@ def emd(a, b, M, numItermax=100000, log=False):
- M is the metric cost matrix
- a and b are the sample weights
+ .. warning::
+ Note that the M matrix needs to be a C-order numpy.array in float64
+ format.
+
Uses the algorithm proposed in [1]_
Parameters
----------
- a : (ns,) ndarray, float64
- Source histogram (uniform weigth if empty list)
- b : (nt,) ndarray, float64
- Target histogram (uniform weigth if empty list)
- M : (ns,nt) ndarray, float64
- loss matrix
+ a : (ns,) numpy.ndarray, float64
+ Source histogram (uniform weight if empty list)
+ b : (nt,) numpy.ndarray, float64
+ Target histogram (uniform weight if empty list)
+ M : (ns,nt) numpy.ndarray, float64
+ Loss matrix (c-order array with type float64)
numItermax : int, optional (default=100000)
The maximum number of iterations before stopping the optimization
algorithm if it has not converged.
- log: boolean, optional (default=False)
+ log: bool, optional (default=False)
If True, returns a dictionary containing the cost and dual
variables. Otherwise returns only the optimal transportation matrix.
Returns
-------
- gamma: (ns x nt) ndarray
+ gamma: (ns x nt) numpy.ndarray
Optimal transportation matrix for the given parameters
log: dict
If input log is true, a dictionary containing the cost and dual
@@ -128,16 +135,20 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
- M is the metric cost matrix
- a and b are the sample weights
+ .. warning::
+ Note that the M matrix needs to be a C-order numpy.array in float64
+ format.
+
Uses the algorithm proposed in [1]_
Parameters
----------
- a : (ns,) ndarray, float64
- Source histogram (uniform weigth if empty list)
- b : (nt,) ndarray, float64
- Target histogram (uniform weigth if empty list)
- M : (ns,nt) ndarray, float64
- loss matrix
+ a : (ns,) numpy.ndarray, float64
+ Source histogram (uniform weight if empty list)
+ b : (nt,) numpy.ndarray, float64
+ Target histogram (uniform weight if empty list)
+ M : (ns,nt) numpy.ndarray, float64
+ Loss matrix (c-order array with type float64)
numItermax : int, optional (default=100000)
The maximum number of iterations before stopping the optimization
algorithm if it has not converged.
@@ -151,7 +162,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
-------
gamma: (ns x nt) ndarray
Optimal transportation matrix for the given parameters
- log: dict
+ log: dictnp
If input log is true, a dictionary containing the cost and dual
variables and exit status
@@ -231,9 +242,9 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
Parameters
----------
- measures_locations : list of (k_i,d) np.ndarray
+ measures_locations : list of (k_i,d) numpy.ndarray
The discrete support of a measure supported on k_i locations of a d-dimensional space (k_i can be different for each element of the list)
- measures_weights : list of (k_i,) np.ndarray
+ measures_weights : list of (k_i,) numpy.ndarray
Numpy arrays where each numpy array has k_i non-negatives values summing to one representing the weights of each discrete input measure
X_init : (k,d) np.ndarray
@@ -246,7 +257,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
numItermax : int, optional
Max number of iterations
stopThr : float, optional
- Stop threshol on error (>0)
+ Stop threshold on error (>0)
verbose : bool, optional
Print information along iterations
log : bool, optional
diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
index 83ee6aa..edb5f7c 100644
--- a/ot/lp/emd_wrap.pyx
+++ b/ot/lp/emd_wrap.pyx
@@ -55,13 +55,16 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
- M is the metric cost matrix
- a and b are the sample weights
+ .. warning::
+ Note that the M matrix needs to be a C-order :py.cls:`numpy.array`
+
Parameters
----------
- a : (ns,) ndarray, float64
+ a : (ns,) numpy.ndarray, float64
source histogram
- b : (nt,) ndarray, float64
+ b : (nt,) numpy.ndarray, float64
target histogram
- M : (ns,nt) ndarray, float64
+ M : (ns,nt) numpy.ndarray, float64
loss matrix
max_iter : int
The maximum number of iterations before stopping the optimization
@@ -70,7 +73,7 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
Returns
-------
- gamma: (ns x nt) ndarray
+ gamma: (ns x nt) numpy.ndarray
Optimal transportation matrix for the given parameters
"""