Diffstat (limited to 'ot')
-rw-r--r--  ot/__init__.py      |  52
-rw-r--r--  ot/bregman.py       | 227
-rw-r--r--  ot/da.py            |   8
-rw-r--r--  ot/datasets.py      |  32
-rw-r--r--  ot/dr.py            |  63
-rw-r--r--  ot/gpu/__init__.py  |   6
-rw-r--r--  ot/gpu/bregman.py   |  11
-rw-r--r--  ot/gromov.py        | 514
-rw-r--r--  ot/lp/__init__.py   | 351
-rw-r--r--  ot/lp/emd_wrap.pyx  | 100
-rw-r--r--  ot/optim.py         |  32
-rw-r--r--  ot/plot.py          |  12
-rw-r--r--  ot/stochastic.py    | 407
-rw-r--r--  ot/unbalanced.py    | 519
-rw-r--r--  ot/utils.py         |  55
15 files changed, 1672 insertions, 717 deletions
diff --git a/ot/__init__.py b/ot/__init__.py
index b74b924..35ae6fc 100644
--- a/ot/__init__.py
+++ b/ot/__init__.py
@@ -1,6 +1,46 @@
-"""Python Optimal Transport toolbox
+"""
+
+This is the main module of the POT toolbox. It provides easy access to
+a number of sub-modules and functions described below.
+
+.. note::
+
+    Here is a list of the submodules and a short description of what they contain.
+
+ - :any:`ot.lp` contains OT solvers for the exact (Linear Program) OT problems.
+ - :any:`ot.bregman` contains OT solvers for the entropic OT problems using
+ Bregman projections.
+ - :any:`ot.smooth` contains OT solvers for the regularized (l2 and kl) smooth OT
+ problems.
+ - :any:`ot.gromov` contains solvers for Gromov-Wasserstein and Fused Gromov
+ Wasserstein problems.
+ - :any:`ot.optim` contains generic solvers for OT-based optimization problems
+ - :any:`ot.da` contains classes and function related to Monge mapping
+ estimation and Domain Adaptation (DA).
+ - :any:`ot.gpu` contains GPU (cupy) implementation of some OT solvers
+ - :any:`ot.dr` contains Dimension Reduction (DR) methods such as Wasserstein
+ Discriminant Analysis.
+ - :any:`ot.utils` contains utility functions such as distance computation and
+ timing.
+ - :any:`ot.datasets` contains toy dataset generation functions.
+ - :any:`ot.plot` contains visualization functions
+ - :any:`ot.stochastic` contains stochastic solvers for regularized OT.
+ - :any:`ot.unbalanced` contains solvers for regularized unbalanced OT.
+
+.. warning::
+ The list of automatically imported sub-modules is as follows:
    :py:mod:`ot.lp`, :py:mod:`ot.bregman`, :py:mod:`ot.optim`,
    :py:mod:`ot.utils`, :py:mod:`ot.datasets`,
    :py:mod:`ot.gromov`, :py:mod:`ot.smooth`,
    :py:mod:`ot.stochastic`.
+ The following sub-modules are not imported due to additional dependencies:
+ - :any:`ot.dr` : depends on :code:`pymanopt` and :code:`autograd`.
+ - :any:`ot.gpu` : depends on :code:`cupy` and a CUDA GPU.
+ - :any:`ot.plot` : depends on :code:`matplotlib`
"""
@@ -20,10 +60,12 @@ from . import da
from . import gromov
from . import smooth
from . import stochastic
+from . import unbalanced
# OT functions
-from .lp import emd, emd2
+from .lp import emd, emd2, emd_1d, emd2_1d, wasserstein_1d
from .bregman import sinkhorn, sinkhorn2, barycenter
+from .unbalanced import sinkhorn_unbalanced, barycenter_unbalanced
from .da import sinkhorn_lpl1_mm
# utils functions
@@ -31,6 +73,8 @@ from .utils import dist, unif, tic, toc, toq
__version__ = "0.5.1"
-__all__ = ["emd", "emd2", "sinkhorn", "sinkhorn2", "utils", 'datasets',
+__all__ = ["emd", "emd2", 'emd_1d', "sinkhorn", "sinkhorn2", "utils", 'datasets',
'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov',
- 'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim']
+ 'emd_1d', 'emd2_1d', 'wasserstein_1d',
+ 'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim',
+ 'sinkhorn_unbalanced', "barycenter_unbalanced"]
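
A minimal usage sketch of the newly exported solvers (toy data, uniform weights by default; the marginal-relaxation term of sinkhorn_unbalanced is passed positionally since its keyword name is not shown in this diff):

    >>> import numpy as np
    >>> import ot
    >>> x_a = np.array([0., 1., 2.])        # source support on the line
    >>> x_b = np.array([.5, 1.5, 2.5])      # target support
    >>> G = ot.emd_1d(x_a, x_b)             # exact 1D OT plan
    >>> d = ot.wasserstein_1d(x_a, x_b)     # 1D Wasserstein cost
    >>> a, b = ot.unif(3), ot.unif(3)
    >>> M = ot.dist(x_a[:, None], x_b[:, None])
    >>> Gu = ot.sinkhorn_unbalanced(a, b, M, 1., 1.)  # entropic + marginal reg.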
diff --git a/ot/bregman.py b/ot/bregman.py
index 321712b..70e4208 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -16,7 +16,7 @@ from .utils import unif, dist
def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
stopThr=1e-9, verbose=False, log=False, **kwargs):
- u"""
+ r"""
Solve the entropic regularization optimal transport problem and return the OT matrix
The function solves the following optimization problem:
@@ -40,12 +40,12 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
Parameters
----------
- a : np.ndarray (ns,)
+ a : ndarray, shape (ns,)
        sample weights in the source domain
- b : np.ndarray (nt,) or np.ndarray (nt,nbb)
+ b : ndarray, shape (nt,) or ndarray, shape (nt, nbb)
samples in the target domain, compute sinkhorn with multiple targets
and fixed M if b is a matrix (return OT loss + dual variables in log)
- M : np.ndarray (ns,nt)
+ M : ndarray, shape (ns, nt)
loss matrix
reg : float
Regularization term >0
@@ -64,7 +64,7 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : ndarray, shape (ns, nt)
Optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
@@ -73,12 +73,12 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
--------
>>> import ot
- >>> a=[.5,.5]
- >>> b=[.5,.5]
- >>> M=[[0.,1.],[1.,0.]]
- >>> ot.sinkhorn(a,b,M,1)
- array([[ 0.36552929, 0.13447071],
- [ 0.13447071, 0.36552929]])
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.], [1., 0.]]
+ >>> ot.sinkhorn(a, b, M, 1)
+ array([[0.36552929, 0.13447071],
+ [0.13447071, 0.36552929]])
References
@@ -131,7 +131,7 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000,
stopThr=1e-9, verbose=False, log=False, **kwargs):
- u"""
+ r"""
Solve the entropic regularization optimal transport problem and return the loss
The function solves the following optimization problem:
@@ -155,12 +155,12 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000,
Parameters
----------
- a : np.ndarray (ns,)
+ a : ndarray, shape (ns,)
        sample weights in the source domain
- b : np.ndarray (nt,) or np.ndarray (nt,nbb)
+ b : ndarray, shape (nt,) or ndarray, shape (nt, nbb)
samples in the target domain, compute sinkhorn with multiple targets
and fixed M if b is a matrix (return OT loss + dual variables in log)
- M : np.ndarray (ns,nt)
+ M : ndarray, shape (ns, nt)
loss matrix
reg : float
Regularization term >0
@@ -176,7 +176,6 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000,
log : bool, optional
record log if True
-
Returns
-------
W : (nt) ndarray or float
@@ -188,11 +187,11 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000,
--------
>>> import ot
- >>> a=[.5,.5]
- >>> b=[.5,.5]
- >>> M=[[0.,1.],[1.,0.]]
- >>> ot.sinkhorn2(a,b,M,1)
- array([ 0.26894142])
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.], [1., 0.]]
+ >>> ot.sinkhorn2(a, b, M, 1)
+ array([0.26894142])
@@ -241,14 +240,14 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000,
b = np.asarray(b, dtype=np.float64)
if len(b.shape) < 2:
- b = b.reshape((-1, 1))
+ b = b[:, None]
return sink()
def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
stopThr=1e-9, verbose=False, log=False, **kwargs):
- """
+ r"""
Solve the entropic regularization optimal transport problem and return the OT matrix
The function solves the following optimization problem:
@@ -272,12 +271,12 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
Parameters
----------
- a : np.ndarray (ns,)
+ a : ndarray, shape (ns,)
        sample weights in the source domain
- b : np.ndarray (nt,) or np.ndarray (nt,nbb)
+ b : ndarray, shape (nt,) or ndarray, shape (nt, nbb)
samples in the target domain, compute sinkhorn with multiple targets
and fixed M if b is a matrix (return OT loss + dual variables in log)
- M : np.ndarray (ns,nt)
+ M : ndarray, shape (ns, nt)
loss matrix
reg : float
Regularization term >0
@@ -290,10 +289,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
log : bool, optional
record log if True
-
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : ndarray, shape (ns, nt)
Optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
@@ -302,12 +300,12 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
--------
>>> import ot
- >>> a=[.5,.5]
- >>> b=[.5,.5]
- >>> M=[[0.,1.],[1.,0.]]
- >>> ot.sinkhorn(a,b,M,1)
- array([[ 0.36552929, 0.13447071],
- [ 0.13447071, 0.36552929]])
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.], [1., 0.]]
+ >>> ot.sinkhorn(a, b, M, 1)
+ array([[0.36552929, 0.13447071],
+ [0.13447071, 0.36552929]])
References
@@ -422,7 +420,7 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log=False):
- """
+ r"""
Solve the entropic regularization optimal transport problem and return the OT matrix
The algorithm used is based on the paper
@@ -453,12 +451,12 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log=
Parameters
----------
- a : np.ndarray (ns,)
+ a : ndarray, shape (ns,)
        sample weights in the source domain
- b : np.ndarray (nt,) or np.ndarray (nt,nbb)
+ b : ndarray, shape (nt,) or ndarray, shape (nt, nbb)
samples in the target domain, compute sinkhorn with multiple targets
and fixed M if b is a matrix (return OT loss + dual variables in log)
- M : np.ndarray (ns,nt)
+ M : ndarray, shape (ns, nt)
loss matrix
reg : float
Regularization term >0
@@ -469,10 +467,9 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log=
log : bool, optional
record log if True
-
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : ndarray, shape (ns, nt)
Optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
@@ -481,12 +478,12 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log=
--------
>>> import ot
- >>> a=[.5,.5]
- >>> b=[.5,.5]
- >>> M=[[0.,1.],[1.,0.]]
- >>> ot.bregman.greenkhorn(a,b,M,1)
- array([[ 0.36552929, 0.13447071],
- [ 0.13447071, 0.36552929]])
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.], [1., 0.]]
+ >>> ot.bregman.greenkhorn(a, b, M, 1)
+ array([[0.36552929, 0.13447071],
+ [0.13447071, 0.36552929]])
References
@@ -576,7 +573,7 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log=
def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
warmstart=None, verbose=False, print_period=20, log=False, **kwargs):
- """
+ r"""
Solve the entropic regularization OT problem with log stabilization
The function solves the following optimization problem:
@@ -602,11 +599,11 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
Parameters
----------
- a : np.ndarray (ns,)
+ a : ndarray, shape (ns,)
        sample weights in the source domain
- b : np.ndarray (nt,)
+ b : ndarray, shape (nt,)
samples in the target domain
- M : np.ndarray (ns,nt)
+ M : ndarray, shape (ns, nt)
loss matrix
reg : float
Regularization term >0
@@ -623,10 +620,9 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
log : bool, optional
record log if True
-
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : ndarray, shape (ns, nt)
Optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
@@ -639,8 +635,8 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
>>> b=[.5,.5]
>>> M=[[0.,1.],[1.,0.]]
>>> ot.bregman.sinkhorn_stabilized(a,b,M,1)
- array([[ 0.36552929, 0.13447071],
- [ 0.13447071, 0.36552929]])
+ array([[0.36552929, 0.13447071],
+ [0.13447071, 0.36552929]])
References
@@ -769,10 +765,14 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
cpt = cpt + 1
- # print('err=',err,' cpt=',cpt)
if log:
- log['logu'] = alpha / reg + np.log(u)
- log['logv'] = beta / reg + np.log(v)
+ if nbb:
+ alpha = alpha[:, None]
+ beta = beta[:, None]
+ logu = alpha / reg + np.log(u)
+ logv = beta / reg + np.log(v)
+ log['logu'] = logu
+ log['logv'] = logv
log['alpha'] = alpha + reg * np.log(u)
log['beta'] = beta + reg * np.log(v)
log['warmstart'] = (log['alpha'], log['beta'])
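
The added branch above fixes a broadcasting bug: when b holds nbb target histograms, u and v have shape (ns, nbb) and (nt, nbb) while alpha and beta stay (ns,) and (nt,). A standalone sketch of why the extra axis is required (hypothetical shapes):

    import numpy as np
    ns, nbb, reg = 3, 4, 1.0
    alpha = np.zeros(ns)            # log-domain rescaling, shape (ns,)
    u = np.ones((ns, nbb))          # one scaling vector per target histogram
    # alpha / reg + np.log(u) raises ValueError: (ns,) cannot broadcast
    # against (ns, nbb); a trailing axis aligns the shapes:
    logu = alpha[:, None] / reg + np.log(u)   # shape (ns, nbb)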
@@ -796,7 +796,7 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInnerItermax=100,
tau=1e3, stopThr=1e-9, warmstart=None, verbose=False, print_period=10, log=False, **kwargs):
- """
+ r"""
Solve the entropic regularization optimal transport problem with log
stabilization and epsilon scaling.
@@ -823,11 +823,11 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne
Parameters
----------
- a : np.ndarray (ns,)
+ a : ndarray, shape (ns,)
samples weights in the source domain
- b : np.ndarray (nt,)
+ b : ndarray, shape (nt,)
samples in the target domain
- M : np.ndarray (ns,nt)
+ M : ndarray, shape (ns, nt)
loss matrix
reg : float
Regularization term >0
@@ -835,7 +835,7 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne
        threshold for max value in u or v for log scaling
    tau : float
        threshold for max value in u or v for log scaling
- warmstart : tible of vectors
+ warmstart : tuple of vectors
        if given then starting values for alpha and beta log scalings
numItermax : int, optional
Max number of iterations
@@ -850,10 +850,9 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne
log : bool, optional
record log if True
-
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : ndarray, shape (ns, nt)
Optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
@@ -862,12 +861,12 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne
--------
>>> import ot
- >>> a=[.5,.5]
- >>> b=[.5,.5]
- >>> M=[[0.,1.],[1.,0.]]
- >>> ot.bregman.sinkhorn_epsilon_scaling(a,b,M,1)
- array([[ 0.36552929, 0.13447071],
- [ 0.13447071, 0.36552929]])
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.], [1., 0.]]
+ >>> ot.bregman.sinkhorn_epsilon_scaling(a, b, M, 1)
+ array([[0.36552929, 0.13447071],
+ [0.13447071, 0.36552929]])
References
@@ -989,7 +988,7 @@ def projC(gamma, q):
def barycenter(A, M, reg, weights=None, numItermax=1000,
stopThr=1e-4, verbose=False, log=False):
- """Compute the entropic regularized wasserstein barycenter of distributions A
+ r"""Compute the entropic regularized wasserstein barycenter of distributions A
The function solves the following optimization problem:
@@ -1006,13 +1005,13 @@ def barycenter(A, M, reg, weights=None, numItermax=1000,
Parameters
----------
- A : np.ndarray (d,n)
+ A : ndarray, shape (d,n)
n training distributions a_i of size d
- M : np.ndarray (d,d)
+ M : ndarray, shape (d,d)
loss matrix for OT
reg : float
Regularization term >0
- weights : np.ndarray (n,)
+ weights : ndarray, shape (n,)
        Weights of each histogram a_i on the simplex (barycentric coordinates)
numItermax : int, optional
Max number of iterations
@@ -1084,7 +1083,7 @@ def barycenter(A, M, reg, weights=None, numItermax=1000,
def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1e-9, stabThr=1e-30, verbose=False, log=False):
- """Compute the entropic regularized wasserstein barycenter of distributions A
+ r"""Compute the entropic regularized wasserstein barycenter of distributions A
where A is a collection of 2D images.
The function solves the following optimization problem:
@@ -1102,11 +1101,11 @@ def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1
Parameters
----------
- A : np.ndarray (n,w,h)
+ A : ndarray, shape (n, w, h)
n distributions (2D images) of size w x h
reg : float
Regularization term >0
- weights : np.ndarray (n,)
+ weights : ndarray, shape (n,)
        Weights of each image on the simplex (barycentric coordinates)
numItermax : int, optional
Max number of iterations
@@ -1119,15 +1118,13 @@ def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1
log : bool, optional
record log if True
-
Returns
-------
- a : (w,h) ndarray
+ a : ndarray, shape (w, h)
2D Wasserstein barycenter
log : dict
log dictionary return only if log==True in parameters
-
References
----------
@@ -1195,7 +1192,7 @@ def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1
def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000,
stopThr=1e-3, verbose=False, log=False):
- """
+ r"""
Compute the unmixing of an observation with a given dictionary using Wasserstein distance
    The function solves the following optimization problem:
@@ -1217,15 +1214,15 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000,
Parameters
----------
- a : np.ndarray (d)
+    a : ndarray, shape (d,)
observed distribution
- D : np.ndarray (d,n)
+ D : ndarray, shape (d, n)
dictionary matrix
- M : np.ndarray (d,d)
+ M : ndarray, shape (d, d)
loss matrix
- M0 : np.ndarray (n,n)
+ M0 : ndarray, shape (n, n)
loss matrix
- h0 : np.ndarray (n,)
+ h0 : ndarray, shape (n,)
prior on h
reg : float
Regularization term >0 (Wasserstein data fitting)
@@ -1245,7 +1242,7 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000,
Returns
-------
- a : (d,) ndarray
+ a : ndarray, shape (d,)
Wasserstein barycenter
log : dict
log dictionary return only if log==True in parameters
@@ -1302,7 +1299,7 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000,
def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs):
- '''
+ r'''
Solve the entropic regularization optimal transport problem and return the
OT matrix from empirical data
@@ -1325,15 +1322,15 @@ def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numI
Parameters
----------
- X_s : np.ndarray (ns, d)
+ X_s : ndarray, shape (ns, d)
samples in the source domain
- X_t : np.ndarray (nt, d)
+ X_t : ndarray, shape (nt, d)
samples in the target domain
reg : float
Regularization term >0
- a : np.ndarray (ns,)
+ a : ndarray, shape (ns,)
        sample weights in the source domain
- b : np.ndarray (nt,)
+ b : ndarray, shape (nt,)
        sample weights in the target domain
numItermax : int, optional
Max number of iterations
@@ -1347,7 +1344,7 @@ def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numI
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : ndarray, shape (ns, nt)
Regularized optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
@@ -1360,10 +1357,9 @@ def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numI
>>> reg = 0.1
>>> X_s = np.reshape(np.arange(n_s), (n_s, 1))
>>> X_t = np.reshape(np.arange(0, n_t), (n_t, 1))
- >>> emp_sinkhorn = empirical_sinkhorn(X_s, X_t, reg, verbose=False)
- >>> print(emp_sinkhorn)
- >>> [[4.99977301e-01 2.26989344e-05]
- [2.26989344e-05 4.99977301e-01]]
+ >>> empirical_sinkhorn(X_s, X_t, reg, verbose=False) # doctest: +NORMALIZE_WHITESPACE
+ array([[4.99977301e-01, 2.26989344e-05],
+ [2.26989344e-05, 4.99977301e-01]])
References
@@ -1392,7 +1388,7 @@ def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numI
def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs):
- '''
+ r'''
Solve the entropic regularization optimal transport problem from empirical
data and return the OT loss
@@ -1416,15 +1412,15 @@ def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', num
Parameters
----------
- X_s : np.ndarray (ns, d)
+ X_s : ndarray, shape (ns, d)
samples in the source domain
- X_t : np.ndarray (nt, d)
+ X_t : ndarray, shape (nt, d)
samples in the target domain
reg : float
Regularization term >0
- a : np.ndarray (ns,)
+ a : ndarray, shape (ns,)
        sample weights in the source domain
- b : np.ndarray (nt,)
+ b : ndarray, shape (nt,)
        sample weights in the target domain
numItermax : int, optional
Max number of iterations
@@ -1438,7 +1434,7 @@ def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', num
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : ndarray, shape (ns, nt)
Regularized optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
@@ -1451,9 +1447,8 @@ def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', num
>>> reg = 0.1
>>> X_s = np.reshape(np.arange(n_s), (n_s, 1))
>>> X_t = np.reshape(np.arange(0, n_t), (n_t, 1))
- >>> loss_sinkhorn = empirical_sinkhorn2(X_s, X_t, reg, verbose=False)
- >>> print(loss_sinkhorn)
- >>> [4.53978687e-05]
+ >>> empirical_sinkhorn2(X_s, X_t, reg, verbose=False)
+ array([4.53978687e-05])
References
@@ -1482,7 +1477,7 @@ def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', num
def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs):
- '''
+ r'''
Compute the sinkhorn divergence loss from empirical data
The function solves the following optimization problems and return the
@@ -1525,15 +1520,15 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli
Parameters
----------
- X_s : np.ndarray (ns, d)
+ X_s : ndarray, shape (ns, d)
samples in the source domain
- X_t : np.ndarray (nt, d)
+ X_t : ndarray, shape (nt, d)
samples in the target domain
reg : float
Regularization term >0
- a : np.ndarray (ns,)
+ a : ndarray, shape (ns,)
        sample weights in the source domain
- b : np.ndarray (nt,)
+ b : ndarray, shape (nt,)
        sample weights in the target domain
numItermax : int, optional
Max number of iterations
@@ -1544,30 +1539,26 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli
log : bool, optional
record log if True
-
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : ndarray, shape (ns, nt)
Regularized optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
Examples
--------
-
>>> n_s = 2
>>> n_t = 4
>>> reg = 0.1
>>> X_s = np.reshape(np.arange(n_s), (n_s, 1))
>>> X_t = np.reshape(np.arange(0, n_t), (n_t, 1))
- >>> emp_sinkhorn_div = empirical_sinkhorn_divergence(X_s, X_t, reg)
- >>> print(emp_sinkhorn_div)
- >>> [2.99977435]
+ >>> empirical_sinkhorn_divergence(X_s, X_t, reg) # doctest: +ELLIPSIS
+ array([1.499...])
References
----------
-
    .. [23] Aude Genevay, Gabriel Peyré, Marco Cuturi, Learning Generative Models with Sinkhorn Divergences, Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics, (AISTATS) 21, 2018
'''
if log:
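
For reference, the Bregman projection iteration that this module implements can be sketched in a few lines (illustrative only; the library versions add stabilization, batching and logging):

    import numpy as np

    def sinkhorn_sketch(a, b, M, reg, n_iter=100):
        # alternate Bregman projections onto the two marginal constraints
        K = np.exp(-M / reg)                  # Gibbs kernel
        u = np.ones_like(a)
        for _ in range(n_iter):
            v = b / (K.T @ u)                 # match column marginals b
            u = a / (K @ v)                   # match row marginals a
        return u[:, None] * K * v[None, :]    # plan = diag(u) K diag(v)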
diff --git a/ot/da.py b/ot/da.py
index 479e698..83f9027 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -41,15 +41,15 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
where :
- M is the (ns,nt) metric cost matrix
- - :math:`\Omega_e` is the entropic regularization term
- :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - :math:`\Omega_g` is the group lasso regulaization term
+ - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e
+ (\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`\Omega_g` is the group lasso regularization term
:math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1`
    where :math:`\mathcal{I}_c` are the indices of samples from class c
in the source domain.
- a and b are source and target weights (sum to 1)
- The algorithm used for solving the problem is the generalised conditional
+ The algorithm used for solving the problem is the generalized conditional
gradient as proposed in [5]_ [7]_
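
A hedged usage sketch of the solver documented above (random toy cost; labels_a defines the class groups :math:`\mathcal{I}_c`):

    import numpy as np
    import ot
    rng = np.random.RandomState(0)
    a, b = ot.unif(4), ot.unif(4)
    labels_a = np.array([0, 0, 1, 1])   # class of each source sample
    M = rng.rand(4, 4)                  # toy cost matrix
    G = ot.da.sinkhorn_lpl1_mm(a, labels_a, b, M, reg=1., eta=0.1)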
diff --git a/ot/datasets.py b/ot/datasets.py
index e76e75d..ba0cfd9 100644
--- a/ot/datasets.py
+++ b/ot/datasets.py
@@ -17,7 +17,6 @@ def make_1D_gauss(n, m, s):
Parameters
----------
-
n : int
number of bins in the histogram
m : float
@@ -25,12 +24,10 @@ def make_1D_gauss(n, m, s):
s : float
        standard deviation of the Gaussian distribution
-
Returns
-------
- h : np.array (n,)
- 1D histogram for a gaussian distribution
-
+ h : ndarray (n,)
+ 1D histogram for a gaussian distribution
"""
x = np.arange(n, dtype=np.float64)
h = np.exp(-(x - m)**2 / (2 * s**2))
@@ -44,16 +41,15 @@ def get_1D_gauss(n, m, sigma):
def make_2D_samples_gauss(n, m, sigma, random_state=None):
- """return n samples drawn from 2D gaussian N(m,sigma)
+ """Return n samples drawn from 2D gaussian N(m,sigma)
Parameters
----------
-
n : int
number of samples to make
- m : np.array (2,)
+ m : ndarray, shape (2,)
mean value of the gaussian distribution
- sigma : np.array (2,2)
+ sigma : ndarray, shape (2, 2)
covariance matrix of the gaussian distribution
random_state : int, RandomState instance or None, optional (default=None)
If int, random_state is the seed used by the random number generator;
@@ -63,9 +59,8 @@ def make_2D_samples_gauss(n, m, sigma, random_state=None):
Returns
-------
- X : np.array (n,2)
- n samples drawn from N(m,sigma)
-
+ X : ndarray, shape (n, 2)
+ n samples drawn from N(m, sigma).
"""
generator = check_random_state(random_state)
@@ -86,11 +81,10 @@ def get_2D_samples_gauss(n, m, sigma, random_state=None):
def make_data_classif(dataset, n, nz=.5, theta=0, random_state=None, **kwargs):
- """ dataset generation for classification problems
+ """Dataset generation for classification problems
Parameters
----------
-
dataset : str
type of classification problem (see code)
n : int
@@ -105,13 +99,11 @@ def make_data_classif(dataset, n, nz=.5, theta=0, random_state=None, **kwargs):
Returns
-------
- X : np.array (n,d)
- n observation of size d
- y : np.array (n,)
- labels of the samples
-
+ X : ndarray, shape (n, d)
+        n observations of size d
+ y : ndarray, shape (n,)
+ labels of the samples.
"""
-
generator = check_random_state(random_state)
if dataset.lower() == '3gauss':
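
Both generators can be exercised directly; a short sketch:

    import ot
    h = ot.datasets.make_1D_gauss(n=100, m=50, s=10)   # histogram on 100 bins
    X, y = ot.datasets.make_data_classif('3gauss', n=50, random_state=42)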
diff --git a/ot/dr.py b/ot/dr.py
index d30ab30..680dabf 100644
--- a/ot/dr.py
+++ b/ot/dr.py
@@ -1,6 +1,12 @@
# -*- coding: utf-8 -*-
"""
Dimension reduction with optimal transport
+
+
+.. warning::
+    Note that by default the module is not imported in :mod:`ot`. In order
+    to use it you need to explicitly import :mod:`ot.dr`.
+
"""
# Author: Remi Flamary <remi.flamary@unice.fr>
@@ -43,30 +49,25 @@ def split_classes(X, y):
def fda(X, y, p=2, reg=1e-16):
- """
- Fisher Discriminant Analysis
-
+ """Fisher Discriminant Analysis
Parameters
----------
- X : numpy.ndarray (n,d)
- Training samples
- y : np.ndarray (n,)
- labels for training samples
+ X : ndarray, shape (n, d)
+ Training samples.
+ y : ndarray, shape (n,)
+ Labels for training samples.
p : int, optional
- size of dimensionnality reduction
+        Size of the dimensionality reduction.
reg : float, optional
Regularization term >0 (ridge regularization)
-
Returns
-------
- P : (d x p) ndarray
+ P : ndarray, shape (d, p)
        Optimal projection matrix for the given parameters
- proj : fun
+ proj : callable
projection function including mean centering
-
-
"""
mx = np.mean(X)
@@ -124,37 +125,33 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None):
Parameters
----------
- X : numpy.ndarray (n,d)
- Training samples
- y : np.ndarray (n,)
- labels for training samples
+ X : ndarray, shape (n, d)
+ Training samples.
+ y : ndarray, shape (n,)
+ Labels for training samples.
p : int, optional
- size of dimensionnality reduction
+        Size of the dimensionality reduction.
reg : float, optional
Regularization term >0 (entropic regularization)
- solver : str, optional
- None for steepest decsent or 'TrustRegions' for trust regions algorithm
- else shoudl be a pymanopt.solvers
- P0 : numpy.ndarray (d,p)
- Initial starting point for projection
+ solver : None | str, optional
+ None for steepest descent or 'TrustRegions' for trust regions algorithm
+        otherwise it should be a pymanopt.solvers object
+ P0 : ndarray, shape (d, p)
+ Initial starting point for projection.
verbose : int, optional
- Print information along iterations
-
-
+ Print information along iterations.
Returns
-------
- P : (d x p) ndarray
+ P : ndarray, shape (d, p)
        Optimal projection matrix for the given parameters
- proj : fun
- projection function including mean centering
-
+ proj : callable
+ Projection function including mean centering.
References
----------
-
- .. [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016). Wasserstein Discriminant Analysis. arXiv preprint arXiv:1608.08063.
-
+ .. [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016).
+ Wasserstein Discriminant Analysis. arXiv preprint arXiv:1608.08063.
""" # noqa
mx = np.mean(X)
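
A usage sketch under the signatures shown above (ot.dr must be imported explicitly and wda needs pymanopt/autograd installed):

    import ot
    import ot.dr
    X, y = ot.datasets.make_data_classif('3gauss', n=100, random_state=0)
    Pfda, projfda = ot.dr.fda(X, y, p=1)                # Fisher discriminant baseline
    Pwda, projwda = ot.dr.wda(X, y, p=1, reg=1., k=10)  # entropic Wasserstein DA
    Xp = projwda(X)                                     # mean-centered projection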
diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py
index deda6b1..1ab95bb 100644
--- a/ot/gpu/__init__.py
+++ b/ot/gpu/__init__.py
@@ -5,11 +5,15 @@ This module provides GPU implementation for several OT solvers and utility
functions. The GPU backend is handled by `cupy
<https://cupy.chainer.org/>`_.
+.. warning::
+    Note that by default the module is not imported in :mod:`ot`. In order
+    to use it you need to explicitly import :mod:`ot.gpu`.
+
By default, the functions in this module accept and return numpy arrays
in order to provide a drop-in replacement for the other POT functions, but
the transfer between CPU and GPU comes with a significant overhead.
-In order to get the best erformances, we recommend to give only cupy
+In order to get the best performance, we recommend giving only cupy
arrays to the functions and deactivating the conversion of the
result back to numpy with the parameter ``to_numpy=False``.
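
A sketch of the recommended workflow (requires cupy and a CUDA device, and assumes ot.gpu re-exports sinkhorn like the CPU module):

    import cupy as cp
    import ot.gpu
    a = cp.ones(100) / 100
    b = cp.ones(100) / 100
    M = cp.random.rand(100, 100)
    # keep inputs and output on the GPU to avoid host/device transfers
    G = ot.gpu.sinkhorn(a, b, M, 1., to_numpy=False)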
diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py
index 978b307..2e2df83 100644
--- a/ot/gpu/bregman.py
+++ b/ot/gpu/bregman.py
@@ -70,17 +70,6 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9,
log : dict
log dictionary return only if log==True in parameters
- Examples
- --------
-
- >>> import ot
- >>> a=[.5,.5]
- >>> b=[.5,.5]
- >>> M=[[0.,1.],[1.,0.]]
- >>> ot.sinkhorn(a,b,M,1)
- array([[ 0.36552929, 0.13447071],
- [ 0.13447071, 0.36552929]])
-
References
----------
diff --git a/ot/gromov.py b/ot/gromov.py
index ca96b31..699ae4c 100644
--- a/ot/gromov.py
+++ b/ot/gromov.py
@@ -1,9 +1,6 @@
-
# -*- coding: utf-8 -*-
"""
Gromov-Wasserstein transport method
-
-
"""
# Author: Erwan Vautier <erwan.vautier@gmail.com>
@@ -22,7 +19,7 @@ from .optim import cg
def init_matrix(C1, C2, p, q, loss_fun='square_loss'):
- """ Return loss matrices and tensors for Gromov-Wasserstein fast computation
+ """Return loss matrices and tensors for Gromov-Wasserstein fast computation
Returns the value of \mathcal{L}(C1,C2) \otimes T with the selected loss
    function as the loss function of Gromov-Wasserstein discrepancy.
@@ -51,29 +48,27 @@ def init_matrix(C1, C2, p, q, loss_fun='square_loss'):
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
+        Metric cost matrix in the target space
T : ndarray, shape (ns, nt)
- Coupling between source and target spaces
+ Coupling between source and target spaces
p : ndarray, shape (ns,)
-
Returns
-------
-
constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
+ Constant C matrix in Eq. (6)
hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
+ h1(C1) matrix in Eq. (6)
hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
+ h2(C) matrix in Eq. (6)
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
@@ -114,31 +109,29 @@ def init_matrix(C1, C2, p, q, loss_fun='square_loss'):
def tensor_product(constC, hC1, hC2, T):
- """ Return the tensor for Gromov-Wasserstein fast computation
+ """Return the tensor for Gromov-Wasserstein fast computation
The tensor is computed as described in Proposition 1 Eq. (6) in [12].
Parameters
----------
constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
+ Constant C matrix in Eq. (6)
hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
+ h1(C1) matrix in Eq. (6)
hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
-
+ h2(C) matrix in Eq. (6)
Returns
-------
-
tens : ndarray, shape (ns, nt)
- \mathcal{L}(C1,C2) \otimes T tensor-matrix multiplication result
+ \mathcal{L}(C1,C2) \otimes T tensor-matrix multiplication result
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
A = -np.dot(hC1, T).dot(hC2.T)
@@ -148,32 +141,31 @@ def tensor_product(constC, hC1, hC2, T):
def gwloss(constC, hC1, hC2, T):
- """ Return the Loss for Gromov-Wasserstein
+ """Return the Loss for Gromov-Wasserstein
The loss is computed as described in Proposition 1 Eq. (6) in [12].
Parameters
----------
constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
+ Constant C matrix in Eq. (6)
hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
+ h1(C1) matrix in Eq. (6)
hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
+ h2(C) matrix in Eq. (6)
T : ndarray, shape (ns, nt)
- Current value of transport matrix T
+ Current value of transport matrix T
Returns
-------
-
loss : float
- Gromov Wasserstein loss
+ Gromov Wasserstein loss
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
@@ -183,32 +175,31 @@ def gwloss(constC, hC1, hC2, T):
def gwggrad(constC, hC1, hC2, T):
- """ Return the gradient for Gromov-Wasserstein
+ """Return the gradient for Gromov-Wasserstein
The gradient is computed as described in Proposition 2 in [12].
Parameters
----------
constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
+ Constant C matrix in Eq. (6)
hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
+ h1(C1) matrix in Eq. (6)
hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
+ h2(C) matrix in Eq. (6)
T : ndarray, shape (ns, nt)
- Current value of transport matrix T
+ Current value of transport matrix T
Returns
-------
-
grad : ndarray, shape (ns, nt)
Gromov Wasserstein gradient
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
return 2 * tensor_product(constC, hC1, hC2,
@@ -222,19 +213,19 @@ def update_square_loss(p, lambdas, T, Cs):
Parameters
----------
- p : ndarray, shape (N,)
- masses in the targeted barycenter
+ p : ndarray, shape (N,)
+ Masses in the targeted barycenter.
lambdas : list of float
- list of the S spaces' weights
- T : list of S np.ndarray(ns,N)
- the S Ts couplings calculated at each iteration
+ List of the S spaces' weights.
+ T : list of S np.ndarray of shape (ns,N)
+ The S Ts couplings calculated at each iteration.
Cs : list of S ndarray, shape(ns,ns)
- Metric cost matrices
+ Metric cost matrices.
Returns
----------
- C : ndarray, shape (nt,nt)
- updated C matrix
+ C : ndarray, shape (nt, nt)
+ Updated C matrix.
"""
tmpsum = sum([lambdas[s] * np.dot(T[s].T, Cs[s]).dot(T[s])
for s in range(len(T))])
@@ -251,12 +242,12 @@ def update_kl_loss(p, lambdas, T, Cs):
Parameters
----------
p : ndarray, shape (N,)
- weights in the targeted barycenter
+ Weights in the targeted barycenter.
    lambdas : list of float
        List of the S spaces' weights.
- T : list of S np.ndarray(ns,N)
- the S Ts couplings calculated at each iteration
+ T : list of S np.ndarray of shape (ns,N)
+ The S Ts couplings calculated at each iteration.
Cs : list of S ndarray, shape(ns,ns)
- Metric cost matrices
+ Metric cost matrices.
Returns
----------
@@ -277,27 +268,27 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs
The function solves the following optimization problem:
.. math::
- \GW_Dist = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- p : distribution in the source space
- q : distribution in the target space
- L : loss function to account for the misfit between the similarity matrices
- H : entropy
+ - C1 : Metric cost matrix in the source space
+ - C2 : Metric cost matrix in the target space
+ - p : distribution in the source space
+ - q : distribution in the target space
+ - L : loss function to account for the misfit between the similarity matrices
+ - H : entropy
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
- p : ndarray, shape (ns,)
- distribution in the source space
- q : ndarray, shape (nt,)
- distribution in the target space
- loss_fun : string
+        Metric cost matrix in the target space
+ p : ndarray, shape (ns,)
+ Distribution in the source space
+ q : ndarray, shape (nt,)
+ Distribution in the target space
+ loss_fun : str
loss function used for the solver either 'square_loss' or 'kl_loss'
max_iter : int, optional
@@ -312,15 +303,15 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs
        If True, the step of the line search is found via an Armijo line search; else a closed form is used.
        If there are convergence issues, use False.
**kwargs : dict
- parameters can be directly pased to the ot.optim.cg solver
+ parameters can be directly passed to the ot.optim.cg solver
Returns
-------
T : ndarray, shape (ns, nt)
- coupling between the two spaces that minimizes :
+        Coupling between the two spaces that minimizes:
\sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
log : dict
- convergence information and loss
+ Convergence information and loss.
References
----------
@@ -355,31 +346,37 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs
def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, armijo=False, log=False, **kwargs):
"""
    Computes the FGW transport between two graphs (see [24])
+
.. math::
- \gamma = arg\min_\gamma (1-\alpha)*<\gamma,M>_F + alpha* \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ \gamma = arg\min_\gamma (1-\\alpha)*<\gamma,M>_F + \\alpha* \sum_{i,j,k,l}
+ L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+
s.t. \gamma 1 = p
\gamma^T 1= q
\gamma\geq 0
+
where :
- M is the (ns,nt) metric cost matrix
- :math:`f` is the regularization term ( and df is its gradient)
- a and b are source and target weights (sum to 1)
- L is a loss function to account for the misfit between the similarity matrices
- The algorithm used for solving the problem is conditional gradient as discussed in [1]_
+
+ The algorithm used for solving the problem is conditional gradient as discussed in [24]_
+
Parameters
----------
- M : ndarray, shape (ns, nt)
- Metric cost matrix between features across domains
+ M : ndarray, shape (ns, nt)
+ Metric cost matrix between features across domains
C1 : ndarray, shape (ns, ns)
- Metric cost matrix respresentative of the structure in the source space
+ Metric cost matrix representative of the structure in the source space
C2 : ndarray, shape (nt, nt)
- Metric cost matrix espresentative of the structure in the target space
- p : ndarray, shape (ns,)
- distribution in the source space
- q : ndarray, shape (nt,)
- distribution in the target space
- loss_fun : string,optional
- loss function used for the solver
+ Metric cost matrix representative of the structure in the target space
+ p : ndarray, shape (ns,)
+ Distribution in the source space
+ q : ndarray, shape (nt,)
+ Distribution in the target space
+ loss_fun : str, optional
+ Loss function used for the solver
max_iter : int, optional
Max number of iterations
tol : float, optional
@@ -392,19 +389,21 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5,
        If True, the step of the line search is found via an Armijo line search; else a closed form is used.
        If there are convergence issues, use False.
**kwargs : dict
- parameters can be directly pased to the ot.optim.cg solver
+ parameters can be directly passed to the ot.optim.cg solver
+
Returns
-------
- gamma : (ns x nt) ndarray
- Optimal transportation matrix for the given parameters
+ gamma : ndarray, shape (ns, nt)
+ Optimal transportation matrix for the given parameters.
log : dict
- log dictionary return only if log==True in parameters
+ Log dictionary return only if log==True in parameters.
+
References
----------
.. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain
- and Courty Nicolas
- "Optimal Transport for structured data with application on graphs"
- International Conference on Machine Learning (ICML). 2019.
+ and Courty Nicolas "Optimal Transport for structured data with
+ application on graphs", International Conference on Machine Learning
+ (ICML). 2019.
"""
constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun)
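
A toy call of fused_gromov_wasserstein under the signature above (random features and structures, square loss):

    import numpy as np
    import ot
    rng = np.random.RandomState(0)
    xs, xt = rng.randn(10, 2), rng.randn(10, 2)   # node features
    C1, C2 = ot.dist(xs, xs), ot.dist(xt, xt)     # structure matrices
    C1 /= C1.max(); C2 /= C2.max()
    M = ot.dist(xs, xt)                           # feature cost across domains
    p, q = ot.unif(10), ot.unif(10)
    T = ot.gromov.fused_gromov_wasserstein(M, C1, C2, p, q, alpha=0.5)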
@@ -428,31 +427,37 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5,
def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, armijo=False, log=False, **kwargs):
"""
    Computes the FGW distance between two graphs (see [24])
+
.. math::
- \gamma = arg\min_\gamma (1-\alpha)*<\gamma,M>_F + alpha* \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ \min_\gamma (1-\\alpha)*<\gamma,M>_F + \\alpha* \sum_{i,j,k,l}
+ L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+
+
s.t. \gamma 1 = p
\gamma^T 1= q
\gamma\geq 0
+
where :
- M is the (ns,nt) metric cost matrix
- :math:`f` is the regularization term ( and df is its gradient)
- a and b are source and target weights (sum to 1)
- L is a loss function to account for the misfit between the similarity matrices
    The algorithm used for solving the problem is conditional gradient as discussed in [24]_
+
Parameters
----------
- M : ndarray, shape (ns, nt)
- Metric cost matrix between features across domains
+ M : ndarray, shape (ns, nt)
+ Metric cost matrix between features across domains
C1 : ndarray, shape (ns, ns)
- Metric cost matrix respresentative of the structure in the source space
+        Metric cost matrix representative of the structure in the source space.
C2 : ndarray, shape (nt, nt)
- Metric cost matrix espresentative of the structure in the target space
+        Metric cost matrix representative of the structure in the target space.
p : ndarray, shape (ns,)
- distribution in the source space
+ Distribution in the source space.
q : ndarray, shape (nt,)
- distribution in the target space
- loss_fun : string,optional
- loss function used for the solver
+ Distribution in the target space.
+ loss_fun : str, optional
+ Loss function used for the solver.
max_iter : int, optional
Max number of iterations
tol : float, optional
@@ -460,22 +465,24 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5
verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
+ Record log if True.
armijo : bool, optional
- If True the steps of the line-search is found via an armijo research. Else closed form is used.
- If there is convergence issues use False.
+        If True, the step of the line search is found via an Armijo line search;
+        else a closed form is used. If there are convergence issues, use False.
**kwargs : dict
- parameters can be directly pased to the ot.optim.cg solver
+        Parameters can be directly passed to the ot.optim.cg solver.
+
Returns
-------
- gamma : (ns x nt) ndarray
- Optimal transportation matrix for the given parameters
+ gamma : ndarray, shape (ns, nt)
+ Optimal transportation matrix for the given parameters.
log : dict
- log dictionary return only if log==True in parameters
+ Log dictionary return only if log==True in parameters.
+
References
----------
.. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain
- and Courty Nicolas
+ and Courty Nicolas
"Optimal Transport for structured data with application on graphs"
International Conference on Machine Learning (ICML). 2019.
"""
@@ -506,29 +513,28 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwarg
The function solves the following optimization problem:
.. math::
- \GW_Dist = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- p : distribution in the source space
- q : distribution in the target space
- L : loss function to account for the misfit between the similarity matrices
- H : entropy
+ - C1 : Metric cost matrix in the source space
+ - C2 : Metric cost matrix in the target space
+ - p : distribution in the source space
+ - q : distribution in the target space
+ - L : loss function to account for the misfit between the similarity matrices
+ - H : entropy
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
- p : ndarray, shape (ns,)
- distribution in the source space
+ Metric cost matrix in the target space
+ p : ndarray, shape (ns,)
+ Distribution in the source space.
q : ndarray, shape (nt,)
- distribution in the target space
- loss_fun : string
+ Distribution in the target space.
+ loss_fun : str
loss function used for the solver either 'square_loss' or 'kl_loss'
-
max_iter : int, optional
Max number of iterations
tol : float, optional
@@ -540,6 +546,7 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwarg
armijo : bool, optional
        If True, the step of the line search is found via an Armijo line search; else a closed form is used.
        If there are convergence issues, use False.
+
Returns
-------
gw_dist : float
@@ -587,56 +594,55 @@ def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
The function solves the following optimization problem:
.. math::
- \GW = arg\min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
+ GW = arg\min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
- s.t. \GW 1 = p
+ s.t. T 1 = p
- \GW^T 1= q
+ T^T 1= q
- \GW\geq 0
+ T\geq 0
Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- p : distribution in the source space
- q : distribution in the target space
- L : loss function to account for the misfit between the similarity matrices
- H : entropy
+ - C1 : Metric cost matrix in the source space
+ - C2 : Metric cost matrix in the target space
+ - p : distribution in the source space
+ - q : distribution in the target space
+ - L : loss function to account for the misfit between the similarity matrices
+ - H : entropy
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
+        Metric cost matrix in the target space
p : ndarray, shape (ns,)
- distribution in the source space
+ Distribution in the source space
q : ndarray, shape (nt,)
- distribution in the target space
+ Distribution in the target space
loss_fun : string
- loss function used for the solver either 'square_loss' or 'kl_loss'
+ Loss function used for the solver either 'square_loss' or 'kl_loss'
epsilon : float
Regularization term >0
max_iter : int, optional
- Max number of iterations
+ Max number of iterations
tol : float, optional
Stop threshold on error (>0)
verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
+ Record log if True.
Returns
-------
T : ndarray, shape (ns, nt)
- coupling between the two spaces that minimizes :
- \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
+ Optimal coupling between the two spaces
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
@@ -695,28 +701,28 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
The function solves the following optimization problem:
.. math::
- \GW_Dist = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
+ GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- p : distribution in the source space
- q : distribution in the target space
- L : loss function to account for the misfit between the similarity matrices
- H : entropy
+ - C1 : Metric cost matrix in the source space
+ - C2 : Metric cost matrix in the target space
+ - p : distribution in the source space
+ - q : distribution in the target space
+ - L : loss function to account for the misfit between the similarity matrices
+ - H : entropy
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
+        Metric cost matrix in the target space
p : ndarray, shape (ns,)
- distribution in the source space
+ Distribution in the source space
q : ndarray, shape (nt,)
- distribution in the target space
- loss_fun : string
- loss function used for the solver either 'square_loss' or 'kl_loss'
+ Distribution in the target space
+ loss_fun : str
+ Loss function used for the solver either 'square_loss' or 'kl_loss'
epsilon : float
Regularization term >0
max_iter : int, optional
@@ -726,7 +732,7 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
+ Record log if True.
Returns
-------
@@ -736,11 +742,10 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
-
gw, logv = entropic_gromov_wasserstein(
C1, C2, p, q, loss_fun, epsilon, max_iter, tol, verbose, log=True)
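
A toy call of the entropic solver wrapped above (same normalized toy matrices as for the exact solver):

    import numpy as np
    import ot
    rng = np.random.RandomState(0)
    xs, xt = rng.randn(10, 2), rng.randn(10, 2)
    C1, C2 = ot.dist(xs, xs), ot.dist(xt, xt)
    C1 /= C1.max(); C2 /= C2.max()
    p, q = ot.unif(10), ot.unif(10)
    T = ot.gromov.entropic_gromov_wasserstein(C1, C2, p, q, 'square_loss',
                                              epsilon=5e-2)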
@@ -762,29 +767,31 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
The function solves the following optimization problem:
.. math::
- C = argmin_C\in R^{NxN} \sum_s \lambda_s GW(C,Cs,p,ps)
+ C = argmin_{C\in R^{NxN}} \sum_s \lambda_s GW(C,C_s,p,p_s)
Where :
- Cs : metric cost matrix
- ps : distribution
+ - :math:`C_s` : metric cost matrix
+ - :math:`p_s` : distribution
Parameters
----------
- N : Integer
- Size of the targeted barycenter
- Cs : list of S np.ndarray(ns,ns)
- Metric cost matrices
- ps : list of S np.ndarray(ns,)
- sample weights in the S spaces
- p : ndarray, shape(N,)
- weights in the targeted barycenter
+ N : int
+ Size of the targeted barycenter
+ Cs : list of S np.ndarray of shape (ns,ns)
+ Metric cost matrices
+ ps : list of S np.ndarray of shape (ns,)
+ Sample weights in the S spaces
+ p : ndarray, shape(N,)
+ Weights in the targeted barycenter
lambdas : list of float
- list of the S spaces' weights
- loss_fun : tensor-matrix multiplication function based on specific loss function
- update : function(p,lambdas,T,Cs) that updates C according to a specific Kernel
- with the S Ts couplings calculated at each iteration
+ List of the S spaces' weights.
+ loss_fun : callable
+ Tensor-matrix multiplication function based on specific loss function.
+ update : callable
+ function(p,lambdas,T,Cs) that updates C according to a specific Kernel
+ with the S Ts couplings calculated at each iteration
epsilon : float
Regularization term >0
max_iter : int, optional
@@ -792,11 +799,11 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
tol : float, optional
        Stop threshold on error (>0)
verbose : bool, optional
- Print information along iterations
+ Print information along iterations.
log : bool, optional
- record log if True
- init_C : bool, ndarray, shape(N,N)
- random initial value for the C matrix provided by user
+ Record log if True.
+ init_C : bool | ndarray, shape (N, N)
+ Random initial value for the C matrix provided by user.
Returns
-------
@@ -806,9 +813,8 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
-
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
S = len(Cs)
@@ -818,6 +824,7 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
# Initialization of C : random SPD matrix (if not provided by user)
if init_C is None:
+ # XXX use random state
xalea = np.random.randn(N, 2)
C = dist(xalea, xalea)
C /= C.max()
@@ -829,7 +836,7 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
error = []
- while(err > tol and cpt < max_iter):
+ while (err > tol) and (cpt < max_iter):
Cprev = C
T = [entropic_gromov_wasserstein(Cs[s], C, ps[s], p, loss_fun, epsilon,
@@ -873,37 +880,36 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
.. math::
        C = argmin_{C\in R^{NxN}} \sum_s \lambda_s GW(C,C_s,p,p_s)
-
Where :
- Cs : metric cost matrix
- ps : distribution
+ - Cs : metric cost matrix
+ - ps : distribution
Parameters
----------
- N : Integer
- Size of the targeted barycenter
- Cs : list of S np.ndarray(ns,ns)
- Metric cost matrices
- ps : list of S np.ndarray(ns,)
- sample weights in the S spaces
- p : ndarray, shape(N,)
- weights in the targeted barycenter
+ N : int
+ Size of the targeted barycenter
+ Cs : list of S np.ndarray of shape (ns, ns)
+ Metric cost matrices
+ ps : list of S np.ndarray of shape (ns,)
+ Sample weights in the S spaces
+ p : ndarray, shape (N,)
+ Weights in the targeted barycenter
lambdas : list of float
- list of the S spaces' weights
+ List of the S spaces' weights
    loss_fun : callable
        Tensor-matrix multiplication function based on specific loss function.
    update : callable
        function(p,lambdas,T,Cs) that updates C according to a specific Kernel
        with the S Ts couplings calculated at each iteration
max_iter : int, optional
Max number of iterations
tol : float, optional
- Stop threshol on error (>0)
+        Stop threshold on error (>0).
verbose : bool, optional
- Print information along iterations
+ Print information along iterations.
log : bool, optional
- record log if True
- init_C : bool, ndarray, shape(N,N)
- random initial value for the C matrix provided by user
+ Record log if True.
+ init_C : bool | ndarray, shape(N,N)
+ Random initial value for the C matrix provided by user.
Returns
-------
@@ -913,11 +919,10 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
-
S = len(Cs)
Cs = [np.asarray(Cs[s], dtype=np.float64) for s in range(S)]
@@ -925,6 +930,7 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
# Initialization of C : random SPD matrix (if not provided by user)
if init_C is None:
+ # XXX : should use a random state and not use the global seed
xalea = np.random.randn(N, 2)
C = dist(xalea, xalea)
C /= C.max()
@@ -970,47 +976,52 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_features=False,
p=None, loss_fun='square_loss', max_iter=100, tol=1e-9,
verbose=False, log=False, init_C=None, init_X=None):
- """
- Compute the fgw barycenter as presented eq (5) in [24].
+ """Compute the fgw barycenter as presented eq (5) in [24].
+
+ Parameters
----------
N : integer
Desired number of samples of the target barycenter
Ys: list of ndarray, each element has shape (ns,d)
Features of all samples
Cs : list of ndarray, each element has shape (ns,ns)
- Structure matrices of all samples
+ Structure matrices of all samples
ps : list of ndarray, each element has shape (ns,)
- masses of all samples
+ Masses of all samples.
lambdas : list of float
- list of the S spaces' weights
+ List of the S spaces' weights
alpha : float
- Alpha parameter for the fgw distance
- fixed_structure : bool
- Wether to fix the structure of the barycenter during the updates
- fixed_features : bool
- Wether to fix the feature of the barycenter during the updates
- init_C : ndarray, shape (N,N), optional
- initialization for the barycenters' structure matrix. If not set random init
- init_X : ndarray, shape (N,d), optional
- initialization for the barycenters' features. If not set random init
+ Alpha parameter for the fgw distance
+ fixed_structure : bool
+ Whether to fix the structure of the barycenter during the updates
+ fixed_features : bool
+ Whether to fix the feature of the barycenter during the updates
+ init_C : ndarray, shape (N,N), optional
+ Initialization for the barycenters' structure matrix. If not set
+ a random init is used.
+ init_X : ndarray, shape (N,d), optional
+ Initialization for the barycenters' features. If not set a
+ random init is used.
+
Returns
- ----------
- X : ndarray, shape (N,d)
+ -------
+ X : ndarray, shape (N, d)
Barycenters' features
- C : ndarray, shape (N,N)
+ C : ndarray, shape (N, N)
Barycenters' structure matrix
- log_: dictionary
- Only returned when log=True
+ log_: dict
+ Only returned when log=True. It contains the keys:
T : list of (N,ns) transport matrices
- Ms : all distance matrices between the feature of the barycenter and the other features dist(X,Ys) shape (N,ns)
+ Ms : all distance matrices between the feature of the barycenter and the
+ other features dist(X,Ys) shape (N,ns)
+
References
----------
.. [24] Vayer Titouan, Chapel Laetitia, Flamary Rémi, Tavenard Romain
- and Courty Nicolas
+ and Courty Nicolas
"Optimal Transport for structured data with application on graphs"
International Conference on Machine Learning (ICML). 2019.
"""
-
S = len(Cs)
d = Ys[0].shape[1] # dimension on the node features
if p is None:
@@ -1073,7 +1084,8 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_
T_temp = [t.T for t in T]
C = update_sructure_matrix(p, lambdas, T_temp, Cs)
- T = [fused_gromov_wasserstein((1 - alpha) * Ms[s], C, Cs[s], p, ps[s], loss_fun, alpha, numItermax=max_iter, stopThr=1e-5, verbose=verbose) for s in range(S)]
+ T = [fused_gromov_wasserstein((1 - alpha) * Ms[s], C, Cs[s], p, ps[s], loss_fun, alpha,
+ numItermax=max_iter, stopThr=1e-5, verbose=verbose) for s in range(S)]
# T is N,ns
err_feature = np.linalg.norm(X - Xprev.reshape(N, d))
@@ -1092,6 +1104,7 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_
print('{:5d}|{:8e}|'.format(cpt, err_feature))
cpt += 1
+
if log:
log_['T'] = T # from target to Ys
log_['p'] = p
@@ -1104,23 +1117,25 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_
def update_sructure_matrix(p, lambdas, T, Cs):
- """
- Updates C according to the L2 Loss kernel with the S Ts couplings
- calculated at each iteration
+ """Updates C according to the L2 Loss kernel with the S Ts couplings.
+
+ It is calculated at each iteration
+
Parameters
----------
- p : ndarray, shape (N,)
- masses in the targeted barycenter
+ p : ndarray, shape (N,)
+ Masses in the targeted barycenter.
lambdas : list of float
- list of the S spaces' weights
- T : list of S np.ndarray(ns,N)
- the S Ts couplings calculated at each iteration
- Cs : list of S ndarray, shape(ns,ns)
- Metric cost matrices
+ List of the S spaces' weights.
+ T : list of S ndarray of shape (ns, N)
+ The S Ts couplings calculated at each iteration.
+ Cs : list of S ndarray, shape (ns, ns)
+ Metric cost matrices.
+
Returns
- ----------
- C : ndarray, shape (nt,nt)
- updated C matrix
+ -------
+ C : ndarray, shape (nt, nt)
+ Updated C matrix.
"""
tmpsum = sum([lambdas[s] * np.dot(T[s].T, Cs[s]).dot(T[s]) for s in range(len(T))])
ppt = np.outer(p, p)
@@ -1129,22 +1144,26 @@ def update_sructure_matrix(p, lambdas, T, Cs):
def update_feature_matrix(lambdas, Ys, Ts, p):
- """
- Updates the feature with respect to the S Ts couplings. See "Solving the barycenter problem with Block Coordinate Descent (BCD)" in [24]
- calculated at each iteration
+ """Updates the feature with respect to the S Ts couplings.
+
+
+ See "Solving the barycenter problem with Block Coordinate Descent (BCD)"
+ in [24] calculated at each iteration
+
Parameters
----------
- p : ndarray, shape (N,)
- masses in the targeted barycenter
+ p : ndarray, shape (N,)
+ Masses in the targeted barycenter.
lambdas : list of float
- list of the S spaces' weights
+ List of the S spaces' weights
Ts : list of S np.ndarray(ns,N)
the S Ts couplings calculated at each iteration
Ys : list of S ndarray, shape(d,ns)
- The features
+ The features.
+
Returns
- ----------
- X : ndarray, shape (d,N)
+ -------
+ X : ndarray, shape (d, N)
+ Updated feature matrix.
References
----------
@@ -1153,9 +1172,8 @@ def update_feature_matrix(lambdas, Ys, Ts, p):
"Optimal Transport for structured data with application on graphs"
International Conference on Machine Learning (ICML). 2019.
"""
+ p = np.array(1. / p).reshape(-1,)
- p = np.diag(np.array(1 / p).reshape(-1,))
-
- tmpsum = sum([lambdas[s] * np.dot(Ys[s], Ts[s].T).dot(p) for s in range(len(Ts))])
+ tmpsum = sum([lambdas[s] * np.dot(Ys[s], Ts[s].T) * p[None, :] for s in range(len(Ts))])
return tmpsum
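
The last hunk above replaces a dense np.diag product with row-wise
broadcasting; a small sketch (shapes made up for illustration) showing the
two forms agree while the broadcast never materializes the diagonal matrix::

    import numpy as np

    rng = np.random.RandomState(0)
    Y, T = rng.rand(3, 5), rng.rand(5, 4)  # features (d, ns), coupling (ns, N)
    p = rng.rand(4) + 0.1                  # positive barycenter masses

    w = np.array(1. / p).reshape(-1,)
    dense = np.dot(Y, T).dot(np.diag(w))   # old form: builds an (N, N) diag
    broadcast = np.dot(Y, T) * w[None, :]  # new form: scales columns directly
    assert np.allclose(dense, broadcast)
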
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 02cbd8c..17f1731 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -1,6 +1,9 @@
# -*- coding: utf-8 -*-
"""
Solvers for the original linear program OT problem
+
+
+
"""
# Author: Remi Flamary <remi.flamary@unice.fr>
@@ -10,20 +13,22 @@ Solvers for the original linear program OT problem
import multiprocessing
import numpy as np
+from scipy.sparse import coo_matrix
from . import cvx
# import compiled emd
-from .emd_wrap import emd_c, check_result
+from .emd_wrap import emd_c, check_result, emd_1d_sorted
from ..utils import parmap
from .cvx import barycenter
from ..utils import dist
-__all__=['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx']
+__all__ = ['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx',
+           'emd_1d', 'emd2_1d', 'wasserstein_1d']
def emd(a, b, M, numItermax=100000, log=False):
- """Solves the Earth Movers distance problem and returns the OT matrix
+ r"""Solves the Earth Movers distance problem and returns the OT matrix
.. math::
@@ -37,26 +42,30 @@ def emd(a, b, M, numItermax=100000, log=False):
- M is the metric cost matrix
- a and b are the sample weights
+ .. warning::
+ Note that the M matrix needs to be a C-order numpy.array in float64
+ format.
+
Uses the algorithm proposed in [1]_
Parameters
----------
- a : (ns,) ndarray, float64
- Source histogram (uniform weigth if empty list)
- b : (nt,) ndarray, float64
- Target histogram (uniform weigth if empty list)
- M : (ns,nt) ndarray, float64
- loss matrix
+ a : (ns,) numpy.ndarray, float64
+ Source histogram (uniform weight if empty list)
+ b : (nt,) numpy.ndarray, float64
+ Target histogram (uniform weight if empty list)
+ M : (ns,nt) numpy.ndarray, float64
+ Loss matrix (C-order array with dtype float64)
numItermax : int, optional (default=100000)
The maximum number of iterations before stopping the optimization
algorithm if it has not converged.
- log: boolean, optional (default=False)
+ log: bool, optional (default=False)
If True, returns a dictionary containing the cost and dual
variables. Otherwise returns only the optimal transportation matrix.
Returns
-------
- gamma: (ns x nt) ndarray
+ gamma: (ns x nt) numpy.ndarray
Optimal transportation matrix for the given parameters
log: dict
If input log is true, a dictionary containing the cost and dual
@@ -74,8 +83,8 @@ def emd(a, b, M, numItermax=100000, log=False):
>>> b=[.5,.5]
>>> M=[[0.,1.],[1.,0.]]
>>> ot.emd(a,b,M)
- array([[ 0.5, 0. ],
- [ 0. , 0.5]])
+ array([[0.5, 0. ],
+ [0. , 0.5]])
References
----------
@@ -94,7 +103,7 @@ def emd(a, b, M, numItermax=100000, log=False):
b = np.asarray(b, dtype=np.float64)
M = np.asarray(M, dtype=np.float64)
- # if empty array given then use unifor distributions
+ # if empty array given then use uniform distributions
if len(a) == 0:
a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
if len(b) == 0:
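
Given the warning added above (M must be a C-order float64 array), a hedged
sketch of sanitizing an arbitrary cost matrix before calling ot.emd;
as_cost_matrix is a hypothetical helper, not part of the API::

    import numpy as np
    import ot

    def as_cost_matrix(M):
        # copies only when M is not already C-contiguous float64
        return np.ascontiguousarray(M, dtype=np.float64)

    M = as_cost_matrix(np.asfortranarray([[0., 1.], [1., 0.]]))  # F-order input
    G = ot.emd([.5, .5], [.5, .5], M)
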
@@ -115,7 +124,7 @@ def emd(a, b, M, numItermax=100000, log=False):
def emd2(a, b, M, processes=multiprocessing.cpu_count(),
numItermax=100000, log=False, return_matrix=False):
- """Solves the Earth Movers distance problem and returns the loss
+ r"""Solves the Earth Movers distance problem and returns the loss
.. math::
\gamma = arg\min_\gamma <\gamma,M>_F
@@ -128,16 +137,20 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
- M is the metric cost matrix
- a and b are the sample weights
+ .. warning::
+ Note that the M matrix needs to be a C-order numpy.array in float64
+ format.
+
Uses the algorithm proposed in [1]_
Parameters
----------
- a : (ns,) ndarray, float64
- Source histogram (uniform weigth if empty list)
- b : (nt,) ndarray, float64
- Target histogram (uniform weigth if empty list)
- M : (ns,nt) ndarray, float64
- loss matrix
+ a : (ns,) numpy.ndarray, float64
+ Source histogram (uniform weight if empty list)
+ b : (nt,) numpy.ndarray, float64
+ Target histogram (uniform weight if empty list)
+ M : (ns,nt) numpy.ndarray, float64
+ Loss matrix (C-order array with dtype float64)
numItermax : int, optional (default=100000)
The maximum number of iterations before stopping the optimization
algorithm if it has not converged.
@@ -151,7 +164,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
-------
gamma: (ns x nt) ndarray
Optimal transportation matrix for the given parameters
- log: dict
+ log: dict
If input log is true, a dictionary containing the cost and dual
variables and exit status
@@ -187,7 +200,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
b = np.asarray(b, dtype=np.float64)
M = np.asarray(M, dtype=np.float64)
- # if empty array given then use unifor distributions
+ # if empty array given then use uniform distributions
if len(a) == 0:
a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
if len(b) == 0:
@@ -231,9 +244,9 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
Parameters
----------
- measures_locations : list of (k_i,d) np.ndarray
+ measures_locations : list of (k_i,d) numpy.ndarray
The discrete support of a measure supported on k_i locations of a d-dimensional space (k_i can be different for each element of the list)
- measures_weights : list of (k_i,) np.ndarray
+ measures_weights : list of (k_i,) numpy.ndarray
Numpy arrays where each numpy array has k_i non-negatives values summing to one representing the weights of each discrete input measure
X_init : (k,d) np.ndarray
@@ -246,7 +259,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
numItermax : int, optional
Max number of iterations
stopThr : float, optional
- Stop threshol on error (>0)
+ Stop threshold on error (>0)
verbose : bool, optional
Print information along iterations
log : bool, optional
@@ -308,4 +321,288 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
log_dict['displacement_square_norms'] = displacement_square_norms
return X, log_dict
else:
- return X \ No newline at end of file
+ return X
+
+
+def emd_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True,
+ log=False):
+ r"""Solves the Earth Movers distance problem between 1d measures and returns
+ the OT matrix
+
+
+ .. math::
+ \gamma = arg\min_\gamma \sum_i \sum_j \gamma_{ij} d(x_a[i], x_b[j])
+
+ s.t. \gamma 1 = a,
+ \gamma^T 1 = b,
+ \gamma\geq 0
+ where :
+
+ - d is the metric
+ - x_a and x_b are the samples
+ - a and b are the sample weights
+
+ When 'minkowski' is used as a metric, :math:`d(x, y) = |x - y|^p`.
+
+ Uses the algorithm detailed in [1]_
+
+ Parameters
+ ----------
+ x_a : (ns,) or (ns, 1) ndarray, float64
+ Source dirac locations (on the real line)
+ x_b : (nt,) or (nt, 1) ndarray, float64
+ Target dirac locations (on the real line)
+ a : (ns,) ndarray, float64, optional
+ Source histogram (default is uniform weight)
+ b : (nt,) ndarray, float64, optional
+ Target histogram (default is uniform weight)
+ metric: str, optional (default='sqeuclidean')
+ Metric to be used. Only strings listed in :func:`ot.dist` are accepted.
+ Due to implementation details, this function runs faster when
+ `'sqeuclidean'`, `'minkowski'`, `'cityblock'`, or `'euclidean'` metrics
+ are used.
+ p: float, optional (default=1.0)
+ The p-norm to apply if metric='minkowski'
+ dense: boolean, optional (default=True)
+ If True, returns :math:`\gamma` as a dense ndarray of shape (ns, nt).
+ Otherwise returns a sparse representation using scipy's `coo_matrix`
+ format. Due to implementation details, this function runs faster when
+ `'sqeuclidean'`, `'minkowski'`, `'cityblock'`, or `'euclidean'` metrics
+ are used.
+ log: boolean, optional (default=False)
+ If True, returns a dictionary containing the cost.
+ Otherwise returns only the optimal transportation matrix.
+
+ Returns
+ -------
+ gamma: (ns, nt) ndarray
+ Optimal transportation matrix for the given parameters
+ log: dict
+ If input log is True, a dictionary containing the cost
+
+
+ Examples
+ --------
+
+ Simple example with obvious solution. The function emd_1d accepts lists and
+ performs automatic conversion to numpy arrays
+
+ >>> import ot
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> x_a = [2., 0.]
+ >>> x_b = [0., 3.]
+ >>> ot.emd_1d(x_a, x_b, a, b)
+ array([[0. , 0.5],
+ [0.5, 0. ]])
+ >>> ot.emd_1d(x_a, x_b)
+ array([[0. , 0.5],
+ [0.5, 0. ]])
+
+ References
+ ----------
+
+ .. [1] Peyré, G., & Cuturi, M. (2018). "Computational Optimal
+ Transport".
+
+ See Also
+ --------
+ ot.lp.emd : EMD for multidimensional distributions
+ ot.lp.emd2_1d : EMD for 1d distributions (returns cost instead of the
+ transportation matrix)
+ """
+ a = np.asarray(a, dtype=np.float64)
+ b = np.asarray(b, dtype=np.float64)
+ x_a = np.asarray(x_a, dtype=np.float64)
+ x_b = np.asarray(x_b, dtype=np.float64)
+
+ assert (x_a.ndim == 1 or x_a.ndim == 2 and x_a.shape[1] == 1), \
+ "emd_1d should only be used with monodimensional data"
+ assert (x_b.ndim == 1 or x_b.ndim == 2 and x_b.shape[1] == 1), \
+ "emd_1d should only be used with monodimensional data"
+
+ # if empty array given then use uniform distributions
+ if a.ndim == 0 or len(a) == 0:
+ a = np.ones((x_a.shape[0],), dtype=np.float64) / x_a.shape[0]
+ if b.ndim == 0 or len(b) == 0:
+ b = np.ones((x_b.shape[0],), dtype=np.float64) / x_b.shape[0]
+
+ x_a_1d = x_a.reshape((-1, ))
+ x_b_1d = x_b.reshape((-1, ))
+ perm_a = np.argsort(x_a_1d)
+ perm_b = np.argsort(x_b_1d)
+
+ G_sorted, indices, cost = emd_1d_sorted(a, b,
+ x_a_1d[perm_a], x_b_1d[perm_b],
+ metric=metric, p=p)
+ G = coo_matrix((G_sorted, (perm_a[indices[:, 0]], perm_b[indices[:, 1]])),
+ shape=(a.shape[0], b.shape[0]))
+ if dense:
+ G = G.toarray()
+ if log:
+ log = {'cost': cost}
+ return G, log
+ return G
+
+
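
Since emd_1d builds the plan as a scipy coo_matrix internally, passing
dense=False returns the sparse plan directly, which is the memory-friendly
option for large supports; a short usage sketch::

    import ot

    x_a, x_b = [2., 0.], [0., 3.]
    G_sparse = ot.emd_1d(x_a, x_b, dense=False)  # scipy.sparse.coo_matrix
    print(G_sparse.toarray())                    # same plan as dense=True
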
+def emd2_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True,
+ log=False):
+ r"""Solves the Earth Movers distance problem between 1d measures and returns
+ the loss
+
+
+ .. math::
+ \gamma = arg\min_\gamma \sum_i \sum_j \gamma_{ij} d(x_a[i], x_b[j])
+
+ s.t. \gamma 1 = a,
+ \gamma^T 1 = b,
+ \gamma\geq 0
+ where :
+
+ - d is the metric
+ - x_a and x_b are the samples
+ - a and b are the sample weights
+
+ When 'minkowski' is used as a metric, :math:`d(x, y) = |x - y|^p`.
+
+ Uses the algorithm detailed in [1]_
+
+ Parameters
+ ----------
+ x_a : (ns,) or (ns, 1) ndarray, float64
+ Source dirac locations (on the real line)
+ x_b : (nt,) or (nt, 1) ndarray, float64
+ Target dirac locations (on the real line)
+ a : (ns,) ndarray, float64, optional
+ Source histogram (default is uniform weight)
+ b : (nt,) ndarray, float64, optional
+ Target histogram (default is uniform weight)
+ metric: str, optional (default='sqeuclidean')
+ Metric to be used. Only strings listed in :func:`ot.dist` are accepted.
+ Due to implementation details, this function runs faster when
+ `'sqeuclidean'`, `'minkowski'`, `'cityblock'`, or `'euclidean'` metrics
+ are used.
+ p: float, optional (default=1.0)
+ The p-norm to apply if metric='minkowski'
+ dense: boolean, optional (default=True)
+ If True, returns :math:`\gamma` as a dense ndarray of shape (ns, nt).
+ Otherwise returns a sparse representation using scipy's `coo_matrix`
+ format. Only used if log is set to True. Due to implementation details,
+ this function runs faster when dense is set to False.
+ log: boolean, optional (default=False)
+ If True, returns a dictionary containing the transportation matrix.
+ Otherwise returns only the loss.
+
+ Returns
+ -------
+ loss: float
+ Cost associated to the optimal transportation
+ log: dict
+ If input log is True, a dictionary containing the Optimal transportation
+ matrix for the given parameters
+
+
+ Examples
+ --------
+
+ Simple example with obvious solution. The function emd2_1d accepts lists and
+ performs automatic conversion to numpy arrays
+
+ >>> import ot
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> x_a = [2., 0.]
+ >>> x_b = [0., 3.]
+ >>> ot.emd2_1d(x_a, x_b, a, b)
+ 0.5
+ >>> ot.emd2_1d(x_a, x_b)
+ 0.5
+
+ References
+ ----------
+
+ .. [1] Peyré, G., & Cuturi, M. (2018). "Computational Optimal
+ Transport".
+
+ See Also
+ --------
+ ot.lp.emd2 : EMD for multidimensional distributions
+ ot.lp.emd_1d : EMD for 1d distributions (returns the transportation matrix
+ instead of the cost)
+ """
+ # If we do not return G (log==False), then we should not cast it to dense
+ # (useless overhead)
+ G, log_emd = emd_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric=metric, p=p,
+ dense=dense and log, log=True)
+ cost = log_emd['cost']
+ if log:
+ log_emd = {'G': G}
+ return cost, log_emd
+ return cost
+
+
+def wasserstein_1d(x_a, x_b, a=None, b=None, p=1.):
+ r"""Solves the p-Wasserstein distance problem between 1d measures and returns
+ the distance
+
+ .. math::
+ \min_\gamma \left( \sum_i \sum_j \gamma_{ij} \|x_a[i] - x_b[j]\|^p \right)^{1/p}
+
+ s.t. \gamma 1 = a,
+ \gamma^T 1 = b,
+ \gamma\geq 0
+
+ where :
+
+ - x_a and x_b are the samples
+ - a and b are the sample weights
+
+ Uses the algorithm detailed in [1]_
+
+ Parameters
+ ----------
+ x_a : (ns,) or (ns, 1) ndarray, float64
+ Source dirac locations (on the real line)
+ x_b : (nt,) or (nt, 1) ndarray, float64
+ Target dirac locations (on the real line)
+ a : (ns,) ndarray, float64, optional
+ Source histogram (default is uniform weight)
+ b : (nt,) ndarray, float64, optional
+ Target histogram (default is uniform weight)
+ p: float, optional (default=1.0)
+ The order of the p-Wasserstein distance to be computed
+
+ Returns
+ -------
+ dist: float
+ p-Wasserstein distance
+
+
+ Examples
+ --------
+
+ Simple example with obvious solution. The function wasserstein_1d accepts
+ lists and performs automatic conversion to numpy arrays
+
+ >>> import ot
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> x_a = [2., 0.]
+ >>> x_b = [0., 3.]
+ >>> ot.wasserstein_1d(x_a, x_b, a, b)
+ 0.5
+ >>> ot.wasserstein_1d(x_a, x_b)
+ 0.5
+
+ References
+ ----------
+
+ .. [1] Peyré, G., & Cuturi, M. (2018). "Computational Optimal
+ Transport".
+
+ See Also
+ --------
+ ot.lp.emd_1d : EMD for 1d distributions
+ """
+ cost_emd = emd2_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p,
+ dense=False, log=False)
+ return np.power(cost_emd, 1. / p)
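
For uniform weights over equally many samples, the 1d p-Wasserstein distance
reduces to matching sorted samples; a sanity-check sketch of that classical
identity against wasserstein_1d::

    import numpy as np
    import ot

    rng = np.random.RandomState(0)
    x_a, x_b = rng.randn(100), rng.randn(100)
    p = 2.
    w_sorted = np.mean(np.abs(np.sort(x_a) - np.sort(x_b)) ** p) ** (1. / p)
    w_ot = ot.wasserstein_1d(x_a, x_b, p=p)
    assert np.allclose(w_sorted, w_ot)
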
diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
index 83ee6aa..2b6c495 100644
--- a/ot/lp/emd_wrap.pyx
+++ b/ot/lp/emd_wrap.pyx
@@ -10,7 +10,10 @@ Cython linker with C solver
import numpy as np
cimport numpy as np
+from ..utils import dist
+
cimport cython
+cimport libc.math as math
import warnings
@@ -55,13 +58,16 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
- M is the metric cost matrix
- a and b are the sample weights
+ .. warning::
+ Note that the M matrix needs to be a C-order :py:class:`numpy.ndarray`
+
Parameters
----------
- a : (ns,) ndarray, float64
+ a : (ns,) numpy.ndarray, float64
source histogram
- b : (nt,) ndarray, float64
+ b : (nt,) numpy.ndarray, float64
target histogram
- M : (ns,nt) ndarray, float64
+ M : (ns,nt) numpy.ndarray, float64
loss matrix
max_iter : int
The maximum number of iterations before stopping the optimization
@@ -70,7 +76,7 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
Returns
-------
- gamma: (ns x nt) ndarray
+ gamma: (ns x nt) numpy.ndarray
Optimal transportation matrix for the given parameters
"""
@@ -93,3 +99,89 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
cdef int result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
return G, cost, alpha, beta, result_code
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def emd_1d_sorted(np.ndarray[double, ndim=1, mode="c"] u_weights,
+ np.ndarray[double, ndim=1, mode="c"] v_weights,
+ np.ndarray[double, ndim=1, mode="c"] u,
+ np.ndarray[double, ndim=1, mode="c"] v,
+ str metric='sqeuclidean',
+ double p=1.):
+ r"""
+ Solves the Earth Movers distance problem between sorted 1d measures and
+ returns the OT matrix and the associated cost
+
+ Parameters
+ ----------
+ u_weights : (ns,) ndarray, float64
+ Source histogram
+ v_weights : (nt,) ndarray, float64
+ Target histogram
+ u : (ns,) ndarray, float64
+ Source dirac locations (on the real line)
+ v : (nt,) ndarray, float64
+ Target dirac locations (on the real line)
+ metric: str, optional (default='sqeuclidean')
+ Metric to be used. Only strings listed in :func:`ot.dist` are accepted.
+ Due to implementation details, this function runs faster when
+ `'sqeuclidean'`, `'minkowski'`, `'cityblock'`, or `'euclidean'` metrics
+ are used.
+ p: float, optional (default=1.0)
+ The p-norm to apply if metric='minkowski'
+
+ Returns
+ -------
+ gamma: (n, ) ndarray, float64
+ Values in the Optimal transportation matrix
+ indices: (n, 2) ndarray, int64
+ Indices of the values stored in gamma for the Optimal transportation
+ matrix
+ cost : float
+ Cost associated to the optimal transportation
+ """
+ cdef double cost = 0.
+ cdef int n = u_weights.shape[0]
+ cdef int m = v_weights.shape[0]
+
+ cdef int i = 0
+ cdef double w_i = u_weights[0]
+ cdef int j = 0
+ cdef double w_j = v_weights[0]
+
+ cdef double m_ij = 0.
+
+ cdef np.ndarray[double, ndim=1, mode="c"] G = np.zeros((n + m - 1, ),
+ dtype=np.float64)
+ cdef np.ndarray[long, ndim=2, mode="c"] indices = np.zeros((n + m - 1, 2),
+ dtype=np.int)
+ cdef int cur_idx = 0
+ while i < n and j < m:
+ if metric == 'sqeuclidean':
+ m_ij = (u[i] - v[j]) * (u[i] - v[j])
+ elif metric == 'cityblock' or metric == 'euclidean':
+ m_ij = math.fabs(u[i] - v[j])
+ elif metric == 'minkowski':
+ m_ij = math.pow(math.fabs(u[i] - v[j]), p)
+ else:
+ m_ij = dist(u[i].reshape((1, 1)), v[j].reshape((1, 1)),
+ metric=metric)[0, 0]
+ if w_i < w_j or j == m - 1:
+ cost += m_ij * w_i
+ G[cur_idx] = w_i
+ indices[cur_idx, 0] = i
+ indices[cur_idx, 1] = j
+ i += 1
+ w_j -= w_i
+ if i < n:
+     # guard: once the last source atom is consumed the loop exits,
+     # so do not read past the end of u_weights
+     w_i = u_weights[i]
+ else:
+ cost += m_ij * w_j
+ G[cur_idx] = w_j
+ indices[cur_idx, 0] = i
+ indices[cur_idx, 1] = j
+ j += 1
+ w_i -= w_j
+ w_j = v_weights[j]
+ cur_idx += 1
+ return G[:cur_idx], indices[:cur_idx], cost
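
A plain-Python sketch of the greedy merge implemented above in Cython: walk
both sorted supports and always transport the smaller remaining mass
(illustrative logic only, assuming a and b sum to the same total; no Cython
typing or metric dispatch)::

    def emd_1d_sorted_py(a, b, u, v):
        # u, v: sorted 1d locations; a, b: their weights
        i = j = 0
        w_i, w_j = a[0], b[0]
        cost, plan = 0., []  # plan holds (i, j, mass) triplets
        while i < len(u) and j < len(v):
            m_ij = (u[i] - v[j]) ** 2  # sqeuclidean ground cost
            if w_i < w_j or j == len(v) - 1:
                cost += m_ij * w_i
                plan.append((i, j, w_i))
                w_j -= w_i
                i += 1
                if i < len(u):
                    w_i = a[i]
            else:
                cost += m_ij * w_j
                plan.append((i, j, w_j))
                w_i -= w_j
                j += 1
                w_j = b[j]
        return plan, cost
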
diff --git a/ot/optim.py b/ot/optim.py
index f94aceb..0abd9e9 100644
--- a/ot/optim.py
+++ b/ot/optim.py
@@ -26,14 +26,13 @@ def line_search_armijo(f, xk, pk, gfk, old_fval,
Parameters
----------
-
- f : function
+ f : callable
loss function
- xk : np.ndarray
+ xk : ndarray
initial position
- pk : np.ndarray
+ pk : ndarray
descent direction
- gfk : np.ndarray
+ gfk : ndarray
gradient of f at xk
old_fval : float
loss value at xk
@@ -161,15 +160,15 @@ def cg(a, b, M, reg, f, df, G0=None, numItermax=200,
Parameters
----------
- a : np.ndarray (ns,)
+ a : ndarray, shape (ns,)
samples weights in the source domain
- b : np.ndarray (nt,)
+ b : ndarray, shape (nt,)
samples in the target domain
- M : np.ndarray (ns,nt)
+ M : ndarray, shape (ns, nt)
loss matrix
reg : float
Regularization term >0
- G0 : np.ndarray (ns,nt), optional
+ G0 : ndarray, shape (ns,nt), optional
initial guess (default is indep joint density)
numItermax : int, optional
Max number of iterations
@@ -299,17 +298,17 @@ def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10,
Parameters
----------
- a : np.ndarray (ns,)
+ a : ndarray, shape (ns,)
samples weights in the source domain
- b : np.ndarray (nt,)
+ b : ndarray, shape (nt,)
samples in the target domain
- M : np.ndarray (ns,nt)
+ M : ndarray, shape (ns, nt)
loss matrix
reg1 : float
Entropic Regularization term >0
reg2 : float
Second Regularization term >0
- G0 : np.ndarray (ns,nt), optional
+ G0 : ndarray, shape (ns, nt), optional
initial guess (default is indep joint density)
numItermax : int, optional
Max number of iterations
@@ -326,15 +325,13 @@ def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10,
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : ndarray, shape (ns, nt)
Optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
-
References
----------
-
.. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
.. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized conditional gradient: analysis of convergence and applications. arXiv preprint arXiv:1510.06567.
@@ -422,13 +419,12 @@ def solve_1d_linesearch_quad(a, b, c):
Parameters
----------
a,b,c : float
- The coefficients of the quadratic function
+ The coefficients of the quadratic function
Returns
-------
x : float
The optimal value which leads to the minimal cost
-
"""
f0 = c
df0 = b
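
The quadratic line search above has a closed form; a standalone illustration
(not the library's exact implementation) of minimizing g(t) = a*t**2 + b*t + c
over [0, 1]::

    import numpy as np

    def argmin_quad_unit_interval(a, b, c):
        if a > 0:  # strictly convex: clip the vertex -b / (2a) to [0, 1]
            return float(np.clip(-b / (2. * a), 0., 1.))
        # concave or linear case: the minimum sits at an endpoint,
        # with g(0) = c and g(1) = a + b + c
        return 0. if c <= a + b + c else 1.
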
diff --git a/ot/plot.py b/ot/plot.py
index 784a372..f403e98 100644
--- a/ot/plot.py
+++ b/ot/plot.py
@@ -1,5 +1,11 @@
"""
Functions for plotting OT matrices
+
+.. warning::
+ Note that by default the module is not imported in :mod:`ot`. In order to
+ use it you need to explicitly import :mod:`ot.plot`.
+
+
"""
# Author: Remi Flamary <remi.flamary@unice.fr>
@@ -20,11 +26,11 @@ def plot1D_mat(a, b, M, title=''):
Parameters
----------
- a : np.array, shape (na,)
+ a : ndarray, shape (na,)
Source distribution
- b : np.array, shape (nb,)
+ b : ndarray, shape (nb,)
Target distribution
- M : np.array, shape (na,nb)
+ M : ndarray, shape (na, nb)
Matrix to plot
"""
na, nb = M.shape
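
Because ot.plot is not imported with the main package (see the warning
above), usage looks like the following sketch; the two bump-shaped
histograms are made up for illustration::

    import numpy as np
    import matplotlib.pyplot as plt
    import ot
    import ot.plot  # explicit import required

    n = 50
    x = np.arange(n, dtype=np.float64)
    a = np.exp(-(x - 15) ** 2 / 50.); a /= a.sum()
    b = np.exp(-(x - 35) ** 2 / 50.); b /= b.sum()
    M = ot.dist(x.reshape(n, 1), x.reshape(n, 1))
    M /= M.max()

    plt.figure(figsize=(6, 6))
    ot.plot.plot1D_mat(a, b, M, 'Cost matrix M')
    plt.show()
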
diff --git a/ot/stochastic.py b/ot/stochastic.py
index 85c4230..13ed9cc 100644
--- a/ot/stochastic.py
+++ b/ot/stochastic.py
@@ -1,3 +1,9 @@
+"""
+Stochastic solvers for regularized OT.
+
+
+"""
+
# Author: Kilian Fatras <kilian.fatras@gmail.com>
#
# License: MIT License
@@ -11,7 +17,7 @@ import numpy as np
def coordinate_grad_semi_dual(b, M, reg, beta, i):
- '''
+ r'''
Compute the coordinate gradient update for regularized discrete distributions for (i, :)
The function computes the gradient of the semi dual problem:
@@ -32,51 +38,49 @@ def coordinate_grad_semi_dual(b, M, reg, beta, i):
Parameters
----------
-
- b : np.ndarray(nt,)
- target measure
- M : np.ndarray(ns, nt)
- cost matrix
- reg : float nu
- Regularization term > 0
- v : np.ndarray(nt,)
- dual variable
- i : number int
- picked number i
+ b : ndarray, shape (nt,)
+ Target measure.
+ M : ndarray, shape (ns, nt)
+ Cost matrix.
+ reg : float
+ Regularization term > 0.
+ v : ndarray, shape (nt,)
+ Dual variable.
+ i : int
+ Picked number i.
Returns
-------
-
- coordinate gradient : np.ndarray(nt,)
+ coordinate gradient : ndarray, shape (nt,)
Examples
--------
-
+ >>> import numpy as np
+ >>> import ot
+ >>> np.random.seed(0)
>>> n_source = 7
>>> n_target = 4
- >>> reg = 1
- >>> numItermax = 300000
>>> a = ot.utils.unif(n_source)
>>> b = ot.utils.unif(n_target)
- >>> rng = np.random.RandomState(0)
- >>> X_source = rng.randn(n_source, 2)
- >>> Y_target = rng.randn(n_target, 2)
+ >>> X_source = np.random.randn(n_source, 2)
+ >>> Y_target = np.random.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
- >>> method = "ASGD"
- >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
- method, numItermax)
- >>> print(asgd_pi)
+ >>> ot.stochastic.solve_semi_dual_entropic(a, b, M, reg=1, method="ASGD", numItermax=300000)
+ array([[2.53942342e-02, 9.98640673e-02, 1.75945647e-02, 4.27664307e-06],
+ [1.21556999e-01, 1.26350515e-02, 1.30491795e-03, 7.36017394e-03],
+ [3.54070702e-03, 7.63581358e-02, 6.29581672e-02, 1.32812798e-07],
+ [2.60578198e-02, 3.35916645e-02, 8.28023223e-02, 4.05336238e-04],
+ [9.86808864e-03, 7.59774324e-04, 1.08702729e-02, 1.21359007e-01],
+ [2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01],
+ [4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]])
+
References
----------
-
[Genevay et al., 2016] :
- Stochastic Optimization for Large-scale Optimal Transport,
- Advances in Neural Information Processing Systems (2016),
- arXiv preprint arxiv:1605.08527.
-
+ Stochastic Optimization for Large-scale Optimal Transport,
+ Advances in Neural Information Processing Systems (2016),
+ arXiv preprint arxiv:1605.08527.
'''
-
r = M[i, :] - beta
exp_beta = np.exp(-r / reg) * b
khi = exp_beta / (np.sum(exp_beta))
@@ -84,7 +88,7 @@ def coordinate_grad_semi_dual(b, M, reg, beta, i):
def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None):
- '''
+ r'''
Compute the SAG algorithm to solve the regularized discrete measures
optimal transport max problem
@@ -112,42 +116,43 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None):
Parameters
----------
- a : np.ndarray(ns,),
- source measure
- b : np.ndarray(nt,),
- target measure
- M : np.ndarray(ns, nt),
- cost matrix
- reg : float number,
+ a : ndarray, shape (ns,)
+ Source measure.
+ b : ndarray, shape (nt,)
+ Target measure.
+ M : ndarray, shape (ns, nt)
+ Cost matrix.
+ reg : float
Regularization term > 0
- numItermax : int number
- number of iteration
- lr : float number
- learning rate
+ numItermax : int
+ Number of iterations.
+ lr : float
+ Learning rate.
Returns
-------
-
- v : np.ndarray(nt,)
- dual variable
+ v : ndarray, shape (nt,)
+ Dual variable.
Examples
--------
-
+ >>> import numpy as np
+ >>> import ot
+ >>> np.random.seed(0)
>>> n_source = 7
>>> n_target = 4
- >>> reg = 1
- >>> numItermax = 300000
>>> a = ot.utils.unif(n_source)
>>> b = ot.utils.unif(n_target)
- >>> rng = np.random.RandomState(0)
- >>> X_source = rng.randn(n_source, 2)
- >>> Y_target = rng.randn(n_target, 2)
+ >>> X_source = np.random.randn(n_source, 2)
+ >>> Y_target = np.random.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
- >>> method = "ASGD"
- >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
- method, numItermax)
- >>> print(asgd_pi)
+ >>> ot.stochastic.solve_semi_dual_entropic(a, b, M, reg=1, method="ASGD", numItermax=300000)
+ array([[2.53942342e-02, 9.98640673e-02, 1.75945647e-02, 4.27664307e-06],
+ [1.21556999e-01, 1.26350515e-02, 1.30491795e-03, 7.36017394e-03],
+ [3.54070702e-03, 7.63581358e-02, 6.29581672e-02, 1.32812798e-07],
+ [2.60578198e-02, 3.35916645e-02, 8.28023223e-02, 4.05336238e-04],
+ [9.86808864e-03, 7.59774324e-04, 1.08702729e-02, 1.21359007e-01],
+ [2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01],
+ [4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]])
References
----------
@@ -176,7 +181,7 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None):
def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None):
- '''
+ r'''
Compute the ASGD algorithm to solve the regularized semi-continuous measures optimal transport max problem
The function solves the following optimization problem:
@@ -202,50 +207,49 @@ def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None):
Parameters
----------
-
- b : np.ndarray(nt,)
+ b : ndarray, shape (nt,)
target measure
- M : np.ndarray(ns, nt)
+ M : ndarray, shape (ns, nt)
cost matrix
- reg : float number
+ reg : float
Regularization term > 0
- numItermax : int number
- number of iteration
- lr : float number
- learning rate
-
+ numItermax : int
+ Number of iterations.
+ lr : float
+ Learning rate.
Returns
-------
-
- ave_v : np.ndarray(nt,)
+ ave_v : ndarray, shape (nt,)
dual variable
Examples
--------
-
+ >>> import numpy as np
+ >>> import ot
+ >>> np.random.seed(0)
>>> n_source = 7
>>> n_target = 4
- >>> reg = 1
- >>> numItermax = 300000
>>> a = ot.utils.unif(n_source)
>>> b = ot.utils.unif(n_target)
- >>> rng = np.random.RandomState(0)
- >>> X_source = rng.randn(n_source, 2)
- >>> Y_target = rng.randn(n_target, 2)
+ >>> X_source = np.random.randn(n_source, 2)
+ >>> Y_target = np.random.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
- >>> method = "ASGD"
- >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
- method, numItermax)
- >>> print(asgd_pi)
+ >>> ot.stochastic.solve_semi_dual_entropic(a, b, M, reg=1, method="ASGD", numItermax=300000)
+ array([[2.53942342e-02, 9.98640673e-02, 1.75945647e-02, 4.27664307e-06],
+ [1.21556999e-01, 1.26350515e-02, 1.30491795e-03, 7.36017394e-03],
+ [3.54070702e-03, 7.63581358e-02, 6.29581672e-02, 1.32812798e-07],
+ [2.60578198e-02, 3.35916645e-02, 8.28023223e-02, 4.05336238e-04],
+ [9.86808864e-03, 7.59774324e-04, 1.08702729e-02, 1.21359007e-01],
+ [2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01],
+ [4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]])
References
----------
[Genevay et al., 2016] :
- Stochastic Optimization for Large-scale Optimal Transport,
- Advances in Neural Information Processing Systems (2016),
- arXiv preprint arxiv:1605.08527.
+ Stochastic Optimization for Large-scale Optimal Transport,
+ Advances in Neural Information Processing Systems (2016),
+ arXiv preprint arxiv:1605.08527.
'''
if lr is None:
@@ -264,7 +268,7 @@ def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None):
def c_transform_entropic(b, M, reg, beta):
- '''
+ r'''
The goal is to recover u from the c-transform.
The function computes the c_transform of a dual variable from the other
@@ -285,47 +289,47 @@ def c_transform_entropic(b, M, reg, beta):
Parameters
----------
-
- b : np.ndarray(nt,)
- target measure
- M : np.ndarray(ns, nt)
- cost matrix
+ b : ndarray, shape (nt,)
+ Target measure
+ M : ndarray, shape (ns, nt)
+ Cost matrix
reg : float
- regularization term > 0
- v : np.ndarray(nt,)
- dual variable
+ Regularization term > 0
+ v : ndarray, shape (nt,)
+ Dual variable.
Returns
-------
-
- u : np.ndarray(ns,)
- dual variable
+ u : ndarray, shape (ns,)
+ Dual variable.
Examples
--------
-
+ >>> import numpy as np
+ >>> import ot
+ >>> np.random.seed(0)
>>> n_source = 7
>>> n_target = 4
- >>> reg = 1
- >>> numItermax = 300000
>>> a = ot.utils.unif(n_source)
>>> b = ot.utils.unif(n_target)
- >>> rng = np.random.RandomState(0)
- >>> X_source = rng.randn(n_source, 2)
- >>> Y_target = rng.randn(n_target, 2)
+ >>> X_source = np.random.randn(n_source, 2)
+ >>> Y_target = np.random.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
- >>> method = "ASGD"
- >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
- method, numItermax)
- >>> print(asgd_pi)
+ >>> ot.stochastic.solve_semi_dual_entropic(a, b, M, reg=1, method="ASGD", numItermax=300000)
+ array([[2.53942342e-02, 9.98640673e-02, 1.75945647e-02, 4.27664307e-06],
+ [1.21556999e-01, 1.26350515e-02, 1.30491795e-03, 7.36017394e-03],
+ [3.54070702e-03, 7.63581358e-02, 6.29581672e-02, 1.32812798e-07],
+ [2.60578198e-02, 3.35916645e-02, 8.28023223e-02, 4.05336238e-04],
+ [9.86808864e-03, 7.59774324e-04, 1.08702729e-02, 1.21359007e-01],
+ [2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01],
+ [4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]])
References
----------
[Genevay et al., 2016] :
- Stochastic Optimization for Large-scale Optimal Transport,
- Advances in Neural Information Processing Systems (2016),
- arXiv preprint arxiv:1605.08527.
+ Stochastic Optimization for Large-scale Optimal Transport,
+ Advances in Neural Information Processing Systems (2016),
+ arXiv preprint arxiv:1605.08527.
'''
n_source = np.shape(M)[0]
@@ -340,7 +344,7 @@ def c_transform_entropic(b, M, reg, beta):
def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
log=False):
- '''
+ r'''
Compute the transportation matrix to solve the regularized discrete
measures optimal transport max problem
@@ -367,52 +371,53 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
Parameters
----------
- a : np.ndarray(ns,)
+ a : ndarray, shape (ns,)
source measure
- b : np.ndarray(nt,)
+ b : ndarray, shape (nt,)
target measure
- M : np.ndarray(ns, nt)
+ M : ndarray, shape (ns, nt)
cost matrix
- reg : float number
+ reg : float
Regularization term > 0
method : str
used method (SAG or ASGD)
- numItermax : int number
+ numItermax : int
number of iteration
- lr : float number
+ lr : float
learning rate
- n_source : int number
+ n_source : int
size of the source measure
- n_target : int number
+ n_target : int
size of the target measure
log : bool, optional
record log if True
Returns
-------
-
- pi : np.ndarray(ns, nt)
+ pi : ndarray, shape (ns, nt)
transportation matrix
log : dict
log dictionary return only if log==True in parameters
Examples
--------
-
+ >>> import numpy as np
+ >>> import ot
+ >>> np.random.seed(0)
>>> n_source = 7
>>> n_target = 4
- >>> reg = 1
- >>> numItermax = 300000
>>> a = ot.utils.unif(n_source)
>>> b = ot.utils.unif(n_target)
- >>> rng = np.random.RandomState(0)
- >>> X_source = rng.randn(n_source, 2)
- >>> Y_target = rng.randn(n_target, 2)
+ >>> X_source = np.random.randn(n_source, 2)
+ >>> Y_target = np.random.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
- >>> method = "ASGD"
- >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
- method, numItermax)
- >>> print(asgd_pi)
+ >>> ot.stochastic.solve_semi_dual_entropic(a, b, M, reg=1, method="ASGD", numItermax=300000)
+ array([[2.53942342e-02, 9.98640673e-02, 1.75945647e-02, 4.27664307e-06],
+ [1.21556999e-01, 1.26350515e-02, 1.30491795e-03, 7.36017394e-03],
+ [3.54070702e-03, 7.63581358e-02, 6.29581672e-02, 1.32812798e-07],
+ [2.60578198e-02, 3.35916645e-02, 8.28023223e-02, 4.05336238e-04],
+ [9.86808864e-03, 7.59774324e-04, 1.08702729e-02, 1.21359007e-01],
+ [2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01],
+ [4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]])
References
----------
@@ -451,7 +456,7 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
batch_beta):
- '''
+ r'''
Computes the partial gradient of the dual optimal transport problem.
For each (i,j) in a batch of coordinates, the partial gradients are :
@@ -478,53 +483,55 @@ def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
Parameters
----------
-
- a : np.ndarray(ns,)
+ a : ndarray, shape (ns,)
source measure
- b : np.ndarray(nt,)
+ b : ndarray, shape (nt,)
target measure
- M : np.ndarray(ns, nt)
+ M : ndarray, shape (ns, nt)
cost matrix
- reg : float number
+ reg : float
Regularization term > 0
- alpha : np.ndarray(ns,)
+ alpha : ndarray, shape (ns,)
dual variable
- beta : np.ndarray(nt,)
+ beta : ndarray, shape (nt,)
dual variable
- batch_size : int number
+ batch_size : int
size of the batch
- batch_alpha : np.ndarray(bs,)
+ batch_alpha : ndarray, shape (bs,)
batch of index of alpha
- batch_beta : np.ndarray(bs,)
+ batch_beta : ndarray, shape (bs,)
batch of index of beta
Returns
-------
-
- grad : np.ndarray(ns,)
+ grad : ndarray, shape (ns,)
partial grad F
Examples
--------
-
+ >>> import numpy as np
+ >>> import ot
+ >>> np.random.seed(0)
>>> n_source = 7
>>> n_target = 4
- >>> reg = 1
- >>> numItermax = 20000
- >>> lr = 0.1
- >>> batch_size = 3
- >>> log = True
>>> a = ot.utils.unif(n_source)
>>> b = ot.utils.unif(n_target)
- >>> rng = np.random.RandomState(0)
- >>> X_source = rng.randn(n_source, 2)
- >>> Y_target = rng.randn(n_target, 2)
+ >>> X_source = np.random.randn(n_source, 2)
+ >>> Y_target = np.random.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
- >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
- batch_size,
- numItermax, lr, log)
- >>> print(log['alpha'], log['beta'])
- >>> print(sgd_dual_pi)
+ >>> sgd_dual_pi, log = ot.stochastic.solve_dual_entropic(a, b, M, reg=1, batch_size=3, numItermax=30000, lr=0.1, log=True)
+ >>> log['alpha']
+ array([0.71759102, 1.57057384, 0.85576566, 0.1208211 , 0.59190466,
+ 1.197148 , 0.17805133])
+ >>> log['beta']
+ array([0.49741367, 0.57478564, 1.40075528, 2.75890102])
+ >>> sgd_dual_pi
+ array([[2.09730063e-02, 8.38169324e-02, 7.50365455e-03, 8.72731415e-09],
+ [5.58432437e-03, 5.89881299e-04, 3.09558411e-05, 8.35469849e-07],
+ [3.26489515e-03, 7.15536035e-02, 2.99778211e-02, 3.02601593e-10],
+ [4.05390622e-02, 5.31085068e-02, 6.65191787e-02, 1.55812785e-06],
+ [7.82299812e-02, 6.12099102e-03, 4.44989098e-02, 2.37719187e-03],
+ [5.06266486e-02, 2.16230494e-03, 2.26215141e-03, 6.81514609e-04],
+ [6.06713990e-02, 3.98139808e-02, 5.46829338e-02, 8.62371424e-06]])
References
----------
@@ -533,7 +540,6 @@ def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
International Conference on Learning Representation (2018),
arXiv preprint arxiv:1711.02283.
'''
-
G = - (np.exp((alpha[batch_alpha, None] + beta[None, batch_beta] -
M[batch_alpha, :][:, batch_beta]) / reg) *
a[batch_alpha, None] * b[None, batch_beta])
@@ -548,7 +554,7 @@ def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
- '''
+ r'''
Compute the sgd algorithm to solve the regularized discrete measures
optimal transport dual problem
@@ -571,33 +577,31 @@ def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
Parameters
----------
-
- a : np.ndarray(ns,)
+ a : ndarray, shape (ns,)
source measure
- b : np.ndarray(nt,)
+ b : ndarray, shape (nt,)
target measure
- M : np.ndarray(ns, nt)
+ M : ndarray, shape (ns, nt)
cost matrix
- reg : float number
+ reg : float
Regularization term > 0
- batch_size : int number
+ batch_size : int
size of the batch
- numItermax : int number
+ numItermax : int
number of iteration
- lr : float number
+ lr : float
learning rate
Returns
-------
-
- alpha : np.ndarray(ns,)
+ alpha : ndarray, shape (ns,)
dual variable
- beta : np.ndarray(nt,)
+ beta : ndarray, shape (nt,)
dual variable
Examples
--------
-
+ >>> import numpy as np
+ >>> import ot
>>> n_source = 7
>>> n_target = 4
>>> reg = 1
@@ -611,18 +615,26 @@ def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
>>> X_source = rng.randn(n_source, 2)
>>> Y_target = rng.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
- >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
- batch_size,
- numItermax, lr, log)
- >>> print(log['alpha'], log['beta'])
- >>> print(sgd_dual_pi)
+ >>> sgd_dual_pi, log = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size, numItermax, lr, log)
+ >>> log['alpha']
+ array([0.64171798, 1.27932201, 0.78132257, 0.15638935, 0.54888354,
+ 1.03663469, 0.20595781])
+ >>> log['beta']
+ array([0.51207194, 0.58033189, 1.28922676, 2.26859736])
+ >>> sgd_dual_pi
+ array([[1.97276541e-02, 7.81248547e-02, 6.22136048e-03, 4.95442423e-09],
+ [4.23494310e-03, 4.43286263e-04, 2.06927079e-05, 3.82389139e-07],
+ [3.07542414e-03, 6.67897769e-02, 2.48904999e-02, 1.72030247e-10],
+ [4.26271990e-02, 5.53375455e-02, 6.16535024e-02, 9.88812650e-07],
+ [7.60423265e-02, 5.89585256e-03, 3.81267087e-02, 1.39458256e-03],
+ [4.37557504e-02, 1.85189176e-03, 1.72335760e-03, 3.55491279e-04],
+ [6.33096109e-02, 4.11683954e-02, 5.02962051e-02, 5.43097516e-06]])
References
----------
-
[Seguy et al., 2018] :
- International Conference on Learning Representation (2018),
- arXiv preprint arxiv:1711.02283.
+ International Conference on Learning Representation (2018),
+ arXiv preprint arxiv:1711.02283.
'''
n_source = np.shape(M)[0]
@@ -644,7 +656,7 @@ def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1,
log=False):
- '''
+ r'''
Compute the transportation matrix to solve the regularized discrete measures
optimal transport dual problem
@@ -667,35 +679,33 @@ def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1,
Parameters
----------
-
- a : np.ndarray(ns,)
+ a : ndarray, shape (ns,)
source measure
- b : np.ndarray(nt,)
+ b : ndarray, shape (nt,)
target measure
- M : np.ndarray(ns, nt)
+ M : ndarray, shape (ns, nt)
cost matrix
- reg : float number
+ reg : float
Regularization term > 0
- batch_size : int number
+ batch_size : int
size of the batch
- numItermax : int number
+ numItermax : int
number of iteration
- lr : float number
+ lr : float
learning rate
log : bool, optional
record log if True
Returns
-------
-
- pi : np.ndarray(ns, nt)
+ pi : ndarray, shape (ns, nt)
transportation matrix
log : dict
log dictionary return only if log==True in parameters
Examples
--------
-
+ >>> import numpy as np
+ >>> import ot
>>> n_source = 7
>>> n_target = 4
>>> reg = 1
@@ -709,18 +719,27 @@ def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1,
>>> X_source = rng.randn(n_source, 2)
>>> Y_target = rng.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
- >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
- batch_size,
- numItermax, lr, log)
- >>> print(log['alpha'], log['beta'])
- >>> print(sgd_dual_pi)
+ >>> sgd_dual_pi, log = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size, numItermax, lr, log)
+ >>> log['alpha']
+ array([0.64057733, 1.2683513 , 0.75610161, 0.16024284, 0.54926534,
+ 1.0514201 , 0.19958936])
+ >>> log['beta']
+ array([0.51372571, 0.58843489, 1.27993921, 2.24344807])
+ >>> sgd_dual_pi
+ array([[1.97377795e-02, 7.86706853e-02, 6.15682001e-03, 4.82586997e-09],
+ [4.19566963e-03, 4.42016865e-04, 2.02777272e-05, 3.68823708e-07],
+ [3.00379244e-03, 6.56562018e-02, 2.40462171e-02, 1.63579656e-10],
+ [4.28626062e-02, 5.60031599e-02, 6.13193826e-02, 9.67977735e-07],
+ [7.61972739e-02, 5.94609051e-03, 3.77886693e-02, 1.36046648e-03],
+ [4.44810042e-02, 1.89476742e-03, 1.73285847e-03, 3.51826036e-04],
+ [6.30118293e-02, 4.12398660e-02, 4.95148998e-02, 5.26247246e-06]])
References
----------
[Seguy et al., 2018] :
- International Conference on Learning Representation (2018),
- arXiv preprint arxiv:1711.02283.
+ International Conference on Learning Representation (2018),
+ arXiv preprint arxiv:1711.02283.
'''
opt_alpha, opt_beta = sgd_entropic_regularization(a, b, M, reg, batch_size,
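
The primal plan returned in the examples above can be recovered from the
dual variables with the same exponential formula used in batch_grad_dual; a
hedged sketch, assuming the solver builds the plan exactly this way::

    import numpy as np
    import ot

    np.random.seed(0)
    a, b = ot.utils.unif(7), ot.utils.unif(4)
    M = ot.dist(np.random.randn(7, 2), np.random.randn(4, 2))

    reg = 1.
    pi, log = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size=3,
                                                numItermax=30000, lr=0.1,
                                                log=True)
    alpha, beta = log['alpha'], log['beta']
    pi_dual = (np.exp((alpha[:, None] + beta[None, :] - M) / reg)
               * a[:, None] * b[None, :])
    assert np.allclose(pi, pi_dual)
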
diff --git a/ot/unbalanced.py b/ot/unbalanced.py
new file mode 100644
index 0000000..467fda2
--- /dev/null
+++ b/ot/unbalanced.py
@@ -0,0 +1,519 @@
+# -*- coding: utf-8 -*-
+"""
+Regularized Unbalanced OT
+"""
+
+# Author: Hicham Janati <hicham.janati@inria.fr>
+# License: MIT License
+
+from __future__ import division
+import warnings
+import numpy as np
+
+
+def sinkhorn_unbalanced(a, b, M, reg, alpha, method='sinkhorn', numItermax=1000,
+ stopThr=1e-9, verbose=False, log=False, **kwargs):
+ r"""
+ Solve the unbalanced entropic regularization optimal transport problem and return the OT matrix
+
+ The function solves the following optimization problem:
+
+ .. math::
+ W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + \alpha KL(\gamma 1, a) + \alpha KL(\gamma^T 1, b)
+
+ s.t.
+ \gamma\geq 0
+ where :
+
+ - M is the (ns, nt) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target weights
+ - KL is the Kullback-Leibler divergence
+
+ The algorithm used for solving the problem is the generalized Sinkhorn-Knopp matrix scaling algorithm as proposed in [10, 25]_
+
+
+ Parameters
+ ----------
+ a : np.ndarray (ns,)
+ samples weights in the source domain
+ b : np.ndarray (nt,) or np.ndarray (nt,n_hists)
+ samples in the target domain, compute sinkhorn with multiple targets
+ and fixed M if b is a matrix (return OT loss + dual variables in log)
+ M : np.ndarray (ns, nt)
+ loss matrix
+ reg : float
+ Entropy regularization term > 0
+ alpha : float
+ Marginal relaxation term > 0
+ method : str
+ Method used for the solver, either 'sinkhorn', 'sinkhorn_stabilized' or
+ 'sinkhorn_epsilon_scaling'; see those functions for specific parameters
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+ Stop threshold on error (> 0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ gamma : (ns, nt) ndarray
+ Optimal transportation matrix for the given parameters
+ log : dict
+ log dictionary return only if log==True in parameters
+
+ Examples
+ --------
+
+ >>> import ot
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.], [1., 0.]]
+ >>> ot.sinkhorn_unbalanced(a, b, M, 1, 1)
+ array([[0.51122823, 0.18807035],
+ [0.18807035, 0.51122823]])
+
+
+ References
+ ----------
+
+ .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
+
+ .. [9] Schmitzer, B. (2016). Stabilized Sparse Scaling Algorithms for Entropy Regularized Transport Problems. arXiv preprint arXiv:1610.06519.
+
+ .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
+
+ .. [25] Frogner C., Zhang C., Mobahi H., Araya-Polo M., Poggio T. : Learning with a Wasserstein Loss, Advances in Neural Information Processing Systems (NIPS) 2015
+
+
+ See Also
+ --------
+ ot.unbalanced.sinkhorn_knopp_unbalanced : Unbalanced Classic Sinkhorn [10]
+ ot.unbalanced.sinkhorn_stabilized_unbalanced: Unbalanced Stabilized sinkhorn [9][10]
+ ot.unbalanced.sinkhorn_epsilon_scaling_unbalanced: Unbalanced Sinkhorn with epsilon scaling [9][10]
+
+ """
+
+ if method.lower() == 'sinkhorn':
+ def sink():
+ return sinkhorn_knopp_unbalanced(a, b, M, reg, alpha,
+ numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose,
+ log=log, **kwargs)
+
+ elif method.lower() in ['sinkhorn_stabilized', 'sinkhorn_epsilon_scaling']:
+ warnings.warn('Method not implemented yet. Using classic Sinkhorn Knopp')
+
+ def sink():
+ return sinkhorn_knopp_unbalanced(a, b, M, reg, alpha,
+ numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose,
+ log=log, **kwargs)
+ else:
+ raise ValueError("Unknown method '%s'." % method)
+
+ return sink()
+
+
+def sinkhorn_unbalanced2(a, b, M, reg, alpha, method='sinkhorn',
+ numItermax=1000, stopThr=1e-9, verbose=False,
+ log=False, **kwargs):
+ r"""
+ Solve the entropic regularization unbalanced optimal transport problem and return the loss
+
+ The function solves the following optimization problem:
+
+ .. math::
+ W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + \alpha KL(\gamma 1, a) + \alpha KL(\gamma^T 1, b)
+
+ s.t.
+ \gamma\geq 0
+ where :
+
+ - M is the (ns, nt) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target weights
+ - KL is the Kullback-Leibler divergence
+
+ The algorithm used for solving the problem is the generalized Sinkhorn-Knopp matrix scaling algorithm as proposed in [10, 25]_
+
+
+ Parameters
+ ----------
+ a : np.ndarray (ns,)
+ samples weights in the source domain
+ b : np.ndarray (nt,) or np.ndarray (nt, n_hists)
+ samples in the target domain, compute sinkhorn with multiple targets
+ and fixed M if b is a matrix (return OT loss + dual variables in log)
+ M : np.ndarray (ns,nt)
+ loss matrix
+ reg : float
+ Entropy regularization term > 0
+ alpha : float
+ Marginal relaxation term > 0
+ method : str
+ Method used for the solver, either 'sinkhorn', 'sinkhorn_stabilized' or
+ 'sinkhorn_epsilon_scaling'; see those functions for specific parameters
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+ Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ W : (n_hists,) ndarray or float
+ Optimal transportation loss for the given parameters
+ log : dict
+ log dictionary return only if log==True in parameters
+
+ Examples
+ --------
+
+ >>> import ot
+ >>> a=[.5, .10]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.],[1., 0.]]
+ >>> ot.unbalanced.sinkhorn_unbalanced2(a, b, M, 1., 1.)
+ array([0.31912866])
+
+
+
+ References
+ ----------
+
+ .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
+
+ .. [9] Schmitzer, B. (2016). Stabilized Sparse Scaling Algorithms for Entropy Regularized Transport Problems. arXiv preprint arXiv:1610.06519.
+
+ .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
+
+ .. [25] Frogner C., Zhang C., Mobahi H., Araya-Polo M., Poggio T. : Learning with a Wasserstein Loss, Advances in Neural Information Processing Systems (NIPS) 2015
+
+ See Also
+ --------
+ ot.unbalanced.sinkhorn_knopp : Unbalanced Classic Sinkhorn [10]
+ ot.unbalanced.sinkhorn_stabilized: Unbalanced Stabilized sinkhorn [9][10]
+ ot.unbalanced.sinkhorn_epsilon_scaling: Unbalanced Sinkhorn with epsilon scaling [9][10]
+
+ """
+
+ if method.lower() == 'sinkhorn':
+ def sink():
+ return sinkhorn_knopp_unbalanced(a, b, M, reg, alpha,
+ numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose,
+ log=log, **kwargs)
+
+ elif method.lower() in ['sinkhorn_stabilized', 'sinkhorn_epsilon_scaling']:
+ warnings.warn('Method not implemented yet. Using classic Sinkhorn Knopp')
+
+ def sink():
+ return sinkhorn_knopp_unbalanced(a, b, M, reg, alpha,
+ numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose,
+ log=log, **kwargs)
+ else:
+ raise ValueError("Unknown method '%s'." % method)
+
+ b = np.asarray(b, dtype=np.float64)
+ if len(b.shape) < 2:
+ b = b[:, None]
+
+ return sink()
+
+
+def sinkhorn_knopp_unbalanced(a, b, M, reg, alpha, numItermax=1000,
+ stopThr=1e-9, verbose=False, log=False, **kwargs):
+ r"""
+ Solve the entropic regularization unbalanced optimal transport problem and return the OT matrix
+
+ The function solves the following optimization problem:
+
+ .. math::
+ W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + \alpha KL(\gamma 1, a) + \alpha KL(\gamma^T 1, b)
+
+ s.t.
+ \gamma\geq 0
+ where :
+
+ - M is the (ns, nt) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target weights
+ - KL is the Kullback-Leibler divergence
+
+ The algorithm used for solving the problem is the generalized Sinkhorn-Knopp matrix scaling algorithm as proposed in [10, 25]_
+
+
+ Parameters
+ ----------
+ a : np.ndarray (ns,)
+ samples weights in the source domain
+ b : np.ndarray (nt,) or np.ndarray (nt, n_hists)
+ samples in the target domain, compute sinkhorn with multiple targets
+ and fixed M if b is a matrix (return OT loss + dual variables in log)
+ M : np.ndarray (ns,nt)
+ loss matrix
+ reg : float
+ Entropy regularization term > 0
+ alpha : float
+ Marginal relaxation term > 0
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+ Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ gamma : (ns x nt) ndarray
+ Optimal transportation matrix for the given parameters
+ log : dict
+ log dictionary return only if log==True in parameters
+
+ Examples
+ --------
+
+ >>> import ot
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.],[1., 0.]]
+ >>> ot.unbalanced.sinkhorn_knopp_unbalanced(a, b, M, 1., 1.)
+ array([[0.51122823, 0.18807035],
+ [0.18807035, 0.51122823]])
+
+ References
+ ----------
+
+ .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
+
+ .. [25] Frogner C., Zhang C., Mobahi H., Araya-Polo M., Poggio T. : Learning with a Wasserstein Loss, Advances in Neural Information Processing Systems (NIPS) 2015
+
+ See Also
+ --------
+ ot.lp.emd : Unregularized OT
+ ot.optim.cg : General regularized OT
+
+ """
+
+ a = np.asarray(a, dtype=np.float64)
+ b = np.asarray(b, dtype=np.float64)
+ M = np.asarray(M, dtype=np.float64)
+
+ n_a, n_b = M.shape
+
+ if len(a) == 0:
+ a = np.ones(n_a, dtype=np.float64) / n_a
+ if len(b) == 0:
+ b = np.ones(n_b, dtype=np.float64) / n_b
+
+ if len(b.shape) > 1:
+ n_hists = b.shape[1]
+ else:
+ n_hists = 0
+
+ if log:
+ log = {'err': []}
+
+ # we assume that no distances are null except those on the diagonal
+ # of the cost matrix
+ if n_hists:
+ u = np.ones((n_a, 1)) / n_a
+ v = np.ones((n_b, n_hists)) / n_b
+ a = a.reshape(n_a, 1)
+ else:
+ u = np.ones(n_a) / n_a
+ v = np.ones(n_b) / n_b
+
+ # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute
+ K = np.empty(M.shape, dtype=M.dtype)
+ np.divide(M, -reg, out=K)
+ np.exp(K, out=K)
+
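+ # fi is the damping exponent of the scaling updates below; it stems
+ # from the KL proximal step of the generalized Sinkhorn iteration of
+ # [10]: alpha -> inf gives fi -> 1 (balanced Sinkhorn), alpha -> 0
+ # gives fi -> 0 (fully relaxed marginals)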
+ fi = alpha / (alpha + reg)
+
+ cpt = 0
+ err = 1.
+
+ while (err > stopThr and cpt < numItermax):
+ uprev = u
+ vprev = v
+
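+ # generalized Sinkhorn-Knopp updates: the balanced scalings a / Kv
+ # and b / K^T u are damped by the exponent fi to account for the
+ # relaxed marginal constraints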
+ Kv = K.dot(v)
+ u = (a / Kv) ** fi
+ Ktu = K.T.dot(u)
+ v = (b / Ktu) ** fi
+
+ if (np.any(Ktu == 0.)
+ or np.any(np.isnan(u)) or np.any(np.isnan(v))
+ or np.any(np.isinf(u)) or np.any(np.isinf(v))):
+ # we have reached the machine precision
+ # come back to previous solution and quit loop
+ warnings.warn('Numerical errors at iteration %s' % cpt)
+ u = uprev
+ v = vprev
+ break
+ if cpt % 10 == 0:
+ # we can speed up the process by checking for the error only every
+ # 10th iteration
+ err = np.sum((u - uprev)**2) / np.sum((u)**2) + \
+ np.sum((v - vprev)**2) / np.sum((v)**2)
+ if log:
+ log['err'].append(err)
+ if verbose:
+ if cpt % 200 == 0:
+ print(
+ '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err))
+ cpt += 1
+
+ if log:
+ log['u'] = u
+ log['v'] = v
+
+ if n_hists: # return only loss
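+ # loss for each histogram k: sum_ij u_ik K_ij v_jk M_ij = <gamma_k, M>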
+ res = np.einsum('ik,ij,jk,ij->k', u, K, v, M)
+ if log:
+ return res, log
+ else:
+ return res
+
+ else: # return OT matrix
+
+ if log:
+ return u[:, None] * K * v[None, :], log
+ else:
+ return u[:, None] * K * v[None, :]
+
+
+def barycenter_unbalanced(A, M, reg, alpha, weights=None, numItermax=1000,
+ stopThr=1e-4, verbose=False, log=False):
+ r"""Compute the entropic regularized unbalanced wasserstein barycenter of distributions A
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \mathbf{a} = arg\min_\mathbf{a} \sum_i Wu_{reg}(\mathbf{a},\mathbf{a}_i)
+
+ where :
+
+ - :math:`Wu_{reg}(\cdot,\cdot)` is the unbalanced entropic regularized Wasserstein distance (see ot.unbalanced.sinkhorn_unbalanced)
+ - :math:`\mathbf{a}_i` are training distributions in the columns of matrix :math:`\mathbf{A}`
+ - reg and :math:`\mathbf{M}` are respectively the regularization term and the cost matrix for OT
+ - alpha is the marginal relaxation hyperparameter
+
+ The algorithm used for solving the problem is the generalized Sinkhorn-Knopp matrix scaling algorithm as proposed in [10]_
+
+ Parameters
+ ----------
+ A : np.ndarray (d,n)
+ n training distributions a_i of size d
+ M : np.ndarray (d,d)
+ loss matrix for OT
+ reg : float
+ Entropy regularization term > 0
+ alpha : float
+ Marginal relaxation term > 0
+ weights : np.ndarray (n,)
+ Weights of each histogram a_i on the simplex (barycentric coordinates)
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+ Stop threshold on error (> 0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ a : (d,) ndarray
+ Unbalanced Wasserstein barycenter
+ log : dict
+ log dictionary returned only if log is True
+
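+ Examples
+ --------
+ A minimal sketch on toy data (the arrays below are illustrative
+ assumptions, hence the skipped doctest):
+
+ >>> import numpy as np # doctest: +SKIP
+ >>> import ot # doctest: +SKIP
+ >>> A = np.array([[.5, .3], [.5, .7]]) # doctest: +SKIP
+ >>> M = np.array([[0., 1.], [1., 0.]]) # doctest: +SKIP
+ >>> bary = ot.unbalanced.barycenter_unbalanced(A, M, 1., 1.) # doctest: +SKIP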
+
+ References
+ ----------
+
+ .. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G. (2015). Iterative Bregman projections for regularized transportation problems. SIAM Journal on Scientific Computing, 37(2), A1111-A1138.
+ .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
+
+
+ """
+ p, n_hists = A.shape
+ if weights is None:
+ weights = np.ones(n_hists) / n_hists
+ else:
+ assert len(weights) == A.shape[1]
+
+ if log:
+ log = {'err': []}
+
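+ # Gibbs kernel K = exp(-M / reg); the same quantity is computed
+ # in-place in sinkhorn_knopp_unbalanced above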
+ K = np.exp(- M / reg)
+
+ fi = alpha / (alpha + reg)
+
+ v = np.ones((p, n_hists)) / p
+ u = np.ones((p, 1)) / p
+
+ cpt = 0
+ err = 1.
+
+ while (err > stopThr and cpt < numItermax):
+ uprev = u
+ vprev = v
+
+ Kv = K.dot(v)
+ u = (A / Kv) ** fi
+ Ktu = K.T.dot(u)
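+ # barycenter fixed-point update: q is the weighted power mean of
+ # order (1 - fi) of the back-projected scalings K^T u_k, cf. [10]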
+ q = ((Ktu ** (1 - fi)).dot(weights))
+ q = q ** (1 / (1 - fi))
+ Q = q[:, None]
+ v = (Q / Ktu) ** fi
+
+ if (np.any(Ktu == 0.)
+ or np.any(np.isnan(u)) or np.any(np.isnan(v))
+ or np.any(np.isinf(u)) or np.any(np.isinf(v))):
+ # we have reached the machine precision
+ # come back to previous solution and quit loop
+ warnings.warn('Numerical errors at iteration %s' % cpt)
+ u = uprev
+ v = vprev
+ break
+ if cpt % 10 == 0:
+ # we can speed up the process by checking for the error only every
+ # 10th iteration
+ err = np.sum((u - uprev) ** 2) / np.sum((u) ** 2) + \
+ np.sum((v - vprev) ** 2) / np.sum((v) ** 2)
+ if log:
+ log['err'].append(err)
+ if verbose:
+ if cpt % 50 == 0:
+ print(
+ '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err))
+
+ cpt += 1
+ if log:
+ log['niter'] = cpt
+ log['u'] = u
+ log['v'] = v
+ return q, log
+ else:
+ return q
diff --git a/ot/utils.py b/ot/utils.py
index efd1288..8419c83 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
-Various function that can be usefull
+Various useful functions
"""
# Author: Remi Flamary <remi.flamary@unice.fr>
@@ -111,12 +111,12 @@ def dist(x1, x2=None, metric='sqeuclidean'):
Parameters
----------
- x1 : np.array (n1,d)
+ x1 : ndarray, shape (n1,d)
matrix with n1 samples of size d
- x2 : np.array (n2,d), optional
+ x2 : array, shape (n2,d), optional
matrix with n2 samples of size d (if None then x2=x1)
- metric : str, fun, optional
- name of the metric to be computed (full list in the doc of scipy), If a string,
+ metric : str | callable, optional
+ Name of the metric to be computed (full list in the scipy documentation). If a string,
the distance function can be 'braycurtis', 'canberra', 'chebyshev', 'cityblock',
'correlation', 'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski',
'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
@@ -138,26 +138,21 @@ def dist(x1, x2=None, metric='sqeuclidean'):
def dist0(n, method='lin_square'):
- """Compute standard cost matrices of size (n,n) for OT problems
+ """Compute standard cost matrices of size (n, n) for OT problems
Parameters
----------
-
n : int
- size of the cost matrix
+ Size of the cost matrix.
method : str, optional
Type of loss matrix chosen from:
* 'lin_square' : linear sampling between 0 and n-1, quadratic loss
-
Returns
-------
-
- M : np.array (n1,n2)
- distance matrix computed with given metric
-
-
+ M : ndarray, shape (n1,n2)
+ Distance matrix computed with given metric.
"""
res = 0
if method == 'lin_square':
@@ -169,22 +164,18 @@ def dist0(n, method='lin_square'):
def cost_normalization(C, norm=None):
""" Apply normalization to the loss matrix
-
Parameters
----------
- C : np.array (n1, n2)
+ C : ndarray, shape (n1, n2)
The cost matrix to normalize.
norm : str
- type of normalization from 'median','max','log','loglog'. Any other
- value do not normalize.
-
+ Type of normalization from 'median', 'max', 'log', 'loglog'. Any
+ other value leaves the matrix unchanged.
Returns
-------
-
- C : np.array (n1, n2)
+ C : ndarray, shape (n1, n2)
The input cost matrix normalized according to given norm.
-
"""
if norm == "median":
@@ -194,7 +185,7 @@ def cost_normalization(C, norm=None):
elif norm == "log":
C = np.log(1 + C)
elif norm == "loglog":
- C = np.log(1 + np.log(1 + C))
+ C = np.log1p(np.log1p(C))
return C
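+
+# A small illustration of the 'log' branch above (the array is an
+# illustrative assumption): norm='log' maps each cost c to log(1 + c),
+# so e.g.
+#
+#   C = cost_normalization(np.array([[0., np.e - 1.]]), norm='log')
+#   # -> [[0., 1.]]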
@@ -256,6 +247,7 @@ def check_params(**kwargs):
def check_random_state(seed):
"""Turn seed into a np.random.RandomState instance
+
Parameters
----------
seed : None | int | instance of RandomState
@@ -275,7 +267,6 @@ def check_random_state(seed):
class deprecated(object):
-
"""Decorator to mark a function or class as deprecated.
deprecated class from scikit-learn package
@@ -285,14 +276,14 @@ class deprecated(object):
The optional extra argument will be appended to the deprecation message
and the docstring. Note: to use this with the default value for extra, put
in an empty pair of parentheses:
- >>> from ot.deprecation import deprecated
- >>> @deprecated()
- ... def some_function(): pass
+ >>> from ot.deprecation import deprecated # doctest: +SKIP
+ >>> @deprecated() # doctest: +SKIP
+ ... def some_function(): pass # doctest: +SKIP
Parameters
----------
- extra : string
- to be added to the deprecation messages
+ extra : str
+ To be added to the deprecation messages.
"""
# Adapted from http://wiki.python.org/moin/PythonDecoratorLibrary,
@@ -373,9 +364,9 @@ def _is_deprecated(func):
class BaseEstimator(object):
-
"""Base class for most objects in POT
- adapted from sklearn BaseEstimator class
+
+ Code adapted from sklearn BaseEstimator class
Notes
-----
@@ -417,7 +408,7 @@ class BaseEstimator(object):
Parameters
----------
- deep : boolean, optional
+ deep : bool, optional
If True, will return the parameters for this estimator and
contained subobjects that are estimators.