Diffstat (limited to 'ot')
-rw-r--r-- | ot/__init__.py | 52
-rw-r--r-- | ot/bregman.py | 227
-rw-r--r-- | ot/da.py | 8
-rw-r--r-- | ot/datasets.py | 32
-rw-r--r-- | ot/dr.py | 63
-rw-r--r-- | ot/gpu/__init__.py | 6
-rw-r--r-- | ot/gpu/bregman.py | 11
-rw-r--r-- | ot/gromov.py | 514
-rw-r--r-- | ot/lp/__init__.py | 351
-rw-r--r-- | ot/lp/emd_wrap.pyx | 100
-rw-r--r-- | ot/optim.py | 32
-rw-r--r-- | ot/plot.py | 12
-rw-r--r-- | ot/stochastic.py | 407
-rw-r--r-- | ot/unbalanced.py | 519
-rw-r--r-- | ot/utils.py | 55
15 files changed, 1672 insertions, 717 deletions
diff --git a/ot/__init__.py b/ot/__init__.py index b74b924..35ae6fc 100644 --- a/ot/__init__.py +++ b/ot/__init__.py @@ -1,6 +1,46 @@ -"""Python Optimal Transport toolbox +""" + +This is the main module of the POT toolbox. It provides easy access to +a number of sub-modules and functions described below. + +.. note:: + + + Here is a list of the submodules and a short description of what they contain. + + - :any:`ot.lp` contains OT solvers for the exact (Linear Program) OT problems. + - :any:`ot.bregman` contains OT solvers for the entropic OT problems using + Bregman projections. + - :any:`ot.smooth` contains OT solvers for the regularized (l2 and kl) smooth OT + problems. + - :any:`ot.gromov` contains solvers for Gromov-Wasserstein and Fused Gromov + Wasserstein problems. + - :any:`ot.optim` contains generic solvers for OT based optimization problems + - :any:`ot.da` contains classes and functions related to Monge mapping + estimation and Domain Adaptation (DA). + - :any:`ot.gpu` contains GPU (cupy) implementation of some OT solvers + - :any:`ot.dr` contains Dimension Reduction (DR) methods such as Wasserstein + Discriminant Analysis. + - :any:`ot.utils` contains utility functions such as distance computation and + timing. + - :any:`ot.datasets` contains toy dataset generation functions. + - :any:`ot.plot` contains visualization functions + - :any:`ot.stochastic` contains stochastic solvers for regularized OT. + - :any:`ot.unbalanced` contains solvers for regularized unbalanced OT. + +.. warning:: + The list of automatically imported sub-modules is as follows: + :py:mod:`ot.lp`, :py:mod:`ot.bregman`, :py:mod:`ot.optim`, + :py:mod:`ot.utils`, :py:mod:`ot.datasets`, + :py:mod:`ot.gromov`, :py:mod:`ot.smooth`, + :py:mod:`ot.stochastic`. + The following sub-modules are not imported due to additional dependencies: + - :any:`ot.dr` : depends on :code:`pymanopt` and :code:`autograd`. + - :any:`ot.gpu` : depends on :code:`cupy` and a CUDA GPU. + - :any:`ot.plot` : depends on :code:`matplotlib` """ @@ -20,10 +60,12 @@ from . import da from . import gromov from . import smooth from . import stochastic +from .
import unbalanced # OT functions -from .lp import emd, emd2 +from .lp import emd, emd2, emd_1d, emd2_1d, wasserstein_1d from .bregman import sinkhorn, sinkhorn2, barycenter +from .unbalanced import sinkhorn_unbalanced, barycenter_unbalanced from .da import sinkhorn_lpl1_mm # utils functions @@ -31,6 +73,8 @@ from .utils import dist, unif, tic, toc, toq __version__ = "0.5.1" -__all__ = ["emd", "emd2", "sinkhorn", "sinkhorn2", "utils", 'datasets', +__all__ = ["emd", "emd2", 'emd_1d', "sinkhorn", "sinkhorn2", "utils", 'datasets', 'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov', - 'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim'] + 'emd_1d', 'emd2_1d', 'wasserstein_1d', + 'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim', + 'sinkhorn_unbalanced', "barycenter_unbalanced"] diff --git a/ot/bregman.py b/ot/bregman.py index 321712b..70e4208 100644 --- a/ot/bregman.py +++ b/ot/bregman.py @@ -16,7 +16,7 @@ from .utils import unif, dist def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs): - u""" + r""" Solve the entropic regularization optimal transport problem and return the OT matrix The function solves the following optimization problem: @@ -40,12 +40,12 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, Parameters ---------- - a : np.ndarray (ns,) + a : ndarray, shape (ns,) samples weights in the source domain - b : np.ndarray (nt,) or np.ndarray (nt,nbb) + b : ndarray, shape (nt,) or ndarray, shape (nt, nbb) samples in the target domain, compute sinkhorn with multiple targets and fixed M if b is a matrix (return OT loss + dual variables in log) - M : np.ndarray (ns,nt) + M : ndarray, shape (ns, nt) loss matrix reg : float Regularization term >0 @@ -64,7 +64,7 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, Returns ------- - gamma : (ns x nt) ndarray + gamma : ndarray, shape (ns, nt) Optimal transportation matrix for the given parameters log : dict log dictionary return only if log==True in parameters @@ -73,12 +73,12 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, -------- >>> import ot - >>> a=[.5,.5] - >>> b=[.5,.5] - >>> M=[[0.,1.],[1.,0.]] - >>> ot.sinkhorn(a,b,M,1) - array([[ 0.36552929, 0.13447071], - [ 0.13447071, 0.36552929]]) + >>> a=[.5, .5] + >>> b=[.5, .5] + >>> M=[[0., 1.], [1., 0.]] + >>> ot.sinkhorn(a, b, M, 1) + array([[0.36552929, 0.13447071], + [0.13447071, 0.36552929]]) References @@ -131,7 +131,7 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000, def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs): - u""" + r""" Solve the entropic regularization optimal transport problem and return the loss The function solves the following optimization problem: @@ -155,12 +155,12 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000, Parameters ---------- - a : np.ndarray (ns,) + a : ndarray, shape (ns,) samples weights in the source domain - b : np.ndarray (nt,) or np.ndarray (nt,nbb) + b : ndarray, shape (nt,) or ndarray, shape (nt, nbb) samples in the target domain, compute sinkhorn with multiple targets and fixed M if b is a matrix (return OT loss + dual variables in log) - M : np.ndarray (ns,nt) + M : ndarray, shape (ns, nt) loss matrix reg : float Regularization term >0 @@ -176,7 +176,6 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000, log : bool, optional record log if True - Returns ------- W : (nt) ndarray or float @@ -188,11 +187,11 @@ def 
sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000, -------- >>> import ot - >>> a=[.5,.5] - >>> b=[.5,.5] - >>> M=[[0.,1.],[1.,0.]] - >>> ot.sinkhorn2(a,b,M,1) - array([ 0.26894142]) + >>> a=[.5, .5] + >>> b=[.5, .5] + >>> M=[[0., 1.], [1., 0.]] + >>> ot.sinkhorn2(a, b, M, 1) + array([0.26894142]) @@ -241,14 +240,14 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000, b = np.asarray(b, dtype=np.float64) if len(b.shape) < 2: - b = b.reshape((-1, 1)) + b = b[:, None] return sink() def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, verbose=False, log=False, **kwargs): - """ + r""" Solve the entropic regularization optimal transport problem and return the OT matrix The function solves the following optimization problem: @@ -272,12 +271,12 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, Parameters ---------- - a : np.ndarray (ns,) + a : ndarray, shape (ns,) samples weights in the source domain - b : np.ndarray (nt,) or np.ndarray (nt,nbb) + b : ndarray, shape (nt,) or ndarray, shape (nt, nbb) samples in the target domain, compute sinkhorn with multiple targets and fixed M if b is a matrix (return OT loss + dual variables in log) - M : np.ndarray (ns,nt) + M : ndarray, shape (ns, nt) loss matrix reg : float Regularization term >0 @@ -290,10 +289,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, log : bool, optional record log if True - Returns ------- - gamma : (ns x nt) ndarray + gamma : ndarray, shape (ns, nt) Optimal transportation matrix for the given parameters log : dict log dictionary return only if log==True in parameters @@ -302,12 +300,12 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, -------- >>> import ot - >>> a=[.5,.5] - >>> b=[.5,.5] - >>> M=[[0.,1.],[1.,0.]] - >>> ot.sinkhorn(a,b,M,1) - array([[ 0.36552929, 0.13447071], - [ 0.13447071, 0.36552929]]) + >>> a=[.5, .5] + >>> b=[.5, .5] + >>> M=[[0., 1.], [1., 0.]] + >>> ot.sinkhorn(a, b, M, 1) + array([[0.36552929, 0.13447071], + [0.13447071, 0.36552929]]) References @@ -422,7 +420,7 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log=False): - """ + r""" Solve the entropic regularization optimal transport problem and return the OT matrix The algorithm used is based on the paper @@ -453,12 +451,12 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log= Parameters ---------- - a : np.ndarray (ns,) + a : ndarray, shape (ns,) samples weights in the source domain - b : np.ndarray (nt,) or np.ndarray (nt,nbb) + b : ndarray, shape (nt,) or ndarray, shape (nt, nbb) samples in the target domain, compute sinkhorn with multiple targets and fixed M if b is a matrix (return OT loss + dual variables in log) - M : np.ndarray (ns,nt) + M : ndarray, shape (ns, nt) loss matrix reg : float Regularization term >0 @@ -469,10 +467,9 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log= log : bool, optional record log if True - Returns ------- - gamma : (ns x nt) ndarray + gamma : ndarray, shape (ns, nt) Optimal transportation matrix for the given parameters log : dict log dictionary return only if log==True in parameters @@ -481,12 +478,12 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log= -------- >>> import ot - >>> a=[.5,.5] - >>> b=[.5,.5] - >>> M=[[0.,1.],[1.,0.]] - >>> ot.bregman.greenkhorn(a,b,M,1) - array([[ 0.36552929, 0.13447071], - [ 0.13447071, 0.36552929]]) + >>> a=[.5, .5] + >>> b=[.5, .5] + >>> M=[[0., 1.], [1., 0.]] + 
>>> ot.bregman.greenkhorn(a, b, M, 1) + array([[0.36552929, 0.13447071], + [0.13447071, 0.36552929]]) References @@ -576,7 +573,7 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log= def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, warmstart=None, verbose=False, print_period=20, log=False, **kwargs): - """ + r""" Solve the entropic regularization OT problem with log stabilization The function solves the following optimization problem: @@ -602,11 +599,11 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, Parameters ---------- - a : np.ndarray (ns,) + a : ndarray, shape (ns,) samples weights in the source domain - b : np.ndarray (nt,) + b : ndarray, shape (nt,) samples in the target domain - M : np.ndarray (ns,nt) + M : ndarray, shape (ns, nt) loss matrix reg : float Regularization term >0 @@ -623,10 +620,9 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, log : bool, optional record log if True - Returns ------- - gamma : (ns x nt) ndarray + gamma : ndarray, shape (ns, nt) Optimal transportation matrix for the given parameters log : dict log dictionary return only if log==True in parameters @@ -639,8 +635,8 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, >>> b=[.5,.5] >>> M=[[0.,1.],[1.,0.]] >>> ot.bregman.sinkhorn_stabilized(a,b,M,1) - array([[ 0.36552929, 0.13447071], - [ 0.13447071, 0.36552929]]) + array([[0.36552929, 0.13447071], + [0.13447071, 0.36552929]]) References @@ -769,10 +765,14 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, cpt = cpt + 1 - # print('err=',err,' cpt=',cpt) if log: - log['logu'] = alpha / reg + np.log(u) - log['logv'] = beta / reg + np.log(v) + if nbb: + alpha = alpha[:, None] + beta = beta[:, None] + logu = alpha / reg + np.log(u) + logv = beta / reg + np.log(v) + log['logu'] = logu + log['logv'] = logv log['alpha'] = alpha + reg * np.log(u) log['beta'] = beta + reg * np.log(v) log['warmstart'] = (log['alpha'], log['beta']) @@ -796,7 +796,7 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9, def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInnerItermax=100, tau=1e3, stopThr=1e-9, warmstart=None, verbose=False, print_period=10, log=False, **kwargs): - """ + r""" Solve the entropic regularization optimal transport problem with log stabilization and epsilon scaling. 
@@ -823,11 +823,11 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne Parameters ---------- - a : np.ndarray (ns,) + a : ndarray, shape (ns,) samples weights in the source domain - b : np.ndarray (nt,) + b : ndarray, shape (nt,) samples in the target domain - M : np.ndarray (ns,nt) + M : ndarray, shape (ns, nt) loss matrix reg : float Regularization term >0 @@ -835,7 +835,7 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne thershold for max value in u or v for log scaling tau : float thershold for max value in u or v for log scaling - warmstart : tible of vectors + warmstart : tuple of vectors if given then sarting values for alpha an beta log scalings numItermax : int, optional Max number of iterations @@ -850,10 +850,9 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne log : bool, optional record log if True - Returns ------- - gamma : (ns x nt) ndarray + gamma : ndarray, shape (ns, nt) Optimal transportation matrix for the given parameters log : dict log dictionary return only if log==True in parameters @@ -862,12 +861,12 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne -------- >>> import ot - >>> a=[.5,.5] - >>> b=[.5,.5] - >>> M=[[0.,1.],[1.,0.]] - >>> ot.bregman.sinkhorn_epsilon_scaling(a,b,M,1) - array([[ 0.36552929, 0.13447071], - [ 0.13447071, 0.36552929]]) + >>> a=[.5, .5] + >>> b=[.5, .5] + >>> M=[[0., 1.], [1., 0.]] + >>> ot.bregman.sinkhorn_epsilon_scaling(a, b, M, 1) + array([[0.36552929, 0.13447071], + [0.13447071, 0.36552929]]) References @@ -989,7 +988,7 @@ def projC(gamma, q): def barycenter(A, M, reg, weights=None, numItermax=1000, stopThr=1e-4, verbose=False, log=False): - """Compute the entropic regularized wasserstein barycenter of distributions A + r"""Compute the entropic regularized wasserstein barycenter of distributions A The function solves the following optimization problem: @@ -1006,13 +1005,13 @@ def barycenter(A, M, reg, weights=None, numItermax=1000, Parameters ---------- - A : np.ndarray (d,n) + A : ndarray, shape (d,n) n training distributions a_i of size d - M : np.ndarray (d,d) + M : ndarray, shape (d,d) loss matrix for OT reg : float Regularization term >0 - weights : np.ndarray (n,) + weights : ndarray, shape (n,) Weights of each histogram a_i on the simplex (barycentric coodinates) numItermax : int, optional Max number of iterations @@ -1084,7 +1083,7 @@ def barycenter(A, M, reg, weights=None, numItermax=1000, def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1e-9, stabThr=1e-30, verbose=False, log=False): - """Compute the entropic regularized wasserstein barycenter of distributions A + r"""Compute the entropic regularized wasserstein barycenter of distributions A where A is a collection of 2D images. 
The function solves the following optimization problem: @@ -1102,11 +1101,11 @@ def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1 Parameters ---------- - A : np.ndarray (n,w,h) + A : ndarray, shape (n, w, h) n distributions (2D images) of size w x h reg : float Regularization term >0 - weights : np.ndarray (n,) + weights : ndarray, shape (n,) Weights of each image on the simplex (barycentric coodinates) numItermax : int, optional Max number of iterations @@ -1119,15 +1118,13 @@ def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1 log : bool, optional record log if True - Returns ------- - a : (w,h) ndarray + a : ndarray, shape (w, h) 2D Wasserstein barycenter log : dict log dictionary return only if log==True in parameters - References ---------- @@ -1195,7 +1192,7 @@ def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1 def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000, stopThr=1e-3, verbose=False, log=False): - """ + r""" Compute the unmixing of an observation with a given dictionary using Wasserstein distance The function solve the following optimization problem: @@ -1217,15 +1214,15 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000, Parameters ---------- - a : np.ndarray (d) + a : ndarray, shape (d) observed distribution - D : np.ndarray (d,n) + D : ndarray, shape (d, n) dictionary matrix - M : np.ndarray (d,d) + M : ndarray, shape (d, d) loss matrix - M0 : np.ndarray (n,n) + M0 : ndarray, shape (n, n) loss matrix - h0 : np.ndarray (n,) + h0 : ndarray, shape (n,) prior on h reg : float Regularization term >0 (Wasserstein data fitting) @@ -1245,7 +1242,7 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000, Returns ------- - a : (d,) ndarray + a : ndarray, shape (d,) Wasserstein barycenter log : dict log dictionary return only if log==True in parameters @@ -1302,7 +1299,7 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000, def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs): - ''' + r''' Solve the entropic regularization optimal transport problem and return the OT matrix from empirical data @@ -1325,15 +1322,15 @@ def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numI Parameters ---------- - X_s : np.ndarray (ns, d) + X_s : ndarray, shape (ns, d) samples in the source domain - X_t : np.ndarray (nt, d) + X_t : ndarray, shape (nt, d) samples in the target domain reg : float Regularization term >0 - a : np.ndarray (ns,) + a : ndarray, shape (ns,) samples weights in the source domain - b : np.ndarray (nt,) + b : ndarray, shape (nt,) samples weights in the target domain numItermax : int, optional Max number of iterations @@ -1347,7 +1344,7 @@ def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numI Returns ------- - gamma : (ns x nt) ndarray + gamma : ndarray, shape (ns, nt) Regularized optimal transportation matrix for the given parameters log : dict log dictionary return only if log==True in parameters @@ -1360,10 +1357,9 @@ def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numI >>> reg = 0.1 >>> X_s = np.reshape(np.arange(n_s), (n_s, 1)) >>> X_t = np.reshape(np.arange(0, n_t), (n_t, 1)) - >>> emp_sinkhorn = empirical_sinkhorn(X_s, X_t, reg, verbose=False) - >>> print(emp_sinkhorn) - >>> [[4.99977301e-01 2.26989344e-05] - [2.26989344e-05 4.99977301e-01]] + >>> empirical_sinkhorn(X_s, X_t, 
reg, verbose=False) # doctest: +NORMALIZE_WHITESPACE + array([[4.99977301e-01, 2.26989344e-05], + [2.26989344e-05, 4.99977301e-01]]) References @@ -1392,7 +1388,7 @@ def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numI def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs): - ''' + r''' Solve the entropic regularization optimal transport problem from empirical data and return the OT loss @@ -1416,15 +1412,15 @@ def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', num Parameters ---------- - X_s : np.ndarray (ns, d) + X_s : ndarray, shape (ns, d) samples in the source domain - X_t : np.ndarray (nt, d) + X_t : ndarray, shape (nt, d) samples in the target domain reg : float Regularization term >0 - a : np.ndarray (ns,) + a : ndarray, shape (ns,) samples weights in the source domain - b : np.ndarray (nt,) + b : ndarray, shape (nt,) samples weights in the target domain numItermax : int, optional Max number of iterations @@ -1438,7 +1434,7 @@ def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', num Returns ------- - gamma : (ns x nt) ndarray + gamma : ndarray, shape (ns, nt) Regularized optimal transportation matrix for the given parameters log : dict log dictionary return only if log==True in parameters @@ -1451,9 +1447,8 @@ def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', num >>> reg = 0.1 >>> X_s = np.reshape(np.arange(n_s), (n_s, 1)) >>> X_t = np.reshape(np.arange(0, n_t), (n_t, 1)) - >>> loss_sinkhorn = empirical_sinkhorn2(X_s, X_t, reg, verbose=False) - >>> print(loss_sinkhorn) - >>> [4.53978687e-05] + >>> empirical_sinkhorn2(X_s, X_t, reg, verbose=False) + array([4.53978687e-05]) References @@ -1482,7 +1477,7 @@ def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', num def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs): - ''' + r''' Compute the sinkhorn divergence loss from empirical data The function solves the following optimization problems and return the @@ -1525,15 +1520,15 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli Parameters ---------- - X_s : np.ndarray (ns, d) + X_s : ndarray, shape (ns, d) samples in the source domain - X_t : np.ndarray (nt, d) + X_t : ndarray, shape (nt, d) samples in the target domain reg : float Regularization term >0 - a : np.ndarray (ns,) + a : ndarray, shape (ns,) samples weights in the source domain - b : np.ndarray (nt,) + b : ndarray, shape (nt,) samples weights in the target domain numItermax : int, optional Max number of iterations @@ -1544,30 +1539,26 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli log : bool, optional record log if True - Returns ------- - gamma : (ns x nt) ndarray + gamma : ndarray, shape (ns, nt) Regularized optimal transportation matrix for the given parameters log : dict log dictionary return only if log==True in parameters Examples -------- - >>> n_s = 2 >>> n_t = 4 >>> reg = 0.1 >>> X_s = np.reshape(np.arange(n_s), (n_s, 1)) >>> X_t = np.reshape(np.arange(0, n_t), (n_t, 1)) - >>> emp_sinkhorn_div = empirical_sinkhorn_divergence(X_s, X_t, reg) - >>> print(emp_sinkhorn_div) - >>> [2.99977435] + >>> empirical_sinkhorn_divergence(X_s, X_t, reg) # doctest: +ELLIPSIS + array([1.499...]) References ---------- - .. 
[23] Aude Genevay, Gabriel Peyré, Marco Cuturi, Learning Generative Models with Sinkhorn Divergences, Proceedings of the Twenty-First International Conference on Artficial Intelligence and Statistics, (AISTATS) 21, 2018 ''' if log: @@ -41,15 +41,15 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10, where : - M is the (ns,nt) metric cost matrix - - :math:`\Omega_e` is the entropic regularization term - :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` - - :math:`\Omega_g` is the group lasso regulaization term + - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e + (\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})` + - :math:`\Omega_g` is the group lasso regularization term :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` where :math:`\mathcal{I}_c` are the index of samples from class c in the source domain. - a and b are source and target weights (sum to 1) - The algorithm used for solving the problem is the generalised conditional + The algorithm used for solving the problem is the generalized conditional gradient as proposed in [5]_ [7]_ diff --git a/ot/datasets.py b/ot/datasets.py index e76e75d..ba0cfd9 100644 --- a/ot/datasets.py +++ b/ot/datasets.py @@ -17,7 +17,6 @@ def make_1D_gauss(n, m, s): Parameters ---------- - n : int number of bins in the histogram m : float @@ -25,12 +24,10 @@ def make_1D_gauss(n, m, s): s : float standard deviaton of the gaussian distribution - Returns ------- - h : np.array (n,) - 1D histogram for a gaussian distribution - + h : ndarray (n,) + 1D histogram for a gaussian distribution """ x = np.arange(n, dtype=np.float64) h = np.exp(-(x - m)**2 / (2 * s**2)) @@ -44,16 +41,15 @@ def get_1D_gauss(n, m, sigma): def make_2D_samples_gauss(n, m, sigma, random_state=None): - """return n samples drawn from 2D gaussian N(m,sigma) + """Return n samples drawn from 2D gaussian N(m,sigma) Parameters ---------- - n : int number of samples to make - m : np.array (2,) + m : ndarray, shape (2,) mean value of the gaussian distribution - sigma : np.array (2,2) + sigma : ndarray, shape (2, 2) covariance matrix of the gaussian distribution random_state : int, RandomState instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; @@ -63,9 +59,8 @@ def make_2D_samples_gauss(n, m, sigma, random_state=None): Returns ------- - X : np.array (n,2) - n samples drawn from N(m,sigma) - + X : ndarray, shape (n, 2) + n samples drawn from N(m, sigma). """ generator = check_random_state(random_state) @@ -86,11 +81,10 @@ def get_2D_samples_gauss(n, m, sigma, random_state=None): def make_data_classif(dataset, n, nz=.5, theta=0, random_state=None, **kwargs): - """ dataset generation for classification problems + """Dataset generation for classification problems Parameters ---------- - dataset : str type of classification problem (see code) n : int @@ -105,13 +99,11 @@ def make_data_classif(dataset, n, nz=.5, theta=0, random_state=None, **kwargs): Returns ------- - X : np.array (n,d) - n observation of size d - y : np.array (n,) - labels of the samples - + X : ndarray, shape (n, d) + n observation of size d + y : ndarray, shape (n,) + labels of the samples. """ - generator = check_random_state(random_state) if dataset.lower() == '3gauss': @@ -1,6 +1,12 @@ # -*- coding: utf-8 -*- """ Dimension reduction with optimal transport + + +.. warning:: + Note that by default the module is not import in :mod:`ot`. 
In order to + use it you need to explicitely import :mod:`ot.dr` + """ # Author: Remi Flamary <remi.flamary@unice.fr> @@ -43,30 +49,25 @@ def split_classes(X, y): def fda(X, y, p=2, reg=1e-16): - """ - Fisher Discriminant Analysis - + """Fisher Discriminant Analysis Parameters ---------- - X : numpy.ndarray (n,d) - Training samples - y : np.ndarray (n,) - labels for training samples + X : ndarray, shape (n, d) + Training samples. + y : ndarray, shape (n,) + Labels for training samples. p : int, optional - size of dimensionnality reduction + Size of dimensionnality reduction. reg : float, optional Regularization term >0 (ridge regularization) - Returns ------- - P : (d x p) ndarray + P : ndarray, shape (d, p) Optimal transportation matrix for the given parameters - proj : fun + proj : callable projection function including mean centering - - """ mx = np.mean(X) @@ -124,37 +125,33 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None): Parameters ---------- - X : numpy.ndarray (n,d) - Training samples - y : np.ndarray (n,) - labels for training samples + X : ndarray, shape (n, d) + Training samples. + y : ndarray, shape (n,) + Labels for training samples. p : int, optional - size of dimensionnality reduction + Size of dimensionnality reduction. reg : float, optional Regularization term >0 (entropic regularization) - solver : str, optional - None for steepest decsent or 'TrustRegions' for trust regions algorithm - else shoudl be a pymanopt.solvers - P0 : numpy.ndarray (d,p) - Initial starting point for projection + solver : None | str, optional + None for steepest descent or 'TrustRegions' for trust regions algorithm + else should be a pymanopt.solvers + P0 : ndarray, shape (d, p) + Initial starting point for projection. verbose : int, optional - Print information along iterations - - + Print information along iterations. Returns ------- - P : (d x p) ndarray + P : ndarray, shape (d, p) Optimal transportation matrix for the given parameters - proj : fun - projection function including mean centering - + proj : callable + Projection function including mean centering. References ---------- - - .. [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016). Wasserstein Discriminant Analysis. arXiv preprint arXiv:1608.08063. - + .. [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016). + Wasserstein Discriminant Analysis. arXiv preprint arXiv:1608.08063. """ # noqa mx = np.mean(X) diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py index deda6b1..1ab95bb 100644 --- a/ot/gpu/__init__.py +++ b/ot/gpu/__init__.py @@ -5,11 +5,15 @@ This module provides GPU implementation for several OT solvers and utility functions. The GPU backend in handled by `cupy <https://cupy.chainer.org/>`_. +.. warning:: + Note that by default the module is not import in :mod:`ot`. In order to + use it you need to explicitely import :mod:`ot.gpu` . + By default, the functions in this module accept and return numpy arrays in order to proide drop-in replacement for the other POT function but the transfer between CPU en GPU comes with a significant overhead. -In order to get the best erformances, we recommend to give only cupy +In order to get the best performances, we recommend to give only cupy arrays to the functions and desactivate the conversion to numpy of the result of the function with parameter ``to_numpy=False``. 
diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py index 978b307..2e2df83 100644 --- a/ot/gpu/bregman.py +++ b/ot/gpu/bregman.py @@ -70,17 +70,6 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9, log : dict log dictionary return only if log==True in parameters - Examples - -------- - - >>> import ot - >>> a=[.5,.5] - >>> b=[.5,.5] - >>> M=[[0.,1.],[1.,0.]] - >>> ot.sinkhorn(a,b,M,1) - array([[ 0.36552929, 0.13447071], - [ 0.13447071, 0.36552929]]) - References ---------- diff --git a/ot/gromov.py b/ot/gromov.py index ca96b31..699ae4c 100644 --- a/ot/gromov.py +++ b/ot/gromov.py @@ -1,9 +1,6 @@ -
# -*- coding: utf-8 -*-
"""
Gromov-Wasserstein transport method
-
-
"""
# Author: Erwan Vautier <erwan.vautier@gmail.com>
@@ -22,7 +19,7 @@ from .optim import cg def init_matrix(C1, C2, p, q, loss_fun='square_loss'):
- """ Return loss matrices and tensors for Gromov-Wasserstein fast computation
+ """Return loss matrices and tensors for Gromov-Wasserstein fast computation
Returns the value of \mathcal{L}(C1,C2) \otimes T with the selected loss
function as the loss function of Gromov-Wasserstein discrepancy.
@@ -51,29 +48,27 @@ def init_matrix(C1, C2, p, q, loss_fun='square_loss'): Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
+ Metric cost matrix in the target space
T : ndarray, shape (ns, nt)
- Coupling between source and target spaces
+ Coupling between source and target spaces
p : ndarray, shape (ns,)
-
Returns
-------
-
constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
+ Constant C matrix in Eq. (6)
hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
+ h1(C1) matrix in Eq. (6)
hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
+ h2(C) matrix in Eq. (6)
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
@@ -114,31 +109,29 @@ def init_matrix(C1, C2, p, q, loss_fun='square_loss'): def tensor_product(constC, hC1, hC2, T):
- """ Return the tensor for Gromov-Wasserstein fast computation
+ """Return the tensor for Gromov-Wasserstein fast computation
The tensor is computed as described in Proposition 1 Eq. (6) in [12].
Parameters
----------
constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
+ Constant C matrix in Eq. (6)
hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
+ h1(C1) matrix in Eq. (6)
hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
-
+ h2(C) matrix in Eq. (6)
Returns
-------
-
tens : ndarray, shape (ns, nt)
- \mathcal{L}(C1,C2) \otimes T tensor-matrix multiplication result
+ \mathcal{L}(C1,C2) \otimes T tensor-matrix multiplication result
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
A = -np.dot(hC1, T).dot(hC2.T)
@@ -148,32 +141,31 @@ def tensor_product(constC, hC1, hC2, T): def gwloss(constC, hC1, hC2, T):
- """ Return the Loss for Gromov-Wasserstein
+ """Return the Loss for Gromov-Wasserstein
The loss is computed as described in Proposition 1 Eq. (6) in [12].
Parameters
----------
constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
+ Constant C matrix in Eq. (6)
hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
+ h1(C1) matrix in Eq. (6)
hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
+ h2(C) matrix in Eq. (6)
T : ndarray, shape (ns, nt)
- Current value of transport matrix T
+ Current value of transport matrix T
Returns
-------
-
loss : float
- Gromov Wasserstein loss
+ Gromov Wasserstein loss
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
@@ -183,32 +175,31 @@ def gwloss(constC, hC1, hC2, T): def gwggrad(constC, hC1, hC2, T):
- """ Return the gradient for Gromov-Wasserstein
+ """Return the gradient for Gromov-Wasserstein
The gradient is computed as described in Proposition 2 in [12].
Parameters
----------
constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
+ Constant C matrix in Eq. (6)
hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
+ h1(C1) matrix in Eq. (6)
hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
+ h2(C) matrix in Eq. (6)
T : ndarray, shape (ns, nt)
- Current value of transport matrix T
+ Current value of transport matrix T
Returns
-------
-
grad : ndarray, shape (ns, nt)
Gromov Wasserstein gradient
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
return 2 * tensor_product(constC, hC1, hC2,
@@ -222,19 +213,19 @@ def update_square_loss(p, lambdas, T, Cs): Parameters
----------
- p : ndarray, shape (N,)
- masses in the targeted barycenter
+ p : ndarray, shape (N,)
+ Masses in the targeted barycenter.
lambdas : list of float
- list of the S spaces' weights
- T : list of S np.ndarray(ns,N)
- the S Ts couplings calculated at each iteration
+ List of the S spaces' weights.
+ T : list of S np.ndarray of shape (ns,N)
+ The S Ts couplings calculated at each iteration.
Cs : list of S ndarray, shape(ns,ns)
- Metric cost matrices
+ Metric cost matrices.
Returns
----------
- C : ndarray, shape (nt,nt)
- updated C matrix
+ C : ndarray, shape (nt, nt)
+ Updated C matrix.
"""
tmpsum = sum([lambdas[s] * np.dot(T[s].T, Cs[s]).dot(T[s])
for s in range(len(T))])
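A small shape sketch (toy couplings assumed): the update divides sum_s lambda_s T_s^T C_s T_s by the outer product p p^T, giving an (N, N) structure matrix:

>>> import numpy as np
>>> import ot
>>> p = ot.unif(4)
>>> Cs = [ot.dist(np.random.randn(6, 2)), ot.dist(np.random.randn(5, 2))]
>>> T = [np.outer(ot.unif(6), p), np.outer(ot.unif(5), p)]  # (ns, N) couplings
>>> C = ot.gromov.update_square_loss(p, [0.5, 0.5], T, Cs)
>>> C.shape
(4, 4)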
@@ -251,12 +242,12 @@ def update_kl_loss(p, lambdas, T, Cs): Parameters
----------
p : ndarray, shape (N,)
- weights in the targeted barycenter
+ Weights in the targeted barycenter.
lambdas : list of the S spaces' weights
- T : list of S np.ndarray(ns,N)
- the S Ts couplings calculated at each iteration
+ T : list of S np.ndarray of shape (ns,N)
+ The S Ts couplings calculated at each iteration.
Cs : list of S ndarray, shape(ns,ns)
- Metric cost matrices
+ Metric cost matrices.
Returns
----------
@@ -277,27 +268,27 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs The function solves the following optimization problem:
.. math::
- \GW_Dist = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- p : distribution in the source space
- q : distribution in the target space
- L : loss function to account for the misfit between the similarity matrices
- H : entropy
+ - C1 : Metric cost matrix in the source space
+ - C2 : Metric cost matrix in the target space
+ - p : distribution in the source space
+ - q : distribution in the target space
+ - L : loss function to account for the misfit between the similarity matrices
+ - H : entropy
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
- p : ndarray, shape (ns,)
- distribution in the source space
- q : ndarray, shape (nt,)
- distribution in the target space
- loss_fun : string
+ Metric cost matrix in the target space
+ p : ndarray, shape (ns,)
+ Distribution in the source space
+ q : ndarray, shape (nt,)
+ Distribution in the target space
+ loss_fun : str
loss function used for the solver either 'square_loss' or 'kl_loss'
max_iter : int, optional
@@ -312,15 +303,15 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs If True the steps of the line-search is found via an armijo research. Else closed form is used.
If there is convergence issues use False.
**kwargs : dict
- parameters can be directly pased to the ot.optim.cg solver
+ parameters can be directly passed to the ot.optim.cg solver
Returns
-------
T : ndarray, shape (ns, nt)
- coupling between the two spaces that minimizes :
+ Coupling between the two spaces that minimizes:
\sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
log : dict
- convergence information and loss
+ Convergence information and loss.
References
----------
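A minimal usage sketch (the toy point clouds and uniform weights are assumptions for illustration; normalizing the cost matrices is a common precaution, not a requirement of the patch):

>>> import numpy as np
>>> import ot
>>> C1 = ot.dist(np.random.randn(10, 2)); C1 /= C1.max()
>>> C2 = ot.dist(np.random.randn(8, 3)); C2 /= C2.max()
>>> p, q = ot.unif(10), ot.unif(8)
>>> T = ot.gromov.gromov_wasserstein(C1, C2, p, q, 'square_loss')
>>> T.shape  # coupling with marginals p and q
(10, 8)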
@@ -355,31 +346,37 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, armijo=False, log=False, **kwargs):
"""
Computes the FGW transport between two graphs, see [24]
+
.. math::
- \gamma = arg\min_\gamma (1-\alpha)*<\gamma,M>_F + alpha* \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ \gamma = arg\min_\gamma (1-\\alpha)*<\gamma,M>_F + \\alpha* \sum_{i,j,k,l}
+ L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+
s.t. \gamma 1 = p
\gamma^T 1= q
\gamma\geq 0
+
where :
- M is the (ns,nt) metric cost matrix
- :math:`f` is the regularization term ( and df is its gradient)
- a and b are source and target weights (sum to 1)
- L is a loss function to account for the misfit between the similarity matrices
- The algorithm used for solving the problem is conditional gradient as discussed in [1]_
+
+ The algorithm used for solving the problem is conditional gradient as discussed in [24]_
+
Parameters
----------
- M : ndarray, shape (ns, nt)
- Metric cost matrix between features across domains
+ M : ndarray, shape (ns, nt)
+ Metric cost matrix between features across domains
C1 : ndarray, shape (ns, ns)
- Metric cost matrix respresentative of the structure in the source space
+ Metric cost matrix representative of the structure in the source space
C2 : ndarray, shape (nt, nt)
- Metric cost matrix espresentative of the structure in the target space
- p : ndarray, shape (ns,)
- distribution in the source space
- q : ndarray, shape (nt,)
- distribution in the target space
- loss_fun : string,optional
- loss function used for the solver
+ Metric cost matrix representative of the structure in the target space
+ p : ndarray, shape (ns,)
+ Distribution in the source space
+ q : ndarray, shape (nt,)
+ Distribution in the target space
+ loss_fun : str, optional
+ Loss function used for the solver
max_iter : int, optional
Max number of iterations
tol : float, optional
@@ -392,19 +389,21 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, If True the steps of the line-search is found via an armijo research. Else closed form is used.
If there is convergence issues use False.
**kwargs : dict
- parameters can be directly pased to the ot.optim.cg solver
+ parameters can be directly passed to the ot.optim.cg solver
+
Returns
-------
- gamma : (ns x nt) ndarray
- Optimal transportation matrix for the given parameters
+ gamma : ndarray, shape (ns, nt)
+ Optimal transportation matrix for the given parameters.
log : dict
- log dictionary return only if log==True in parameters
+ Log dictionary return only if log==True in parameters.
+
References
----------
.. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain
- and Courty Nicolas
- "Optimal Transport for structured data with application on graphs"
- International Conference on Machine Learning (ICML). 2019.
+ and Courty Nicolas "Optimal Transport for structured data with
+ application on graphs", International Conference on Machine Learning
+ (ICML). 2019.
"""
constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun)
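A minimal usage sketch (toy graphs with random features; all data below is illustrative, not from the patch). M compares node features across the two domains while C1 and C2 encode within-domain structure, and alpha trades the two terms off:

>>> import numpy as np
>>> import ot
>>> ys, yt = np.random.randn(10, 3), np.random.randn(8, 3)  # node features
>>> M = ot.dist(ys, yt)  # feature cost across domains
>>> C1, C2 = ot.dist(np.random.randn(10, 2)), ot.dist(np.random.randn(8, 2))
>>> p, q = ot.unif(10), ot.unif(8)
>>> gamma = ot.gromov.fused_gromov_wasserstein(M, C1, C2, p, q, 'square_loss', alpha=0.5)
>>> gamma.shape
(10, 8)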
@@ -428,31 +427,37 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, armijo=False, log=False, **kwargs):
"""
Computes the FGW distance between two graphs, see [24]
+
.. math::
- \gamma = arg\min_\gamma (1-\alpha)*<\gamma,M>_F + alpha* \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ \min_\gamma (1-\\alpha)*<\gamma,M>_F + \\alpha* \sum_{i,j,k,l}
+ L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+
+
s.t. \gamma 1 = p
\gamma^T 1= q
\gamma\geq 0
+
where :
- M is the (ns,nt) metric cost matrix
- :math:`f` is the regularization term ( and df is its gradient)
- a and b are source and target weights (sum to 1)
- L is a loss function to account for the misfit between the similarity matrices
The algorithm used for solving the problem is conditional gradient as discussed in [1]_
+
Parameters
----------
- M : ndarray, shape (ns, nt)
- Metric cost matrix between features across domains
+ M : ndarray, shape (ns, nt)
+ Metric cost matrix between features across domains
C1 : ndarray, shape (ns, ns)
- Metric cost matrix respresentative of the structure in the source space
+ Metric cost matrix representative of the structure in the source space.
C2 : ndarray, shape (nt, nt)
- Metric cost matrix espresentative of the structure in the target space
+ Metric cost matrix representative of the structure in the target space.
p : ndarray, shape (ns,)
- distribution in the source space
+ Distribution in the source space.
q : ndarray, shape (nt,)
- distribution in the target space
- loss_fun : string,optional
- loss function used for the solver
+ Distribution in the target space.
+ loss_fun : str, optional
+ Loss function used for the solver.
max_iter : int, optional
Max number of iterations
tol : float, optional
@@ -460,22 +465,24 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5 verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
+ Record log if True.
armijo : bool, optional
- If True the steps of the line-search is found via an armijo research. Else closed form is used.
- If there is convergence issues use False.
+ If True the step of the line-search is found via an armijo search.
+ Else closed form is used. If there are convergence issues use False.
**kwargs : dict
- parameters can be directly pased to the ot.optim.cg solver
+ Parameters can be directly passed to the ot.optim.cg solver.
+
Returns
-------
- gamma : (ns x nt) ndarray
- Optimal transportation matrix for the given parameters
+ gamma : ndarray, shape (ns, nt)
+ Optimal transportation matrix for the given parameters.
log : dict
- log dictionary return only if log==True in parameters
+ Log dictionary return only if log==True in parameters.
+
References
----------
.. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain
- and Courty Nicolas
+ and Courty Nicolas
"Optimal Transport for structured data with application on graphs"
International Conference on Machine Learning (ICML). 2019.
"""
@@ -506,29 +513,28 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwarg The function solves the following optimization problem:
.. math::
- \GW_Dist = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- p : distribution in the source space
- q : distribution in the target space
- L : loss function to account for the misfit between the similarity matrices
- H : entropy
+ - C1 : Metric cost matrix in the source space
+ - C2 : Metric cost matrix in the target space
+ - p : distribution in the source space
+ - q : distribution in the target space
+ - L : loss function to account for the misfit between the similarity matrices
+ - H : entropy
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
- p : ndarray, shape (ns,)
- distribution in the source space
+ Metric cost matrix in the target space
+ p : ndarray, shape (ns,)
+ Distribution in the source space.
q : ndarray, shape (nt,)
- distribution in the target space
- loss_fun : string
+ Distribution in the target space.
+ loss_fun : str
loss function used for the solver either 'square_loss' or 'kl_loss'
-
max_iter : int, optional
Max number of iterations
tol : float, optional
@@ -540,6 +546,7 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwarg armijo : bool, optional
If True the steps of the line-search is found via an armijo research. Else closed form is used.
If there is convergence issues use False.
+
Returns
-------
gw_dist : float
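Unlike gromov_wasserstein, this variant returns the discrepancy value itself. A toy sanity sketch (data assumed; since GW is non-convex the solver is only expected, not guaranteed, to reach the global optimum):

>>> import numpy as np
>>> import ot
>>> C1 = ot.dist(np.random.randn(6, 2)); C1 /= C1.max()
>>> p = ot.unif(6)
>>> gw_dist = ot.gromov.gromov_wasserstein2(C1, C1.copy(), p, p, 'square_loss')
>>> # expected to be close to zero for two copies of the same metric space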
@@ -587,56 +594,55 @@ def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon, The function solves the following optimization problem:
.. math::
- \GW = arg\min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
+ GW = arg\min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
- s.t. \GW 1 = p
+ s.t. T 1 = p
- \GW^T 1= q
+ T^T 1= q
- \GW\geq 0
+ T\geq 0
Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- p : distribution in the source space
- q : distribution in the target space
- L : loss function to account for the misfit between the similarity matrices
- H : entropy
+ - C1 : Metric cost matrix in the source space
+ - C2 : Metric cost matrix in the target space
+ - p : distribution in the source space
+ - q : distribution in the target space
+ - L : loss function to account for the misfit between the similarity matrices
+ - H : entropy
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
+ Metric cost matrix in the target space
p : ndarray, shape (ns,)
- distribution in the source space
+ Distribution in the source space
q : ndarray, shape (nt,)
- distribution in the target space
+ Distribution in the target space
loss_fun : string
- loss function used for the solver either 'square_loss' or 'kl_loss'
+ Loss function used for the solver either 'square_loss' or 'kl_loss'
epsilon : float
Regularization term >0
max_iter : int, optional
- Max number of iterations
+ Max number of iterations
tol : float, optional
Stop threshold on error (>0)
verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
+ Record log if True.
Returns
-------
T : ndarray, shape (ns, nt)
- coupling between the two spaces that minimizes :
- \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
+ Optimal coupling between the two spaces
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
@@ -695,28 +701,28 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon, The function solves the following optimization problem:
.. math::
- \GW_Dist = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
+ GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- p : distribution in the source space
- q : distribution in the target space
- L : loss function to account for the misfit between the similarity matrices
- H : entropy
+ - C1 : Metric cost matrix in the source space
+ - C2 : Metric cost matrix in the target space
+ - p : distribution in the source space
+ - q : distribution in the target space
+ - L : loss function to account for the misfit between the similarity matrices
+ - H : entropy
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
+ Metric cost matrix in the target space
p : ndarray, shape (ns,)
- distribution in the source space
+ Distribution in the source space
q : ndarray, shape (nt,)
- distribution in the target space
- loss_fun : string
- loss function used for the solver either 'square_loss' or 'kl_loss'
+ Distribution in the target space
+ loss_fun : str
+ Loss function used for the solver either 'square_loss' or 'kl_loss'
epsilon : float
Regularization term >0
max_iter : int, optional
@@ -726,7 +732,7 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon, verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
+ Record log if True.
Returns
-------
@@ -736,11 +742,10 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon, References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
-
gw, logv = entropic_gromov_wasserstein(
C1, C2, p, q, loss_fun, epsilon, max_iter, tol, verbose, log=True)
@@ -762,29 +767,31 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon, The function solves the following optimization problem:
.. math::
- C = argmin_C\in R^{NxN} \sum_s \lambda_s GW(C,Cs,p,ps)
+ C = argmin_{C\in R^{NxN}} \sum_s \lambda_s GW(C,C_s,p,p_s)
Where :
- Cs : metric cost matrix
- ps : distribution
+ - :math:`C_s` : metric cost matrix
+ - :math:`p_s` : distribution
Parameters
----------
- N : Integer
- Size of the targeted barycenter
- Cs : list of S np.ndarray(ns,ns)
- Metric cost matrices
- ps : list of S np.ndarray(ns,)
- sample weights in the S spaces
- p : ndarray, shape(N,)
- weights in the targeted barycenter
+ N : int
+ Size of the targeted barycenter
+ Cs : list of S np.ndarray of shape (ns,ns)
+ Metric cost matrices
+ ps : list of S np.ndarray of shape (ns,)
+ Sample weights in the S spaces
+ p : ndarray, shape(N,)
+ Weights in the targeted barycenter
lambdas : list of float
- list of the S spaces' weights
- loss_fun : tensor-matrix multiplication function based on specific loss function
- update : function(p,lambdas,T,Cs) that updates C according to a specific Kernel
- with the S Ts couplings calculated at each iteration
+ List of the S spaces' weights.
+ loss_fun : callable
+ Tensor-matrix multiplication function based on specific loss function.
+ update : callable
+ function(p,lambdas,T,Cs) that updates C according to a specific Kernel
+ with the S Ts couplings calculated at each iteration
epsilon : float
Regularization term >0
max_iter : int, optional
@@ -792,11 +799,11 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon, tol : float, optional
Stop threshold on error (>0)
verbose : bool, optional
- Print information along iterations
+ Print information along iterations.
log : bool, optional
- record log if True
- init_C : bool, ndarray, shape(N,N)
- random initial value for the C matrix provided by user
+ Record log if True.
+ init_C : bool | ndarray, shape (N, N)
+ Random initial value for the C matrix provided by user.
Returns
-------
@@ -806,9 +813,8 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon, References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
-
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
S = len(Cs)
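A minimal usage sketch with two toy spaces of different sizes (all data below is assumed for illustration):

>>> import numpy as np
>>> import ot
>>> Cs = [ot.dist(np.random.randn(n, 2)) for n in (6, 7)]
>>> Cs = [C / C.max() for C in Cs]
>>> ps = [ot.unif(C.shape[0]) for C in Cs]
>>> p = ot.unif(5)
>>> C = ot.gromov.entropic_gromov_barycenters(5, Cs, ps, p, [0.5, 0.5],
...                                           'square_loss', epsilon=5e-2, max_iter=50)
>>> C.shape
(5, 5)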
@@ -818,6 +824,7 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon, # Initialization of C : random SPD matrix (if not provided by user)
if init_C is None:
+ # XXX use random state
xalea = np.random.randn(N, 2)
C = dist(xalea, xalea)
C /= C.max()
@@ -829,7 +836,7 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon, error = []
- while(err > tol and cpt < max_iter):
+ while (err > tol) and (cpt < max_iter):
Cprev = C
T = [entropic_gromov_wasserstein(Cs[s], C, ps[s], p, loss_fun, epsilon,
@@ -873,37 +880,36 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, .. math::
C = argmin_C\in R^NxN \sum_s \lambda_s GW(C,Cs,p,ps)
-
Where :
- Cs : metric cost matrix
- ps : distribution
+ - Cs : metric cost matrix
+ - ps : distribution
Parameters
----------
- N : Integer
- Size of the targeted barycenter
- Cs : list of S np.ndarray(ns,ns)
- Metric cost matrices
- ps : list of S np.ndarray(ns,)
- sample weights in the S spaces
- p : ndarray, shape(N,)
- weights in the targeted barycenter
+ N : int
+ Size of the targeted barycenter
+ Cs : list of S np.ndarray of shape (ns, ns)
+ Metric cost matrices
+ ps : list of S np.ndarray of shape (ns,)
+ Sample weights in the S spaces
+ p : ndarray, shape (N,)
+ Weights in the targeted barycenter
lambdas : list of float
- list of the S spaces' weights
+ List of the S spaces' weights
loss_fun : tensor-matrix multiplication function based on specific loss function
update : function(p,lambdas,T,Cs) that updates C according to a specific Kernel
with the S Ts couplings calculated at each iteration
max_iter : int, optional
Max number of iterations
tol : float, optional
- Stop threshol on error (>0)
+ Stop threshold on error (>0).
verbose : bool, optional
- Print information along iterations
+ Print information along iterations.
log : bool, optional
- record log if True
- init_C : bool, ndarray, shape(N,N)
- random initial value for the C matrix provided by user
+ Record log if True.
+ init_C : bool | ndarray, shape(N,N)
+ Random initial value for the C matrix provided by user.
Returns
-------
@@ -913,11 +919,10 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
-
S = len(Cs)
Cs = [np.asarray(Cs[s], dtype=np.float64) for s in range(S)]
@@ -925,6 +930,7 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, # Initialization of C : random SPD matrix (if not provided by user)
if init_C is None:
+ # XXX : should use a random state and not use the global seed
xalea = np.random.randn(N, 2)
C = dist(xalea, xalea)
C /= C.max()
@@ -970,47 +976,52 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_features=False,
p=None, loss_fun='square_loss', max_iter=100, tol=1e-9,
verbose=False, log=False, init_C=None, init_X=None):
- """
- Compute the fgw barycenter as presented eq (5) in [24].
+ """Compute the fgw barycenter as presented eq (5) in [24].
+
+ Parameters
----------
N : integer
Desired number of samples of the target barycenter
Ys: list of ndarray, each element has shape (ns,d)
Features of all samples
Cs : list of ndarray, each element has shape (ns,ns)
- Structure matrices of all samples
+ Structure matrices of all samples
ps : list of ndarray, each element has shape (ns,)
- masses of all samples
+ Masses of all samples.
lambdas : list of float
- list of the S spaces' weights
+ List of the S spaces' weights
alpha : float
- Alpha parameter for the fgw distance
- fixed_structure : bool
- Wether to fix the structure of the barycenter during the updates
- fixed_features : bool
- Wether to fix the feature of the barycenter during the updates
- init_C : ndarray, shape (N,N), optional
- initialization for the barycenters' structure matrix. If not set random init
- init_X : ndarray, shape (N,d), optional
- initialization for the barycenters' features. If not set random init
+ Alpha parameter for the fgw distance
+ fixed_structure : bool
+ Whether to fix the structure of the barycenter during the updates
+ fixed_features : bool
+ Whether to fix the features of the barycenter during the updates
+ init_C : ndarray, shape (N,N), optional
+ Initialization for the barycenters' structure matrix. If not set
+ a random init is used.
+ init_X : ndarray, shape (N,d), optional
+ Initialization for the barycenters' features. If not set a
+ random init is used.
+
Returns
- ----------
- X : ndarray, shape (N,d)
+ -------
+ X : ndarray, shape (N, d)
Barycenters' features
- C : ndarray, shape (N,N)
+ C : ndarray, shape (N, N)
Barycenters' structure matrix
- log_: dictionary
- Only returned when log=True
+ log_: dict
+ Only returned when log=True. It contains the keys:
T : list of (N,ns) transport matrices
- Ms : all distance matrices between the feature of the barycenter and the other features dist(X,Ys) shape (N,ns)
+ Ms : all distance matrices between the feature of the barycenter and the
+ other features dist(X,Ys) shape (N,ns)
+
References
----------
.. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain
- and Courty Nicolas
+ and Courty Nicolas
"Optimal Transport for structured data with application on graphs"
International Conference on Machine Learning (ICML). 2019.
"""
-
S = len(Cs)
d = Ys[0].shape[1] # dimension on the node features
if p is None:
@@ -1073,7 +1084,8 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_ T_temp = [t.T for t in T]
C = update_sructure_matrix(p, lambdas, T_temp, Cs)
- T = [fused_gromov_wasserstein((1 - alpha) * Ms[s], C, Cs[s], p, ps[s], loss_fun, alpha, numItermax=max_iter, stopThr=1e-5, verbose=verbose) for s in range(S)]
+ T = [fused_gromov_wasserstein((1 - alpha) * Ms[s], C, Cs[s], p, ps[s], loss_fun, alpha,
+ numItermax=max_iter, stopThr=1e-5, verbose=verbose) for s in range(S)]
# T is N,ns
err_feature = np.linalg.norm(X - Xprev.reshape(N, d))
@@ -1092,6 +1104,7 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_ print('{:5d}|{:8e}|'.format(cpt, err_feature))
cpt += 1
+
if log:
log_['T'] = T # from target to Ys
log_['p'] = p
@@ -1104,23 +1117,25 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_ def update_sructure_matrix(p, lambdas, T, Cs):
- """
- Updates C according to the L2 Loss kernel with the S Ts couplings
- calculated at each iteration
+ """Updates C according to the L2 Loss kernel with the S Ts couplings.
+
+ The couplings are calculated at each iteration.
+
Parameters
----------
- p : ndarray, shape (N,)
- masses in the targeted barycenter
+ p : ndarray, shape (N,)
+ Masses in the targeted barycenter.
lambdas : list of float
- list of the S spaces' weights
- T : list of S np.ndarray(ns,N)
- the S Ts couplings calculated at each iteration
- Cs : list of S ndarray, shape(ns,ns)
- Metric cost matrices
+ List of the S spaces' weights.
+ T : list of S ndarray of shape (ns, N)
+ The S Ts couplings calculated at each iteration.
+ Cs : list of S ndarray, shape (ns, ns)
+ Metric cost matrices.
+
Returns
- ----------
- C : ndarray, shape (nt,nt)
- updated C matrix
+ -------
+ C : ndarray, shape (nt, nt)
+ Updated C matrix.
"""
tmpsum = sum([lambdas[s] * np.dot(T[s].T, Cs[s]).dot(T[s]) for s in range(len(T))])
ppt = np.outer(p, p)
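For reference, tmpsum and ppt are the numerator and denominator of the closed-form L2 update of [24]; the elementwise division itself sits in the unchanged remainder of the function:

.. math::
    C \leftarrow \frac{\sum_s \lambda_s T_s^\top C_s T_s}{p p^\top}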
@@ -1129,22 +1144,26 @@ def update_sructure_matrix(p, lambdas, T, Cs): def update_feature_matrix(lambdas, Ys, Ts, p):
- """
- Updates the feature with respect to the S Ts couplings. See "Solving the barycenter problem with Block Coordinate Descent (BCD)" in [24]
- calculated at each iteration
+ """Updates the feature with respect to the S Ts couplings.
+
+
+ See "Solving the barycenter problem with Block Coordinate Descent (BCD)"
+ in [24] calculated at each iteration
+
Parameters
----------
- p : ndarray, shape (N,)
- masses in the targeted barycenter
+ p : ndarray, shape (N,)
+ Masses in the targeted barycenter.
lambdas : list of float
- list of the S spaces' weights
+ List of the S spaces' weights
Ts : list of S np.ndarray(ns,N)
the S Ts couplings calculated at each iteration
Ys : list of S ndarray, shape(d,ns)
- The features
+ The features.
+
Returns
- ----------
- X : ndarray, shape (d,N)
+ -------
+ X : ndarray, shape (d, N)
References
----------
@@ -1153,9 +1172,8 @@ def update_feature_matrix(lambdas, Ys, Ts, p): "Optimal Transport for structured data with application on graphs"
International Conference on Machine Learning (ICML). 2019.
"""
- p = np.diag(np.array(1 / p).reshape(-1,))
-
- tmpsum = sum([lambdas[s] * np.dot(Ys[s], Ts[s].T).dot(p) for s in range(len(Ts))])
+ p = np.array(1. / p).reshape(-1,)
+ tmpsum = sum([lambdas[s] * np.dot(Ys[s], Ts[s].T) * p[None, :] for s in range(len(Ts))])
return tmpsum
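The rewrite above drops the dense diag(1/p) factor in favour of broadcasting; both forms compute Ys[s].dot(Ts[s].T).dot(diag(1/p)), i.e. a column-wise rescaling, which a quick check confirms (shapes are illustrative only):

>>> import numpy as np
>>> rng = np.random.RandomState(0)
>>> Y, T, p = rng.randn(3, 5), rng.rand(4, 5), rng.rand(4)  # (d, ns), (N, ns), (N,)
>>> old = Y.dot(T.T).dot(np.diag(1. / p))
>>> new = Y.dot(T.T) * (1. / p)[None, :]
>>> np.allclose(old, new)
True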
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 02cbd8c..17f1731 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -1,6 +1,9 @@ # -*- coding: utf-8 -*- """ Solvers for the original linear program OT problem + + + """ # Author: Remi Flamary <remi.flamary@unice.fr> @@ -10,20 +13,22 @@ Solvers for the original linear program OT problem import multiprocessing import numpy as np +from scipy.sparse import coo_matrix from .import cvx # import compiled emd -from .emd_wrap import emd_c, check_result +from .emd_wrap import emd_c, check_result, emd_1d_sorted from ..utils import parmap from .cvx import barycenter from ..utils import dist -__all__=['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx'] +__all__=['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx', + 'emd_1d', 'emd2_1d', 'wasserstein_1d'] def emd(a, b, M, numItermax=100000, log=False): - """Solves the Earth Movers distance problem and returns the OT matrix + r"""Solves the Earth Movers distance problem and returns the OT matrix .. math:: @@ -37,26 +42,30 @@ def emd(a, b, M, numItermax=100000, log=False): - M is the metric cost matrix - a and b are the sample weights + .. warning:: + Note that the M matrix needs to be a C-order numpy.array in float64 + format. + Uses the algorithm proposed in [1]_ Parameters ---------- - a : (ns,) ndarray, float64 - Source histogram (uniform weigth if empty list) - b : (nt,) ndarray, float64 - Target histogram (uniform weigth if empty list) - M : (ns,nt) ndarray, float64 - loss matrix + a : (ns,) numpy.ndarray, float64 + Source histogram (uniform weight if empty list) + b : (nt,) numpy.ndarray, float64 + Target histogram (uniform weight if empty list) + M : (ns,nt) numpy.ndarray, float64 + Loss matrix (c-order array with type float64) numItermax : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. - log: boolean, optional (default=False) + log: bool, optional (default=False) If True, returns a dictionary containing the cost and dual variables. Otherwise returns only the optimal transportation matrix. Returns ------- - gamma: (ns x nt) ndarray + gamma: (ns x nt) numpy.ndarray Optimal transportation matrix for the given parameters log: dict If input log is true, a dictionary containing the cost and dual @@ -74,8 +83,8 @@ def emd(a, b, M, numItermax=100000, log=False): >>> b=[.5,.5] >>> M=[[0.,1.],[1.,0.]] >>> ot.emd(a,b,M) - array([[ 0.5, 0. ], - [ 0. , 0.5]]) + array([[0.5, 0. ], + [0. , 0.5]]) References ---------- @@ -94,7 +103,7 @@ def emd(a, b, M, numItermax=100000, log=False): b = np.asarray(b, dtype=np.float64) M = np.asarray(M, dtype=np.float64) - # if empty array given then use unifor distributions + # if empty array given then use uniform distributions if len(a) == 0: a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0] if len(b) == 0: @@ -115,7 +124,7 @@ def emd(a, b, M, numItermax=100000, log=False): def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000, log=False, return_matrix=False): - """Solves the Earth Movers distance problem and returns the loss + r"""Solves the Earth Movers distance problem and returns the loss .. math:: \gamma = arg\min_\gamma <\gamma,M>_F @@ -128,16 +137,20 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), - M is the metric cost matrix - a and b are the sample weights + .. warning:: + Note that the M matrix needs to be a C-order numpy.array in float64 + format. 
+ Uses the algorithm proposed in [1]_
Parameters
----------
- a : (ns,) ndarray, float64
- Source histogram (uniform weigth if empty list)
- b : (nt,) ndarray, float64
- Target histogram (uniform weigth if empty list)
- M : (ns,nt) ndarray, float64
- loss matrix
+ a : (ns,) numpy.ndarray, float64
+ Source histogram (uniform weight if empty list)
+ b : (nt,) numpy.ndarray, float64
+ Target histogram (uniform weight if empty list)
+ M : (ns,nt) numpy.ndarray, float64
+ Loss matrix (c-order array with type float64)
numItermax : int, optional (default=100000)
The maximum number of iterations before stopping the optimization
algorithm if it has not converged.
@@ -151,7 +164,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
-------
gamma: (ns x nt) ndarray
Optimal transportation matrix for the given parameters
log: dict
If input log is true, a dictionary containing the cost and dual
variables and exit status
@@ -187,7 +200,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
b = np.asarray(b, dtype=np.float64)
M = np.asarray(M, dtype=np.float64)
- # if empty array given then use unifor distributions
+ # if empty array given then use uniform distributions
if len(a) == 0:
a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
if len(b) == 0:
@@ -231,9 +244,9 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
Parameters
----------
- measures_locations : list of (k_i,d) np.ndarray
+ measures_locations : list of (k_i,d) numpy.ndarray
The discrete support of a measure supported on k_i locations of a d-dimensional space (k_i can be different for each element of the list)
- measures_weights : list of (k_i,) np.ndarray
+ measures_weights : list of (k_i,) numpy.ndarray
Numpy arrays where each numpy array has k_i non-negatives values summing to one representing the weights of each discrete input measure
X_init : (k,d) np.ndarray
@@ -246,7 +259,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
numItermax : int, optional
Max number of iterations
stopThr : float, optional
- Stop threshol on error (>0)
+ Stop threshold on error (>0)
verbose : bool, optional
Print information along iterations
log : bool, optional
@@ -308,4 +321,288 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
log_dict['displacement_square_norms'] = displacement_square_norms
return X, log_dict
else:
- return X
\ No newline at end of file
+ return X
+
+
+def emd_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True,
+ log=False):
+ r"""Solves the Earth Movers distance problem between 1d measures and returns
+ the OT matrix
+
+
+ .. math::
+ \gamma = arg\min_\gamma \sum_i \sum_j \gamma_{ij} d(x_a[i], x_b[j])
+
+ s.t. \gamma 1 = a,
+ \gamma^T 1= b,
+ \gamma\geq 0
+ where :
+
+ - d is the metric
+ - x_a and x_b are the samples
+ - a and b are the sample weights
+
+ When 'minkowski' is used as a metric, :math:`d(x, y) = |x - y|^p`.
+
+ Uses the algorithm detailed in [1]_
+
+ Parameters
+ ----------
+ x_a : (ns,) or (ns, 1) ndarray, float64
+ Source dirac locations (on the real line)
+ x_b : (nt,) or (nt, 1) ndarray, float64
+ Target dirac locations (on the real line)
+ a : (ns,) ndarray, float64, optional
+ Source histogram (default is uniform weight)
+ b : (nt,) ndarray, float64, optional
+ Target histogram (default is uniform weight)
+ metric: str, optional (default='sqeuclidean')
+ Metric to be used. Only strings listed in :func:`ot.dist` are accepted.
+ Due to implementation details, this function runs faster when
+ `'sqeuclidean'`, `'cityblock'`, or `'euclidean'` metrics are used.
+ p: float, optional (default=1.0)
+ The p-norm to apply if metric='minkowski'
+ dense: boolean, optional (default=True)
+ If True, returns :math:`\gamma` as a dense ndarray of shape (ns, nt).
+ Otherwise returns a sparse representation using scipy's `coo_matrix`
+ format. Due to implementation details, this function runs faster when
+ `'sqeuclidean'`, `'minkowski'`, `'cityblock'`, or `'euclidean'` metrics
+ are used.
+ log: boolean, optional (default=False)
+ If True, returns a dictionary containing the cost.
+ Otherwise returns only the optimal transportation matrix.
+
+ Returns
+ -------
+ gamma: (ns, nt) ndarray
+ Optimal transportation matrix for the given parameters
+ log: dict
+ If input log is True, a dictionary containing the cost
+
+
+ Examples
+ --------
+
+ Simple example with obvious solution. The function emd_1d accepts lists and
+ performs automatic conversion to numpy arrays
+
+ >>> import ot
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> x_a = [2., 0.]
+ >>> x_b = [0., 3.]
+ >>> ot.emd_1d(x_a, x_b, a, b)
+ array([[0. , 0.5],
+ [0.5, 0. ]])
+ >>> ot.emd_1d(x_a, x_b)
+ array([[0. , 0.5],
+ [0.5, 0. ]])
+
+ References
+ ----------
+
+ .. [1] Peyré, G., & Cuturi, M. (2017). "Computational Optimal
+ Transport", 2018.
+
+ See Also
+ --------
+ ot.lp.emd : EMD for multidimensional distributions
+ ot.lp.emd2_1d : EMD for 1d distributions (returns cost instead of the
+ transportation matrix)
+ """
+ a = np.asarray(a, dtype=np.float64)
+ b = np.asarray(b, dtype=np.float64)
+ x_a = np.asarray(x_a, dtype=np.float64)
+ x_b = np.asarray(x_b, dtype=np.float64)
+
+ assert (x_a.ndim == 1 or x_a.ndim == 2 and x_a.shape[1] == 1), \
+ "emd_1d should only be used with monodimensional data"
+ assert (x_b.ndim == 1 or x_b.ndim == 2 and x_b.shape[1] == 1), \
+ "emd_1d should only be used with monodimensional data"
+
+ # if empty array given then use uniform distributions
+ if a.ndim == 0 or len(a) == 0:
+ a = np.ones((x_a.shape[0],), dtype=np.float64) / x_a.shape[0]
+ if b.ndim == 0 or len(b) == 0:
+ b = np.ones((x_b.shape[0],), dtype=np.float64) / x_b.shape[0]
+
+ x_a_1d = x_a.reshape((-1, ))
+ x_b_1d = x_b.reshape((-1, ))
+ perm_a = np.argsort(x_a_1d)
+ perm_b = np.argsort(x_b_1d)
+
+ G_sorted, indices, cost = emd_1d_sorted(a, b,
+ x_a_1d[perm_a], x_b_1d[perm_b],
+ metric=metric, p=p)
+ G = coo_matrix((G_sorted, (perm_a[indices[:, 0]], perm_b[indices[:, 1]])),
+ shape=(a.shape[0], b.shape[0]))
+ if dense:
+ G = G.toarray()
+ if log:
+ log = {'cost': cost}
+ return G, log
+ return G
+
+
+def emd2_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True,
+ log=False):
+ r"""Solves the Earth Movers distance problem between 1d measures and returns
+ the loss
+
+
+ .. math::
+ \gamma = arg\min_\gamma \sum_i \sum_j \gamma_{ij} d(x_a[i], x_b[j])
+
+ s.t. \gamma 1 = a,
+ \gamma^T 1= b,
+ \gamma\geq 0
+ where :
+
+ - d is the metric
+ - x_a and x_b are the samples
+ - a and b are the sample weights
+
+ When 'minkowski' is used as a metric, :math:`d(x, y) = |x - y|^p`.
+
+ Uses the algorithm detailed in [1]_
+
+ Parameters
+ ----------
+ x_a : (ns,) or (ns, 1) ndarray, float64
+ Source dirac locations (on the real line)
+ x_b : (nt,) or (nt, 1) ndarray, float64
+ Target dirac locations (on the real line)
+ a : (ns,) ndarray, float64, optional
+ Source histogram (default is uniform weight)
+ b : (nt,) ndarray, float64, optional
+ Target histogram (default is uniform weight)
+ metric: str, optional (default='sqeuclidean')
+ Metric to be used. Only strings listed in :func:`ot.dist` are accepted.
+ Due to implementation details, this function runs faster when
+ `'sqeuclidean'`, `'minkowski'`, `'cityblock'`, or `'euclidean'` metrics
+ are used.
+ p: float, optional (default=1.0)
+ The p-norm to apply if metric='minkowski'
+ dense: boolean, optional (default=True)
+ If True, returns :math:`\gamma` as a dense ndarray of shape (ns, nt).
+ Otherwise returns a sparse representation using scipy's `coo_matrix`
+ format. Only used if log is set to True. Due to implementation details,
+ this function runs faster when dense is set to False.
+ log: boolean, optional (default=False)
+ If True, returns a dictionary containing the transportation matrix.
+ Otherwise returns only the loss.
+
+ Returns
+ -------
+ loss: float
+ Cost associated to the optimal transportation
+ log: dict
+ If input log is True, a dictionary containing the Optimal transportation
+ matrix for the given parameters
+
+
+ Examples
+ --------
+
+ Simple example with obvious solution. The function emd2_1d accepts lists and
+ performs automatic conversion to numpy arrays
+
+ >>> import ot
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> x_a = [2., 0.]
+ >>> x_b = [0., 3.]
+ >>> ot.emd2_1d(x_a, x_b, a, b)
+ 0.5
+ >>> ot.emd2_1d(x_a, x_b)
+ 0.5
+
+ References
+ ----------
+
+ .. [1] Peyré, G., & Cuturi, M. (2017). "Computational Optimal
+ Transport", 2018.
+
+ See Also
+ --------
+ ot.lp.emd2 : EMD for multidimensional distributions
+ ot.lp.emd_1d : EMD for 1d distributions (returns the transportation matrix
+ instead of the cost)
+ """
+ # If we do not return G (log==False), then we should not cast it to dense
+ # (useless overhead)
+ G, log_emd = emd_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric=metric, p=p,
+ dense=dense and log, log=True)
+ cost = log_emd['cost']
+ if log:
+ log_emd = {'G': G}
+ return cost, log_emd
+ return cost
+
+
+def wasserstein_1d(x_a, x_b, a=None, b=None, p=1.):
+ r"""Solves the p-Wasserstein distance problem between 1d measures and returns
+ the distance
+
+ .. math::
+ \min_\gamma \left( \sum_i \sum_j \gamma_{ij} \|x_a[i] - x_b[j]\|^p \right)^{1/p}
+
+ s.t. \gamma 1 = a,
+ \gamma^T 1= b,
+ \gamma\geq 0
+
+ where :
+
+ - x_a and x_b are the samples
+ - a and b are the sample weights
+
+ Uses the algorithm detailed in [1]_
+
+ Parameters
+ ----------
+ x_a : (ns,) or (ns, 1) ndarray, float64
+ Source dirac locations (on the real line)
+ x_b : (nt,) or (nt, 1) ndarray, float64
+ Target dirac locations (on the real line)
+ a : (ns,) ndarray, float64, optional
+ Source histogram (default is uniform weight)
+ b : (nt,) ndarray, float64, optional
+ Target histogram (default is uniform weight)
+ p: float, optional (default=1.0)
+ The order of the p-Wasserstein distance to be computed
+
+ Returns
+ -------
+ dist: float
+ p-Wasserstein distance
+
+
+ Examples
+ --------
+
+ Simple example with obvious solution. The function wasserstein_1d accepts
+ lists and performs automatic conversion to numpy arrays
+
+ >>> import ot
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> x_a = [2., 0.]
+ >>> x_b = [0., 3.]
+ >>> ot.wasserstein_1d(x_a, x_b, a, b)
+ 0.5
+ >>> ot.wasserstein_1d(x_a, x_b)
+ 0.5
+
+ References
+ ----------
+
+ .. [1] Peyré, G., & Cuturi, M. (2017). "Computational Optimal
+ Transport", 2018.
+
+ See Also
+ --------
+ ot.lp.emd_1d : EMD for 1d distributions
+ """
+ cost_emd = emd2_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p,
+ dense=False, log=False)
+ return np.power(cost_emd, 1. / p)
diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
index 83ee6aa..2b6c495 100644
--- a/ot/lp/emd_wrap.pyx
+++ b/ot/lp/emd_wrap.pyx
@@ -10,7 +10,10 @@ Cython linker with C solver
import numpy as np
cimport numpy as np
+from ..utils import dist
+
cimport cython
+cimport libc.math as math
import warnings
@@ -55,13 +58,16 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
- M is the metric cost matrix
- a and b are the sample weights
+
+ .. warning::
+ Note that the M matrix needs to be a C-order :py:class:`numpy.ndarray`
+
Parameters
----------
- a : (ns,) ndarray, float64
+ a : (ns,) numpy.ndarray, float64
source histogram
- b : (nt,) ndarray, float64
+ b : (nt,) numpy.ndarray, float64
target histogram
- M : (ns,nt) ndarray, float64
+ M : (ns,nt) numpy.ndarray, float64
loss matrix
max_iter : int
The maximum number of iterations before stopping the optimization
@@ -70,7 +76,7 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
Returns
-------
- gamma: (ns x nt) ndarray
+ gamma: (ns x nt) numpy.ndarray
Optimal transportation matrix for the given parameters
"""
@@ -93,3 +99,89 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
cdef int result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
return G, cost, alpha, beta, result_code
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def emd_1d_sorted(np.ndarray[double, ndim=1, mode="c"] u_weights,
+ np.ndarray[double, ndim=1, mode="c"] v_weights,
+ np.ndarray[double, ndim=1, mode="c"] u,
+ np.ndarray[double, ndim=1, mode="c"] v,
+ str metric='sqeuclidean',
+ double p=1.):
+ r"""
+ Solves the Earth Movers distance problem between sorted 1d measures and
+ returns the OT matrix and the associated cost
+
+ Parameters
+ ----------
+ u_weights : (ns,) ndarray, float64
+ Source histogram
+ v_weights : (nt,) ndarray, float64
+ Target histogram
+ u : (ns,) ndarray, float64
+ Source dirac locations (on the real line)
+ v : (nt,) ndarray, float64
+ Target dirac locations (on the real line)
+ metric: str, optional (default='sqeuclidean')
+ Metric to be used. Only strings listed in :func:`ot.dist` are accepted.
+ Due to implementation details, this function runs faster when
+ `'sqeuclidean'`, `'minkowski'`, `'cityblock'`, or `'euclidean'` metrics
+ are used.
+ p: float, optional (default=1.0)
+ The p-norm to apply if metric='minkowski'
+
+ Returns
+ -------
+ gamma: (n, ) ndarray, float64
+ Values in the Optimal transportation matrix
+ indices: (n, 2) ndarray, int64
+ Indices of the values stored in gamma for the Optimal transportation
+ matrix
+ cost
+ cost associated to the optimal transportation
+ """
+ cdef double cost = 0.
+ cdef int n = u_weights.shape[0]
+ cdef int m = v_weights.shape[0]
+
+ cdef int i = 0
+ cdef double w_i = u_weights[0]
+ cdef int j = 0
+ cdef double w_j = v_weights[0]
+
+ cdef double m_ij = 0.
+
+ cdef np.ndarray[double, ndim=1, mode="c"] G = np.zeros((n + m - 1, ),
+ dtype=np.float64)
+ cdef np.ndarray[long, ndim=2, mode="c"] indices = np.zeros((n + m - 1, 2),
+ dtype=np.int)
+ cdef int cur_idx = 0
+ while i < n and j < m:
+ if metric == 'sqeuclidean':
+ m_ij = (u[i] - v[j]) * (u[i] - v[j])
+ elif metric == 'cityblock' or metric == 'euclidean':
+ m_ij = math.fabs(u[i] - v[j])
+ elif metric == 'minkowski':
+ m_ij = math.pow(math.fabs(u[i] - v[j]), p)
+ else:
+ m_ij = dist(u[i].reshape((1, 1)), v[j].reshape((1, 1)),
+ metric=metric)[0, 0]
+ if w_i < w_j or j == m - 1:
+ cost += m_ij * w_i
+ G[cur_idx] = w_i
+ indices[cur_idx, 0] = i
+ indices[cur_idx, 1] = j
+ i += 1
+ w_j -= w_i
+ w_i = u_weights[i]
+ else:
+ cost += m_ij * w_j
+ G[cur_idx] = w_j
+ indices[cur_idx, 0] = i
+ indices[cur_idx, 1] = j
+ j += 1
+ w_i -= w_j
+ w_j = v_weights[j]
+ cur_idx += 1
+ return G[:cur_idx], indices[:cur_idx], cost
diff --git a/ot/optim.py b/ot/optim.py
index f94aceb..0abd9e9 100644
--- a/ot/optim.py
+++ b/ot/optim.py
@@ -26,14 +26,13 @@ def line_search_armijo(f, xk, pk, gfk, old_fval,
Parameters
----------
-
- f : function
+ f : callable
loss function
- xk : np.ndarray
+ xk : ndarray
initial position
- pk : np.ndarray
+ pk : ndarray
descent direction
- gfk : np.ndarray
+ gfk : ndarray
gradient of f at xk
old_fval : float
loss value at xk
@@ -161,15 +160,15 @@ def cg(a, b, M, reg, f, df, G0=None, numItermax=200,
Parameters
----------
- a : np.ndarray (ns,)
+ a : ndarray, shape (ns,)
samples weights in the source domain
- b : np.ndarray (nt,)
+ b : ndarray, shape (nt,)
samples in the target domain
- M : np.ndarray (ns,nt)
+ M : ndarray, shape (ns, nt)
loss matrix
reg : float
Regularization term >0
- G0 : np.ndarray (ns,nt), optional
+ G0 : ndarray, shape (ns,nt), optional
initial guess (default is indep joint density)
numItermax : int, optional
Max number of iterations
@@ -299,17 +298,17 @@ def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10,
Parameters
----------
- a : np.ndarray (ns,)
+ a : ndarray, shape (ns,)
samples weights in the source domain
- b : np.ndarray (nt,)
+ b : ndarray, shape (nt,)
samples in the target domain
- M : np.ndarray (ns,nt)
+ M : ndarray, shape (ns, nt)
loss matrix
reg1 : float
Entropic Regularization term >0
reg2 : float
Second Regularization term >0
- G0 : np.ndarray (ns,nt), optional
+ G0 : ndarray, shape (ns, nt), optional
initial guess (default is indep joint density)
numItermax : int, optional
Max number of iterations
@@ -326,15 +325,13 @@ def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10,
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : ndarray, shape (ns, nt)
Optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
-
References
----------
-
.. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
.. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized conditional gradient: analysis of convergence and applications. arXiv preprint arXiv:1510.06567.
@@ -422,13 +419,12 @@ def solve_1d_linesearch_quad(a, b, c):
Parameters
----------
a,b,c : float
- The coefficients of the quadratic function
+ The coefficients of the quadratic function
Returns
-------
x : float
The optimal value which leads to the minimal cost
-
"""
f0 = c
df0 = b
diff --git a/ot/plot.py b/ot/plot.py
--- a/ot/plot.py
+++ b/ot/plot.py
@@ -1,5 +1,11 @@
"""
Functions for plotting OT matrices
+
+.. warning::
+ Note that by default the module is not imported in :mod:`ot`. In order to
+ use it you need to explicitly import :mod:`ot.plot`
+
+
"""
# Author: Remi Flamary <remi.flamary@unice.fr>
@@ -20,11 +26,11 @@ def plot1D_mat(a, b, M, title=''):
Parameters
----------
- a : np.array, shape (na,)
+ a : ndarray, shape (na,)
Source distribution
- b : np.array, shape (nb,)
+ b : ndarray, shape (nb,)
Target distribution
- M : np.array, shape (na,nb)
+ M : ndarray, shape (na, nb)
Matrix to plot
"""
na, nb = M.shape
diff --git a/ot/stochastic.py b/ot/stochastic.py
index 85c4230..13ed9cc 100644
--- a/ot/stochastic.py
+++ b/ot/stochastic.py
@@ -1,3 +1,9 @@
+"""
+Stochastic solvers for regularized OT.
+
+
+"""
+
# Author: Kilian Fatras <kilian.fatras@gmail.com>
#
# License: MIT License
@@ -11,7 +17,7 @@ import numpy as np
def coordinate_grad_semi_dual(b, M, reg, beta, i):
- '''
+ r'''
Compute the coordinate gradient update for regularized discrete distributions for (i, :)
The function computes the gradient of the semi dual problem:
@@ -32,51 +38,49 @@ def coordinate_grad_semi_dual(b, M, reg, beta, i):
Parameters
----------
-
- b : np.ndarray(nt,)
- target measure
- M : np.ndarray(ns, nt)
- cost matrix
- reg : float nu
- Regularization term > 0
- v : np.ndarray(nt,)
- dual variable
- i : number int
- picked number i
+ b : ndarray, shape (nt,)
+ Target measure.
+ M : ndarray, shape (ns, nt)
+ Cost matrix.
+ reg : float
+ Regularization term > 0.
+ v : ndarray, shape (nt,)
+ Dual variable.
+ i : int
+ Picked number i.
Returns
-------
-
- coordinate gradient : np.ndarray(nt,)
+ coordinate gradient : ndarray, shape (nt,)
Examples
--------
-
+ >>> import ot
+ >>> np.random.seed(0)
>>> n_source = 7
>>> n_target = 4
- >>> reg = 1
- >>> numItermax = 300000
>>> a = ot.utils.unif(n_source)
>>> b = ot.utils.unif(n_target)
- >>> rng = np.random.RandomState(0)
- >>> X_source = rng.randn(n_source, 2)
- >>> Y_target = rng.randn(n_target, 2)
+ >>> X_source = np.random.randn(n_source, 2)
+ >>> Y_target = np.random.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
- >>> method = "ASGD"
- >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
- method, numItermax)
- >>> print(asgd_pi)
+ >>> ot.stochastic.solve_semi_dual_entropic(a, b, M, reg=1, method="ASGD", numItermax=300000)
+ array([[2.53942342e-02, 9.98640673e-02, 1.75945647e-02, 4.27664307e-06],
+ [1.21556999e-01, 1.26350515e-02, 1.30491795e-03, 7.36017394e-03],
+ [3.54070702e-03, 7.63581358e-02, 6.29581672e-02, 1.32812798e-07],
+ [2.60578198e-02, 3.35916645e-02, 8.28023223e-02, 4.05336238e-04],
+ [9.86808864e-03, 7.59774324e-04, 1.08702729e-02, 1.21359007e-01],
+ [2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01],
+ [4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]])
+
References
----------
-
[Genevay et al., 2016] :
- Stochastic Optimization for Large-scale Optimal Transport,
- Advances in Neural Information Processing Systems (2016),
- arXiv preprint arxiv:1605.08527.
-
+ Stochastic Optimization for Large-scale Optimal Transport,
+ Advances in Neural Information Processing Systems (2016),
+ arXiv preprint arxiv:1605.08527.
''' - r = M[i, :] - beta exp_beta = np.exp(-r / reg) * b khi = exp_beta / (np.sum(exp_beta)) @@ -84,7 +88,7 @@ def coordinate_grad_semi_dual(b, M, reg, beta, i): def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None): - ''' + r''' Compute the SAG algorithm to solve the regularized discrete measures optimal transport max problem @@ -112,42 +116,43 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None): Parameters ---------- - a : np.ndarray(ns,), - source measure - b : np.ndarray(nt,), - target measure - M : np.ndarray(ns, nt), - cost matrix - reg : float number, + a : ndarray, shape (ns,), + Source measure. + b : ndarray, shape (nt,), + Target measure. + M : ndarray, shape (ns, nt), + Cost matrix. + reg : float Regularization term > 0 - numItermax : int number - number of iteration - lr : float number - learning rate + numItermax : int + Number of iteration. + lr : float + Learning rate. Returns ------- - - v : np.ndarray(nt,) - dual variable + v : ndarray, shape (nt,) + Dual variable. Examples -------- - + >>> import ot + >>> np.random.seed(0) >>> n_source = 7 >>> n_target = 4 - >>> reg = 1 - >>> numItermax = 300000 >>> a = ot.utils.unif(n_source) >>> b = ot.utils.unif(n_target) - >>> rng = np.random.RandomState(0) - >>> X_source = rng.randn(n_source, 2) - >>> Y_target = rng.randn(n_target, 2) + >>> X_source = np.random.randn(n_source, 2) + >>> Y_target = np.random.randn(n_target, 2) >>> M = ot.dist(X_source, Y_target) - >>> method = "ASGD" - >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg, - method, numItermax) - >>> print(asgd_pi) + >>> ot.stochastic.solve_semi_dual_entropic(a, b, M, reg=1, method="ASGD", numItermax=300000) + array([[2.53942342e-02, 9.98640673e-02, 1.75945647e-02, 4.27664307e-06], + [1.21556999e-01, 1.26350515e-02, 1.30491795e-03, 7.36017394e-03], + [3.54070702e-03, 7.63581358e-02, 6.29581672e-02, 1.32812798e-07], + [2.60578198e-02, 3.35916645e-02, 8.28023223e-02, 4.05336238e-04], + [9.86808864e-03, 7.59774324e-04, 1.08702729e-02, 1.21359007e-01], + [2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01], + [4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]]) References ---------- @@ -176,7 +181,7 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None): def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None): - ''' + r''' Compute the ASGD algorithm to solve the regularized semi continous measures optimal transport max problem The function solves the following optimization problem: @@ -202,50 +207,49 @@ def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None): Parameters ---------- - - b : np.ndarray(nt,) + b : ndarray, shape (nt,) target measure - M : np.ndarray(ns, nt) + M : ndarray, shape (ns, nt) cost matrix - reg : float number + reg : float Regularization term > 0 - numItermax : int number - number of iteration - lr : float number - learning rate - + numItermax : int + Number of iteration. + lr : float + Learning rate. 
Returns ------- - - ave_v : np.ndarray(nt,) + ave_v : ndarray, shape (nt,) dual variable Examples -------- - + >>> import ot + >>> np.random.seed(0) >>> n_source = 7 >>> n_target = 4 - >>> reg = 1 - >>> numItermax = 300000 >>> a = ot.utils.unif(n_source) >>> b = ot.utils.unif(n_target) - >>> rng = np.random.RandomState(0) - >>> X_source = rng.randn(n_source, 2) - >>> Y_target = rng.randn(n_target, 2) + >>> X_source = np.random.randn(n_source, 2) + >>> Y_target = np.random.randn(n_target, 2) >>> M = ot.dist(X_source, Y_target) - >>> method = "ASGD" - >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg, - method, numItermax) - >>> print(asgd_pi) + >>> ot.stochastic.solve_semi_dual_entropic(a, b, M, reg=1, method="ASGD", numItermax=300000) + array([[2.53942342e-02, 9.98640673e-02, 1.75945647e-02, 4.27664307e-06], + [1.21556999e-01, 1.26350515e-02, 1.30491795e-03, 7.36017394e-03], + [3.54070702e-03, 7.63581358e-02, 6.29581672e-02, 1.32812798e-07], + [2.60578198e-02, 3.35916645e-02, 8.28023223e-02, 4.05336238e-04], + [9.86808864e-03, 7.59774324e-04, 1.08702729e-02, 1.21359007e-01], + [2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01], + [4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]]) References ---------- [Genevay et al., 2016] : - Stochastic Optimization for Large-scale Optimal Transport, - Advances in Neural Information Processing Systems (2016), - arXiv preprint arxiv:1605.08527. + Stochastic Optimization for Large-scale Optimal Transport, + Advances in Neural Information Processing Systems (2016), + arXiv preprint arxiv:1605.08527. ''' if lr is None: @@ -264,7 +268,7 @@ def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None): def c_transform_entropic(b, M, reg, beta): - ''' + r''' The goal is to recover u from the c-transform. The function computes the c_transform of a dual variable from the other @@ -285,47 +289,47 @@ def c_transform_entropic(b, M, reg, beta): Parameters ---------- - - b : np.ndarray(nt,) - target measure - M : np.ndarray(ns, nt) - cost matrix + b : ndarray, shape (nt,) + Target measure + M : ndarray, shape (ns, nt) + Cost matrix reg : float - regularization term > 0 - v : np.ndarray(nt,) - dual variable + Regularization term > 0 + v : ndarray, shape (nt,) + Dual variable. Returns ------- - - u : np.ndarray(ns,) - dual variable + u : ndarray, shape (ns,) + Dual variable. 
Examples -------- - + >>> import ot + >>> np.random.seed(0) >>> n_source = 7 >>> n_target = 4 - >>> reg = 1 - >>> numItermax = 300000 >>> a = ot.utils.unif(n_source) >>> b = ot.utils.unif(n_target) - >>> rng = np.random.RandomState(0) - >>> X_source = rng.randn(n_source, 2) - >>> Y_target = rng.randn(n_target, 2) + >>> X_source = np.random.randn(n_source, 2) + >>> Y_target = np.random.randn(n_target, 2) >>> M = ot.dist(X_source, Y_target) - >>> method = "ASGD" - >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg, - method, numItermax) - >>> print(asgd_pi) + >>> ot.stochastic.solve_semi_dual_entropic(a, b, M, reg=1, method="ASGD", numItermax=300000) + array([[2.53942342e-02, 9.98640673e-02, 1.75945647e-02, 4.27664307e-06], + [1.21556999e-01, 1.26350515e-02, 1.30491795e-03, 7.36017394e-03], + [3.54070702e-03, 7.63581358e-02, 6.29581672e-02, 1.32812798e-07], + [2.60578198e-02, 3.35916645e-02, 8.28023223e-02, 4.05336238e-04], + [9.86808864e-03, 7.59774324e-04, 1.08702729e-02, 1.21359007e-01], + [2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01], + [4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]]) References ---------- [Genevay et al., 2016] : - Stochastic Optimization for Large-scale Optimal Transport, - Advances in Neural Information Processing Systems (2016), - arXiv preprint arxiv:1605.08527. + Stochastic Optimization for Large-scale Optimal Transport, + Advances in Neural Information Processing Systems (2016), + arXiv preprint arxiv:1605.08527. ''' n_source = np.shape(M)[0] @@ -340,7 +344,7 @@ def c_transform_entropic(b, M, reg, beta): def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None, log=False): - ''' + r''' Compute the transportation matrix to solve the regularized discrete measures optimal transport max problem @@ -367,52 +371,53 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None, Parameters ---------- - a : np.ndarray(ns,) + a : ndarray, shape (ns,) source measure - b : np.ndarray(nt,) + b : ndarray, shape (nt,) target measure - M : np.ndarray(ns, nt) + M : ndarray, shape (ns, nt) cost matrix - reg : float number + reg : float Regularization term > 0 methode : str used method (SAG or ASGD) - numItermax : int number + numItermax : int number of iteration - lr : float number + lr : float learning rate - n_source : int number + n_source : int size of the source measure - n_target : int number + n_target : int size of the target measure log : bool, optional record log if True Returns ------- - - pi : np.ndarray(ns, nt) + pi : ndarray, shape (ns, nt) transportation matrix log : dict log dictionary return only if log==True in parameters Examples -------- - + >>> import ot + >>> np.random.seed(0) >>> n_source = 7 >>> n_target = 4 - >>> reg = 1 - >>> numItermax = 300000 >>> a = ot.utils.unif(n_source) >>> b = ot.utils.unif(n_target) - >>> rng = np.random.RandomState(0) - >>> X_source = rng.randn(n_source, 2) - >>> Y_target = rng.randn(n_target, 2) + >>> X_source = np.random.randn(n_source, 2) + >>> Y_target = np.random.randn(n_target, 2) >>> M = ot.dist(X_source, Y_target) - >>> method = "ASGD" - >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg, - method, numItermax) - >>> print(asgd_pi) + >>> ot.stochastic.solve_semi_dual_entropic(a, b, M, reg=1, method="ASGD", numItermax=300000) + array([[2.53942342e-02, 9.98640673e-02, 1.75945647e-02, 4.27664307e-06], + [1.21556999e-01, 1.26350515e-02, 1.30491795e-03, 7.36017394e-03], + [3.54070702e-03, 7.63581358e-02, 6.29581672e-02, 
1.32812798e-07], + [2.60578198e-02, 3.35916645e-02, 8.28023223e-02, 4.05336238e-04], + [9.86808864e-03, 7.59774324e-04, 1.08702729e-02, 1.21359007e-01], + [2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01], + [4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]]) References ---------- @@ -451,7 +456,7 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None, def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha, batch_beta): - ''' + r''' Computes the partial gradient of the dual optimal transport problem. For each (i,j) in a batch of coordinates, the partial gradients are : @@ -478,53 +483,55 @@ def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha, Parameters ---------- - - a : np.ndarray(ns,) + a : ndarray, shape (ns,) source measure - b : np.ndarray(nt,) + b : ndarray, shape (nt,) target measure - M : np.ndarray(ns, nt) + M : ndarray, shape (ns, nt) cost matrix - reg : float number + reg : float Regularization term > 0 - alpha : np.ndarray(ns,) + alpha : ndarray, shape (ns,) dual variable - beta : np.ndarray(nt,) + beta : ndarray, shape (nt,) dual variable - batch_size : int number + batch_size : int size of the batch - batch_alpha : np.ndarray(bs,) + batch_alpha : ndarray, shape (bs,) batch of index of alpha - batch_beta : np.ndarray(bs,) + batch_beta : ndarray, shape (bs,) batch of index of beta Returns ------- - - grad : np.ndarray(ns,) + grad : ndarray, shape (ns,) partial grad F Examples -------- - + >>> import ot + >>> np.random.seed(0) >>> n_source = 7 >>> n_target = 4 - >>> reg = 1 - >>> numItermax = 20000 - >>> lr = 0.1 - >>> batch_size = 3 - >>> log = True >>> a = ot.utils.unif(n_source) >>> b = ot.utils.unif(n_target) - >>> rng = np.random.RandomState(0) - >>> X_source = rng.randn(n_source, 2) - >>> Y_target = rng.randn(n_target, 2) + >>> X_source = np.random.randn(n_source, 2) + >>> Y_target = np.random.randn(n_target, 2) >>> M = ot.dist(X_source, Y_target) - >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg, - batch_size, - numItermax, lr, log) - >>> print(log['alpha'], log['beta']) - >>> print(sgd_dual_pi) + >>> sgd_dual_pi, log = ot.stochastic.solve_dual_entropic(a, b, M, reg=1, batch_size=3, numItermax=30000, lr=0.1, log=True) + >>> log['alpha'] + array([0.71759102, 1.57057384, 0.85576566, 0.1208211 , 0.59190466, + 1.197148 , 0.17805133]) + >>> log['beta'] + array([0.49741367, 0.57478564, 1.40075528, 2.75890102]) + >>> sgd_dual_pi + array([[2.09730063e-02, 8.38169324e-02, 7.50365455e-03, 8.72731415e-09], + [5.58432437e-03, 5.89881299e-04, 3.09558411e-05, 8.35469849e-07], + [3.26489515e-03, 7.15536035e-02, 2.99778211e-02, 3.02601593e-10], + [4.05390622e-02, 5.31085068e-02, 6.65191787e-02, 1.55812785e-06], + [7.82299812e-02, 6.12099102e-03, 4.44989098e-02, 2.37719187e-03], + [5.06266486e-02, 2.16230494e-03, 2.26215141e-03, 6.81514609e-04], + [6.06713990e-02, 3.98139808e-02, 5.46829338e-02, 8.62371424e-06]]) References ---------- @@ -533,7 +540,6 @@ def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha, International Conference on Learning Representation (2018), arXiv preprint arxiv:1711.02283. 
''' - G = - (np.exp((alpha[batch_alpha, None] + beta[None, batch_beta] - M[batch_alpha, :][:, batch_beta]) / reg) * a[batch_alpha, None] * b[None, batch_beta]) @@ -548,7 +554,7 @@ def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha, def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr): - ''' + r''' Compute the sgd algorithm to solve the regularized discrete measures optimal transport dual problem @@ -571,33 +577,31 @@ def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr): Parameters ---------- - - a : np.ndarray(ns,) + a : ndarray, shape (ns,) source measure - b : np.ndarray(nt,) + b : ndarray, shape (nt,) target measure - M : np.ndarray(ns, nt) + M : ndarray, shape (ns, nt) cost matrix - reg : float number + reg : float Regularization term > 0 - batch_size : int number + batch_size : int size of the batch - numItermax : int number + numItermax : int number of iteration - lr : float number + lr : float learning rate Returns ------- - - alpha : np.ndarray(ns,) + alpha : ndarray, shape (ns,) dual variable - beta : np.ndarray(nt,) + beta : ndarray, shape (nt,) dual variable Examples -------- - + >>> import ot >>> n_source = 7 >>> n_target = 4 >>> reg = 1 @@ -611,18 +615,26 @@ def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr): >>> X_source = rng.randn(n_source, 2) >>> Y_target = rng.randn(n_target, 2) >>> M = ot.dist(X_source, Y_target) - >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg, - batch_size, - numItermax, lr, log) - >>> print(log['alpha'], log['beta']) - >>> print(sgd_dual_pi) + >>> sgd_dual_pi, log = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size, numItermax, lr, log) + >>> log['alpha'] + array([0.64171798, 1.27932201, 0.78132257, 0.15638935, 0.54888354, + 1.03663469, 0.20595781]) + >>> log['beta'] + array([0.51207194, 0.58033189, 1.28922676, 2.26859736]) + >>> sgd_dual_pi + array([[1.97276541e-02, 7.81248547e-02, 6.22136048e-03, 4.95442423e-09], + [4.23494310e-03, 4.43286263e-04, 2.06927079e-05, 3.82389139e-07], + [3.07542414e-03, 6.67897769e-02, 2.48904999e-02, 1.72030247e-10], + [4.26271990e-02, 5.53375455e-02, 6.16535024e-02, 9.88812650e-07], + [7.60423265e-02, 5.89585256e-03, 3.81267087e-02, 1.39458256e-03], + [4.37557504e-02, 1.85189176e-03, 1.72335760e-03, 3.55491279e-04], + [6.33096109e-02, 4.11683954e-02, 5.02962051e-02, 5.43097516e-06]]) References ---------- - [Seguy et al., 2018] : - International Conference on Learning Representation (2018), - arXiv preprint arxiv:1711.02283. + International Conference on Learning Representation (2018), + arXiv preprint arxiv:1711.02283. 
''' n_source = np.shape(M)[0] @@ -644,7 +656,7 @@ def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr): def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1, log=False): - ''' + r''' Compute the transportation matrix to solve the regularized discrete measures optimal transport dual problem @@ -667,35 +679,33 @@ def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1, Parameters ---------- - - a : np.ndarray(ns,) + a : ndarray, shape (ns,) source measure - b : np.ndarray(nt,) + b : ndarray, shape (nt,) target measure - M : np.ndarray(ns, nt) + M : ndarray, shape (ns, nt) cost matrix - reg : float number + reg : float Regularization term > 0 - batch_size : int number + batch_size : int size of the batch - numItermax : int number + numItermax : int number of iteration - lr : float number + lr : float learning rate log : bool, optional record log if True Returns ------- - - pi : np.ndarray(ns, nt) + pi : ndarray, shape (ns, nt) transportation matrix log : dict log dictionary return only if log==True in parameters Examples -------- - + >>> import ot >>> n_source = 7 >>> n_target = 4 >>> reg = 1 @@ -709,18 +719,27 @@ def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1, >>> X_source = rng.randn(n_source, 2) >>> Y_target = rng.randn(n_target, 2) >>> M = ot.dist(X_source, Y_target) - >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg, - batch_size, - numItermax, lr, log) - >>> print(log['alpha'], log['beta']) - >>> print(sgd_dual_pi) + >>> sgd_dual_pi, log = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size, numItermax, lr, log) + >>> log['alpha'] + array([0.64057733, 1.2683513 , 0.75610161, 0.16024284, 0.54926534, + 1.0514201 , 0.19958936]) + >>> log['beta'] + array([0.51372571, 0.58843489, 1.27993921, 2.24344807]) + >>> sgd_dual_pi + array([[1.97377795e-02, 7.86706853e-02, 6.15682001e-03, 4.82586997e-09], + [4.19566963e-03, 4.42016865e-04, 2.02777272e-05, 3.68823708e-07], + [3.00379244e-03, 6.56562018e-02, 2.40462171e-02, 1.63579656e-10], + [4.28626062e-02, 5.60031599e-02, 6.13193826e-02, 9.67977735e-07], + [7.61972739e-02, 5.94609051e-03, 3.77886693e-02, 1.36046648e-03], + [4.44810042e-02, 1.89476742e-03, 1.73285847e-03, 3.51826036e-04], + [6.30118293e-02, 4.12398660e-02, 4.95148998e-02, 5.26247246e-06]]) References ---------- [Seguy et al., 2018] : - International Conference on Learning Representation (2018), - arXiv preprint arxiv:1711.02283. + International Conference on Learning Representation (2018), + arXiv preprint arxiv:1711.02283. ''' opt_alpha, opt_beta = sgd_entropic_regularization(a, b, M, reg, batch_size, diff --git a/ot/unbalanced.py b/ot/unbalanced.py new file mode 100644 index 0000000..467fda2 --- /dev/null +++ b/ot/unbalanced.py @@ -0,0 +1,519 @@ +# -*- coding: utf-8 -*- +""" +Regularized Unbalanced OT +""" + +# Author: Hicham Janati <hicham.janati@inria.fr> +# License: MIT License + +from __future__ import division +import warnings +import numpy as np +# from .utils import unif, dist + + +def sinkhorn_unbalanced(a, b, M, reg, alpha, method='sinkhorn', numItermax=1000, + stopThr=1e-9, verbose=False, log=False, **kwargs): + r""" + Solve the unbalanced entropic regularization optimal transport problem and return the loss + + The function solves the following optimization problem: + + .. math:: + W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + \\alpha KL(\gamma 1, a) + \\alpha KL(\gamma^T 1, b) + + s.t. 
+ \gamma\geq 0
+ where :
+
+ - M is the (ns, nt) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target weights
+ - KL is the Kullback-Leibler divergence
+
+ The algorithm used for solving the problem is the generalized Sinkhorn-Knopp matrix scaling algorithm as proposed in [10, 23]_
+
+
+ Parameters
+ ----------
+ a : np.ndarray (ns,)
+ samples weights in the source domain
+ b : np.ndarray (nt,) or np.ndarray (nt,n_hists)
+ samples in the target domain, compute sinkhorn with multiple targets
+ and fixed M if b is a matrix (return OT loss + dual variables in log)
+ M : np.ndarray (ns, nt)
+ loss matrix
+ reg : float
+ Entropy regularization term > 0
+ alpha : float
+ Marginal relaxation term > 0
+ method : str
+ method used for the solver either 'sinkhorn', 'sinkhorn_stabilized' or
+ 'sinkhorn_epsilon_scaling', see those functions for specific parameters
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+ Stop threshold on error (> 0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ W : (nt) ndarray or float
+ Optimal transportation matrix for the given parameters
+ log : dict
+ log dictionary return only if log==True in parameters
+
+ Examples
+ --------
+
+ >>> import ot
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.], [1., 0.]]
+ >>> ot.sinkhorn_unbalanced(a, b, M, 1, 1)
+ array([[0.51122823, 0.18807035],
+ [0.18807035, 0.51122823]])
+
+
+ References
+ ----------
+
+ .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
+
+ .. [9] Schmitzer, B. (2016). Stabilized Sparse Scaling Algorithms for Entropy Regularized Transport Problems. arXiv preprint arXiv:1610.06519.
+
+ .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
+
+ .. [25] Frogner C., Zhang C., Mobahi H., Araya-Polo M., Poggio T. : Learning with a Wasserstein Loss, Advances in Neural Information Processing Systems (NIPS) 2015
+
+
+ See Also
+ --------
+ ot.unbalanced.sinkhorn_knopp_unbalanced : Unbalanced Classic Sinkhorn [10]
+ ot.unbalanced.sinkhorn_stabilized_unbalanced: Unbalanced Stabilized sinkhorn [9][10]
+ ot.unbalanced.sinkhorn_epsilon_scaling_unbalanced: Unbalanced Sinkhorn with epsilon scaling [9][10]
+
+ """
+
+ if method.lower() == 'sinkhorn':
+ def sink():
+ return sinkhorn_knopp_unbalanced(a, b, M, reg, alpha,
+ numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose,
+ log=log, **kwargs)
+
+ elif method.lower() in ['sinkhorn_stabilized', 'sinkhorn_epsilon_scaling']:
+ warnings.warn('Method not implemented yet. Using classic Sinkhorn Knopp')
+
+ def sink():
+ return sinkhorn_knopp_unbalanced(a, b, M, reg, alpha,
+ numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose,
+ log=log, **kwargs)
+ else:
+ raise ValueError("Unknown method '%s'." % method)
+
+ return sink()
+
+
+def sinkhorn_unbalanced2(a, b, M, reg, alpha, method='sinkhorn',
+ numItermax=1000, stopThr=1e-9, verbose=False,
+ log=False, **kwargs):
+ r"""
+ Solve the entropic regularization unbalanced optimal transport problem and return the loss
+
+ The function solves the following optimization problem:
+
+ .. math::
+ W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + \\alpha KL(\gamma 1, a) + \\alpha KL(\gamma^T 1, b)
+
+ s.t.
+ \gamma\geq 0
+ where :
+
+ - M is the (ns, nt) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target weights
+ - KL is the Kullback-Leibler divergence
+
+ The algorithm used for solving the problem is the generalized Sinkhorn-Knopp matrix scaling algorithm as proposed in [10, 23]_
+
+
+ Parameters
+ ----------
+ a : np.ndarray (ns,)
+ samples weights in the source domain
+ b : np.ndarray (nt,) or np.ndarray (nt, n_hists)
+ samples in the target domain, compute sinkhorn with multiple targets
+ and fixed M if b is a matrix (return OT loss + dual variables in log)
+ M : np.ndarray (ns,nt)
+ loss matrix
+ reg : float
+ Entropy regularization term > 0
+ alpha : float
+ Marginal relaxation term > 0
+ method : str
+ method used for the solver either 'sinkhorn', 'sinkhorn_stabilized' or
+ 'sinkhorn_epsilon_scaling', see those functions for specific parameters
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+ Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ W : (nt) ndarray or float
+ Optimal transportation matrix for the given parameters
+ log : dict
+ log dictionary return only if log==True in parameters
+
+ Examples
+ --------
+
+ >>> import ot
+ >>> a=[.5, .10]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.],[1., 0.]]
+ >>> ot.unbalanced.sinkhorn_unbalanced2(a, b, M, 1., 1.)
+ array([0.31912866])
+
+
+
+ References
+ ----------
+
+ .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
+
+ .. [9] Schmitzer, B. (2016). Stabilized Sparse Scaling Algorithms for Entropy Regularized Transport Problems. arXiv preprint arXiv:1610.06519.
+
+ .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
+
+ .. [25] Frogner C., Zhang C., Mobahi H., Araya-Polo M., Poggio T. : Learning with a Wasserstein Loss, Advances in Neural Information Processing Systems (NIPS) 2015
+
+ See Also
+ --------
+ ot.unbalanced.sinkhorn_knopp : Unbalanced Classic Sinkhorn [10]
+ ot.unbalanced.sinkhorn_stabilized: Unbalanced Stabilized sinkhorn [9][10]
+ ot.unbalanced.sinkhorn_epsilon_scaling: Unbalanced Sinkhorn with epsilon scaling [9][10]
+
+ """
+
+ if method.lower() == 'sinkhorn':
+ def sink():
+ return sinkhorn_knopp_unbalanced(a, b, M, reg, alpha,
+ numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose,
+ log=log, **kwargs)
+
+ elif method.lower() in ['sinkhorn_stabilized', 'sinkhorn_epsilon_scaling']:
+ warnings.warn('Method not implemented yet. Using classic Sinkhorn Knopp')
+
+ def sink():
+ return sinkhorn_knopp_unbalanced(a, b, M, reg, alpha,
+ numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose,
+ log=log, **kwargs)
+ else:
+ raise ValueError("Unknown method '%s'." % method)
+
+ b = np.asarray(b, dtype=np.float64)
+ if len(b.shape) < 2:
+ b = b[:, None]
+
+ return sink()
+
+
+def sinkhorn_knopp_unbalanced(a, b, M, reg, alpha, numItermax=1000,
+ stopThr=1e-9, verbose=False, log=False, **kwargs):
+ r"""
+ Solve the entropic regularization unbalanced optimal transport problem and return the loss
+
+ The function solves the following optimization problem:
+
+ .. math::
+ W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + \\alpha KL(\gamma 1, a) + \\alpha KL(\gamma^T 1, b)
+
+ s.t.
+ \gamma\geq 0
+ where :
+
+ - M is the (ns, nt) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target weights
+ - KL is the Kullback-Leibler divergence
+
+ The algorithm used for solving the problem is the generalized Sinkhorn-Knopp matrix scaling algorithm as proposed in [10, 23]_
+
+
+ Parameters
+ ----------
+ a : np.ndarray (ns,)
+ samples weights in the source domain
+ b : np.ndarray (nt,) or np.ndarray (nt, n_hists)
+ samples in the target domain, compute sinkhorn with multiple targets
+ and fixed M if b is a matrix (return OT loss + dual variables in log)
+ M : np.ndarray (ns,nt)
+ loss matrix
+ reg : float
+ Entropy regularization term > 0
+ alpha : float
+ Marginal relaxation term > 0
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+ Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ gamma : (ns x nt) ndarray
+ Optimal transportation matrix for the given parameters
+ log : dict
+ log dictionary return only if log==True in parameters
+
+ Examples
+ --------
+
+ >>> import ot
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.],[1., 0.]]
+ >>> ot.unbalanced.sinkhorn_knopp_unbalanced(a, b, M, 1., 1.)
+ array([[0.51122823, 0.18807035],
+ [0.18807035, 0.51122823]])
+
+ References
+ ----------
+
+ .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
+
+ .. [25] Frogner C., Zhang C., Mobahi H., Araya-Polo M., Poggio T. : Learning with a Wasserstein Loss, Advances in Neural Information Processing Systems (NIPS) 2015
+
+ See Also
+ --------
+ ot.lp.emd : Unregularized OT
+ ot.optim.cg : General regularized OT
+
+ """
+
+ a = np.asarray(a, dtype=np.float64)
+ b = np.asarray(b, dtype=np.float64)
+ M = np.asarray(M, dtype=np.float64)
+
+ n_a, n_b = M.shape
+
+ if len(a) == 0:
+ a = np.ones(n_a, dtype=np.float64) / n_a
+ if len(b) == 0:
+ b = np.ones(n_b, dtype=np.float64) / n_b
+
+ if len(b.shape) > 1:
+ n_hists = b.shape[1]
+ else:
+ n_hists = 0
+
+ if log:
+ log = {'err': []}
+
+ # we assume that no distances are null except those of the diagonal of
+ # distances
+ if n_hists:
+ u = np.ones((n_a, 1)) / n_a
+ v = np.ones((n_b, n_hists)) / n_b
+ a = a.reshape(n_a, 1)
+ else:
+ u = np.ones(n_a) / n_a
+ v = np.ones(n_b) / n_b
+
+ # print(reg)
+ # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute
+ K = np.empty(M.shape, dtype=M.dtype)
+ np.divide(M, -reg, out=K)
+ np.exp(K, out=K)
+
+ # print(np.min(K))
+ fi = alpha / (alpha + reg)
+
+ cpt = 0
+ err = 1.
+
+ while (err > stopThr and cpt < numItermax):
+ uprev = u
+ vprev = v
+
+ Kv = K.dot(v)
+ u = (a / Kv) ** fi
+ Ktu = K.T.dot(u)
+ v = (b / Ktu) ** fi
+
+ if (np.any(Ktu == 0.)
+
+
+def barycenter_unbalanced(A, M, reg, alpha, weights=None, numItermax=1000,
+                          stopThr=1e-4, verbose=False, log=False):
+    r"""Compute the entropic regularized unbalanced Wasserstein barycenter of distributions A
+
+    The function solves the following optimization problem:
+
+    .. math::
+        \mathbf{a} = arg\min_\mathbf{a} \sum_i Wu_{reg}(\mathbf{a},\mathbf{a}_i)
+
+    where :
+
+    - :math:`Wu_{reg}(\cdot,\cdot)` is the unbalanced entropic regularized Wasserstein distance (see ot.unbalanced.sinkhorn_unbalanced)
+    - :math:`\mathbf{a}_i` are training distributions in the columns of matrix :math:`\mathbf{A}`
+    - reg and :math:`\mathbf{M}` are respectively the regularization term and the cost matrix for OT
+    - alpha is the marginal relaxation hyperparameter
+
+    The algorithm used for solving the problem is the generalized
+    Sinkhorn-Knopp matrix scaling algorithm as proposed in [10]_
+
+    Parameters
+    ----------
+    A : np.ndarray (d, n)
+        n training distributions a_i of size d
+    M : np.ndarray (d, d)
+        loss matrix for OT
+    reg : float
+        Entropy regularization term > 0
+    alpha : float
+        Marginal relaxation term > 0
+    weights : np.ndarray (n,)
+        Weights of each histogram a_i on the simplex (barycentric coordinates)
+    numItermax : int, optional
+        Max number of iterations
+    stopThr : float, optional
+        Stop threshold on error (> 0)
+    verbose : bool, optional
+        Print information along iterations
+    log : bool, optional
+        record log if True
+
+    Returns
+    -------
+    a : (d,) ndarray
+        Unbalanced Wasserstein barycenter
+    log : dict
+        log dictionary returned only if log is True
+
+    References
+    ----------
+
+    .. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G. (2015). Iterative Bregman projections for regularized transportation problems. SIAM Journal on Scientific Computing, 37(2), A1111-A1138.
+
+    .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
+
+    """
+    p, n_hists = A.shape
+    if weights is None:
+        weights = np.ones(n_hists) / n_hists
+    else:
+        assert(len(weights) == A.shape[1])
+
+    if log:
+        log = {'err': []}
+
+    K = np.exp(- M / reg)
+
+    fi = alpha / (alpha + reg)
+
+    v = np.ones((p, n_hists)) / p
+    u = np.ones((p, 1)) / p
+
+    cpt = 0
+    err = 1.
+
+    while (err > stopThr and cpt < numItermax):
+        uprev = u
+        vprev = v
+
+        Kv = K.dot(v)
+        u = (A / Kv) ** fi
+        Ktu = K.T.dot(u)
+        q = ((Ktu ** (1 - fi)).dot(weights))
+        q = q ** (1 / (1 - fi))
+        Q = q[:, None]
+        v = (Q / Ktu) ** fi
+
+        if (np.any(Ktu == 0.)
+                or np.any(np.isnan(u)) or np.any(np.isnan(v))
+                or np.any(np.isinf(u)) or np.any(np.isinf(v))):
+            # we have reached the machine precision
+            # come back to previous solution and quit loop
+            warnings.warn('Numerical errors at iteration %s' % cpt)
+            u = uprev
+            v = vprev
+            break
+        if cpt % 10 == 0:
+            # we can speed up the process by checking for the error only all
+            # the 10th iterations
+            err = np.sum((u - uprev) ** 2) / np.sum(u ** 2) + \
+                np.sum((v - vprev) ** 2) / np.sum(v ** 2)
+            if log:
+                log['err'].append(err)
+            if verbose:
+                if cpt % 50 == 0:
+                    print(
+                        '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+                print('{:5d}|{:8e}|'.format(cpt, err))
+
+        cpt += 1
+    if log:
+        log['niter'] = cpt
+        log['u'] = u
+        log['v'] = v
+        return q, log
+    else:
+        return q
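A minimal usage sketch for the barycenter (illustrative values, not part of the commit); A stacks the input histograms column-wise and M is the cost over their common support:

    import numpy as np
    import ot

    # two histograms of size 4, stacked as columns of A
    a1 = np.array([1., 0., 0., 0.])
    a2 = np.array([0., 0., 0., 1.])
    A = np.vstack([a1, a2]).T

    M = ot.utils.dist0(4)  # quadratic cost on a 1D grid
    M /= M.max()

    bary = ot.unbalanced.barycenter_unbalanced(A, M, 1e-1, 1.)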
diff --git a/ot/utils.py b/ot/utils.py
index efd1288..8419c83 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 """
-Various function that can be usefull
+Various useful functions
 """
 
 # Author: Remi Flamary <remi.flamary@unice.fr>
@@ -111,12 +111,12 @@ def dist(x1, x2=None, metric='sqeuclidean'):
 
     Parameters
     ----------
-    x1 : np.array (n1,d)
+    x1 : ndarray, shape (n1,d)
         matrix with n1 samples of size d
-    x2 : np.array (n2,d), optional
+    x2 : ndarray, shape (n2,d), optional
         matrix with n2 samples of size d (if None then x2=x1)
-    metric : str, fun, optional
-        name of the metric to be computed (full list in the doc of scipy), If a string,
+    metric : str | callable, optional
+        Name of the metric to be computed (full list in the doc of scipy). If a string,
         the distance function can be 'braycurtis', 'canberra', 'chebyshev', 'cityblock',
         'correlation', 'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski',
         'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
@@ -138,26 +138,21 @@ def dist(x1, x2=None, metric='sqeuclidean'):
 
 def dist0(n, method='lin_square'):
-    """Compute standard cost matrices of size (n,n) for OT problems
+    """Compute standard cost matrices of size (n, n) for OT problems
 
     Parameters
     ----------
-
     n : int
-        size of the cost matrix
+        Size of the cost matrix.
     method : str, optional
         Type of loss matrix chosen from:
 
         * 'lin_square' : linear sampling between 0 and n-1, quadratic loss
 
-
     Returns
     -------
-
-    M : np.array (n1,n2)
-        distance matrix computed with given metric
-
-
+    M : ndarray, shape (n1, n2)
+        Distance matrix computed with given metric.
     """
     res = 0
     if method == 'lin_square':
@@ -169,22 +164,18 @@ def dist0(n, method='lin_square'):
 
 def cost_normalization(C, norm=None):
     """ Apply normalization to the loss matrix
 
-
     Parameters
     ----------
-    C : np.array (n1, n2)
+    C : ndarray, shape (n1, n2)
         The cost matrix to normalize.
     norm : str
-        type of normalization from 'median','max','log','loglog'. Any other
-        value do not normalize.
-
+        Type of normalization from 'median', 'max', 'log', 'loglog'. Any
+        other value leaves the matrix unchanged.
 
     Returns
     -------
-
-    C : np.array (n1, n2)
+    C : ndarray, shape (n1, n2)
         The input cost matrix normalized according to given norm.
-
     """
 
     if norm == "median":
@@ -194,7 +185,7 @@ def cost_normalization(C, norm=None):
     elif norm == "log":
         C = np.log(1 + C)
     elif norm == "loglog":
-        C = np.log(1 + np.log(1 + C))
+        C = np.log1p(np.log1p(C))
     return C
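np.log1p(x) evaluates log(1 + x) accurately when x is close to zero, so the new 'loglog' branch returns the same values as before with better precision for small costs. A quick check (a sketch, not part of the commit):

    import numpy as np

    C = np.array([[0., 1e-12], [2.5, 10.]])
    old = np.log(1 + np.log(1 + C))
    new = np.log1p(np.log1p(C))
    print(np.max(np.abs(old - new)))  # agrees up to floating-point error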
- """ if norm == "median": @@ -194,7 +185,7 @@ def cost_normalization(C, norm=None): elif norm == "log": C = np.log(1 + C) elif norm == "loglog": - C = np.log(1 + np.log(1 + C)) + C = np.log1p(np.log1p(C)) return C @@ -256,6 +247,7 @@ def check_params(**kwargs): def check_random_state(seed): """Turn seed into a np.random.RandomState instance + Parameters ---------- seed : None | int | instance of RandomState @@ -275,7 +267,6 @@ def check_random_state(seed): class deprecated(object): - """Decorator to mark a function or class as deprecated. deprecated class from scikit-learn package @@ -285,14 +276,14 @@ class deprecated(object): The optional extra argument will be appended to the deprecation message and the docstring. Note: to use this with the default value for extra, put in an empty of parentheses: - >>> from ot.deprecation import deprecated - >>> @deprecated() - ... def some_function(): pass + >>> from ot.deprecation import deprecated # doctest: +SKIP + >>> @deprecated() # doctest: +SKIP + ... def some_function(): pass # doctest: +SKIP Parameters ---------- - extra : string - to be added to the deprecation messages + extra : str + To be added to the deprecation messages. """ # Adapted from http://wiki.python.org/moin/PythonDecoratorLibrary, @@ -373,9 +364,9 @@ def _is_deprecated(func): class BaseEstimator(object): - """Base class for most objects in POT - adapted from sklearn BaseEstimator class + + Code adapted from sklearn BaseEstimator class Notes ----- @@ -417,7 +408,7 @@ class BaseEstimator(object): Parameters ---------- - deep : boolean, optional + deep : bool, optional If True, will return the parameters for this estimator and contained subobjects that are estimators. |