Diffstat (limited to 'ot')
-rw-r--r--  ot/__init__.py                  |   55
-rw-r--r--  ot/bregman.py                   | 1272
-rw-r--r--  ot/da.py                        |  252
-rw-r--r--  ot/datasets.py                  |   32
-rw-r--r--  ot/dr.py                        |   63
-rw-r--r--  ot/externals/funcsigs.py        |   46
-rw-r--r--  ot/gpu/__init__.py              |    6
-rw-r--r--  ot/gpu/bregman.py               |   11
-rw-r--r--  ot/gromov.py                    |  738
-rw-r--r--  ot/lp/EMD.h                     |    5
-rw-r--r--  ot/lp/EMD_wrapper.cpp           |  191
-rw-r--r--  ot/lp/__init__.py               |  602
-rw-r--r--  ot/lp/emd_wrap.pyx              |  150
-rw-r--r--  ot/lp/network_simplex_simple.h  |    2
-rw-r--r--  ot/optim.py                     |  179
-rw-r--r--  ot/plot.py                      |   12
-rw-r--r--  ot/stochastic.py                |  418
-rw-r--r--  ot/unbalanced.py                | 1023
-rw-r--r--  ot/utils.py                     |  103
19 files changed, 4259 insertions, 901 deletions
diff --git a/ot/__init__.py b/ot/__init__.py
index b74b924..89c7936 100644
--- a/ot/__init__.py
+++ b/ot/__init__.py
@@ -1,6 +1,46 @@
-"""Python Optimal Transport toolbox
+"""
+
+This is the main module of the POT toolbox. It provides easy access to
+a number of sub-modules and functions described below.
+
+.. note::
+
+    Here is a list of the submodules and a short description of what they contain.
+
+    - :any:`ot.lp` contains OT solvers for the exact (Linear Program) OT problems.
+    - :any:`ot.bregman` contains OT solvers for the entropic OT problems using
+      Bregman projections.
+    - :any:`ot.smooth` contains OT solvers for the regularized (l2 and kl) smooth OT
+      problems.
+    - :any:`ot.gromov` contains solvers for Gromov-Wasserstein and Fused Gromov
+      Wasserstein problems.
+    - :any:`ot.optim` contains generic solvers for OT-based optimization problems.
+    - :any:`ot.da` contains classes and functions related to Monge mapping
+      estimation and Domain Adaptation (DA).
+    - :any:`ot.gpu` contains GPU (cupy) implementations of some OT solvers.
+    - :any:`ot.dr` contains Dimension Reduction (DR) methods such as Wasserstein
+      Discriminant Analysis.
+    - :any:`ot.utils` contains utility functions such as distance computation and
+      timing.
+    - :any:`ot.datasets` contains toy dataset generation functions.
+    - :any:`ot.plot` contains visualization functions.
+    - :any:`ot.stochastic` contains stochastic solvers for regularized OT.
+    - :any:`ot.unbalanced` contains solvers for regularized unbalanced OT.
+
+.. warning::
+    The list of automatically imported sub-modules is as follows:
+    :py:mod:`ot.lp`, :py:mod:`ot.bregman`, :py:mod:`ot.optim`,
+    :py:mod:`ot.utils`, :py:mod:`ot.datasets`,
+    :py:mod:`ot.gromov`, :py:mod:`ot.smooth`,
+    :py:mod:`ot.stochastic`.
+    The following sub-modules are not imported due to additional dependencies:
+
+    - :any:`ot.dr` : depends on :code:`pymanopt` and :code:`autograd`.
+    - :any:`ot.gpu` : depends on :code:`cupy` and a CUDA GPU.
+    - :any:`ot.plot` : depends on :code:`matplotlib`.
"""
@@ -20,17 +60,22 @@ from . import da
from . import gromov
from . import smooth
from . import stochastic
+from . import unbalanced
# OT functions
-from .lp import emd, emd2
+from .lp import emd, emd2, emd_1d, emd2_1d, wasserstein_1d
from .bregman import sinkhorn, sinkhorn2, barycenter
+from .unbalanced import sinkhorn_unbalanced, barycenter_unbalanced, sinkhorn_unbalanced2
from .da import sinkhorn_lpl1_mm
# utils functions
from .utils import dist, unif, tic, toc, toq
-__version__ = "0.5.1"
+__version__ = "0.6.0"
-__all__ = ["emd", "emd2", "sinkhorn", "sinkhorn2", "utils", 'datasets',
+__all__ = ['emd', 'emd2', 'emd_1d', 'sinkhorn', 'sinkhorn2', 'utils', 'datasets',
'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov',
- 'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim']
+           'emd2_1d', 'wasserstein_1d',
+ 'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim',
+ 'sinkhorn_unbalanced', 'barycenter_unbalanced',
+ 'sinkhorn_unbalanced2']
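For context, a minimal usage sketch of the functions newly exported here. This is illustrative only: the array values are made up, and the unbalanced marginal-relaxation parameter is passed positionally to sidestep keyword-name assumptions.

    import numpy as np
    import ot

    x_s = np.random.randn(5)         # 1D support points, source
    x_t = np.random.randn(6) + 1.    # 1D support points, target
    a, b = ot.unif(5), ot.unif(6)    # uniform histograms

    G = ot.emd_1d(x_s, x_t, a, b)                 # exact OT plan on the real line
    w = ot.wasserstein_1d(x_s, x_t, a, b, p=2.)   # 1D Wasserstein distance
    M = ot.dist(x_s.reshape(-1, 1), x_t.reshape(-1, 1))
    Gu = ot.sinkhorn_unbalanced(a, b, M, 1., 1.)  # entropic reg, then marginal relaxation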
diff --git a/ot/bregman.py b/ot/bregman.py
index d1057ff..2707b7c 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -5,15 +5,22 @@ Bregman projections for regularized OT
# Author: Remi Flamary <remi.flamary@unice.fr>
# Nicolas Courty <ncourty@irisa.fr>
+# Kilian Fatras <kilian.fatras@irisa.fr>
+# Titouan Vayer <titouan.vayer@irisa.fr>
+# Hicham Janati <hicham.janati@inria.fr>
+# Mokhtar Z. Alaya <mokhtarzahdi.alaya@gmail.com>
#
# License: MIT License
import numpy as np
+import warnings
+from .utils import unif, dist
+from scipy.optimize import fmin_l_bfgs_b
def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
stopThr=1e-9, verbose=False, log=False, **kwargs):
- u"""
+ r"""
Solve the entropic regularization optimal transport problem and return the OT matrix
The function solves the following optimization problem:
@@ -28,21 +35,21 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
\gamma\geq 0
where :
- - M is the (ns,nt) metric cost matrix
+ - M is the (dim_a, dim_b) metric cost matrix
- :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are source and target weights (sum to 1)
+ - a and b are source and target weights (histograms, both sum to 1)
The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [2]_
Parameters
----------
- a : np.ndarray (ns,)
+ a : ndarray, shape (dim_a,)
samples weights in the source domain
- b : np.ndarray (nt,) or np.ndarray (nt,nbb)
+ b : ndarray, shape (dim_b,) or ndarray, shape (dim_b, n_hists)
samples weights in the target domain; compute sinkhorn with multiple targets
and a fixed M if b is a matrix (return OT loss + dual variables in log)
- M : np.ndarray (ns,nt)
+ M : ndarray, shape (dim_a, dim_b)
loss matrix
reg : float
Regularization term >0
@@ -61,7 +68,7 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : ndarray, shape (dim_a, dim_b)
Optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
@@ -70,12 +77,12 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
--------
>>> import ot
- >>> a=[.5,.5]
- >>> b=[.5,.5]
- >>> M=[[0.,1.],[1.,0.]]
- >>> ot.sinkhorn(a,b,M,1)
- array([[ 0.36552929, 0.13447071],
- [ 0.13447071, 0.36552929]])
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.], [1., 0.]]
+ >>> ot.sinkhorn(a, b, M, 1)
+ array([[0.36552929, 0.13447071],
+ [0.13447071, 0.36552929]])
References
@@ -100,34 +107,28 @@ def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
"""
if method.lower() == 'sinkhorn':
- def sink():
- return sinkhorn_knopp(a, b, M, reg, numItermax=numItermax,
- stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ return sinkhorn_knopp(a, b, M, reg, numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose, log=log,
+ **kwargs)
elif method.lower() == 'greenkhorn':
- def sink():
- return greenkhorn(a, b, M, reg, numItermax=numItermax,
- stopThr=stopThr, verbose=verbose, log=log)
+ return greenkhorn(a, b, M, reg, numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose, log=log)
elif method.lower() == 'sinkhorn_stabilized':
- def sink():
- return sinkhorn_stabilized(a, b, M, reg, numItermax=numItermax,
- stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ return sinkhorn_stabilized(a, b, M, reg, numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose,
+ log=log, **kwargs)
elif method.lower() == 'sinkhorn_epsilon_scaling':
- def sink():
- return sinkhorn_epsilon_scaling(
- a, b, M, reg, numItermax=numItermax,
- stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ return sinkhorn_epsilon_scaling(a, b, M, reg,
+ numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose,
+ log=log, **kwargs)
else:
- print('Warning : unknown method using classic Sinkhorn Knopp')
-
- def sink():
- return sinkhorn_knopp(a, b, M, reg, **kwargs)
-
- return sink()
+ raise ValueError("Unknown method '%s'." % method)
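The net effect of this refactor: each method now returns directly, and an unknown method raises instead of silently falling back to Sinkhorn-Knopp. A quick sketch of the new behavior (toy values):

    import numpy as np
    import ot

    a = np.array([.5, .5])
    b = np.array([.5, .5])
    M = np.array([[0., 1.], [1., 0.]])

    G = ot.sinkhorn(a, b, M, 1, method='greenkhorn')  # dispatches as above
    # ot.sinkhorn(a, b, M, 1, method='sinkhron')      # typo now raises ValueError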
def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000,
stopThr=1e-9, verbose=False, log=False, **kwargs):
- u"""
+ r"""
Solve the entropic regularization optimal transport problem and return the loss
The function solves the following optimization problem:
@@ -142,21 +143,21 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000,
\gamma\geq 0
where :
- - M is the (ns,nt) metric cost matrix
+ - M is the (dim_a, dim_b) metric cost matrix
- :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are source and target weights (sum to 1)
+ - a and b are source and target weights (histograms, both sum to 1)
The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [2]_
Parameters
----------
- a : np.ndarray (ns,)
+ a : ndarray, shape (dim_a,)
samples weights in the source domain
- b : np.ndarray (nt,) or np.ndarray (nt,nbb)
+ b : ndarray, shape (dim_b,) or ndarray, shape (dim_b, n_hists)
samples weights in the target domain; compute sinkhorn with multiple targets
and a fixed M if b is a matrix (return OT loss + dual variables in log)
- M : np.ndarray (ns,nt)
+ M : ndarray, shape (dim_a, dim_b)
loss matrix
reg : float
Regularization term >0
@@ -172,11 +173,10 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000,
log : bool, optional
record log if True
-
Returns
-------
- W : (nt) ndarray or float
- Optimal transportation matrix for the given parameters
+ W : (n_hists) ndarray or float
+ Optimal transportation loss for the given parameters
log : dict
log dictionary return only if log==True in parameters
@@ -184,11 +184,11 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000,
--------
>>> import ot
- >>> a=[.5,.5]
- >>> b=[.5,.5]
- >>> M=[[0.,1.],[1.,0.]]
- >>> ot.sinkhorn2(a,b,M,1)
- array([ 0.26894142])
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.], [1., 0.]]
+ >>> ot.sinkhorn2(a, b, M, 1)
+ array([0.26894142])
@@ -215,36 +215,28 @@ def sinkhorn2(a, b, M, reg, method='sinkhorn', numItermax=1000,
ot.bregman.sinkhorn_epsilon_scaling: Sinkhorn with epsilon scaling [9][10]
"""
-
+ b = np.asarray(b, dtype=np.float64)
+ if len(b.shape) < 2:
+ b = b[:, None]
if method.lower() == 'sinkhorn':
- def sink():
- return sinkhorn_knopp(a, b, M, reg, numItermax=numItermax,
- stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ return sinkhorn_knopp(a, b, M, reg, numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose, log=log,
+ **kwargs)
elif method.lower() == 'sinkhorn_stabilized':
- def sink():
- return sinkhorn_stabilized(a, b, M, reg, numItermax=numItermax,
- stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ return sinkhorn_stabilized(a, b, M, reg, numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose, log=log,
+ **kwargs)
elif method.lower() == 'sinkhorn_epsilon_scaling':
- def sink():
- return sinkhorn_epsilon_scaling(
- a, b, M, reg, numItermax=numItermax,
- stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ return sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose,
+ log=log, **kwargs)
else:
- print('Warning : unknown method using classic Sinkhorn Knopp')
-
- def sink():
- return sinkhorn_knopp(a, b, M, reg, **kwargs)
-
- b = np.asarray(b, dtype=np.float64)
- if len(b.shape) < 2:
- b = b.reshape((-1, 1))
-
- return sink()
+ raise ValueError("Unknown method '%s'." % method)
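Note that sinkhorn2 now always promotes b to a 2D array, so several target histograms can be passed as columns and one loss is returned per column; a small sketch (toy values):

    import numpy as np
    import ot

    a = np.array([.5, .5])
    B = np.array([[.5, .3],
                  [.5, .7]])            # (dim_b, n_hists): two target histograms
    M = np.array([[0., 1.], [1., 0.]])

    losses = ot.sinkhorn2(a, B, M, 1.)  # shape (2,), one loss per column of B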
def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
stopThr=1e-9, verbose=False, log=False, **kwargs):
- """
+ r"""
Solve the entropic regularization optimal transport problem and return the OT matrix
The function solves the following optimization problem:
@@ -259,21 +251,21 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
\gamma\geq 0
where :
- - M is the (ns,nt) metric cost matrix
+ - M is the (dim_a, dim_b) metric cost matrix
- :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are source and target weights (sum to 1)
+ - a and b are source and target weights (histograms, both sum to 1)
The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [2]_
Parameters
----------
- a : np.ndarray (ns,)
+ a : ndarray, shape (dim_a,)
samples weights in the source domain
- b : np.ndarray (nt,) or np.ndarray (nt,nbb)
+ b : ndarray, shape (dim_b,) or ndarray, shape (dim_b, n_hists)
samples weights in the target domain; compute sinkhorn with multiple targets
and a fixed M if b is a matrix (return OT loss + dual variables in log)
- M : np.ndarray (ns,nt)
+ M : ndarray, shape (dim_a, dim_b)
loss matrix
reg : float
Regularization term >0
@@ -286,10 +278,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
log : bool, optional
record log if True
-
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : ndarray, shape (dim_a, dim_b)
Optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
@@ -298,12 +289,12 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
--------
>>> import ot
- >>> a=[.5,.5]
- >>> b=[.5,.5]
- >>> M=[[0.,1.],[1.,0.]]
- >>> ot.sinkhorn(a,b,M,1)
- array([[ 0.36552929, 0.13447071],
- [ 0.13447071, 0.36552929]])
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.], [1., 0.]]
+ >>> ot.sinkhorn(a, b, M, 1)
+ array([[0.36552929, 0.13447071],
+ [0.13447071, 0.36552929]])
References
@@ -329,25 +320,25 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
# init data
- Nini = len(a)
- Nfin = len(b)
+ dim_a = len(a)
+ dim_b = len(b)
if len(b.shape) > 1:
- nbb = b.shape[1]
+ n_hists = b.shape[1]
else:
- nbb = 0
+ n_hists = 0
if log:
log = {'err': []}
# we assume that no distances are null except those of the diagonal of
# distances
- if nbb:
- u = np.ones((Nini, nbb)) / Nini
- v = np.ones((Nfin, nbb)) / Nfin
+ if n_hists:
+ u = np.ones((dim_a, n_hists)) / dim_a
+ v = np.ones((dim_b, n_hists)) / dim_b
else:
- u = np.ones(Nini) / Nini
- v = np.ones(Nfin) / Nfin
+ u = np.ones(dim_a) / dim_a
+ v = np.ones(dim_b) / dim_b
# print(reg)
@@ -370,9 +361,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
v = np.divide(b, KtransposeU)
u = 1. / np.dot(Kp, v)
- if (np.any(KtransposeU == 0) or
- np.any(np.isnan(u)) or np.any(np.isnan(v)) or
- np.any(np.isinf(u)) or np.any(np.isinf(v))):
+ if (np.any(KtransposeU == 0)
+ or np.any(np.isnan(u)) or np.any(np.isnan(v))
+ or np.any(np.isinf(u)) or np.any(np.isinf(v))):
# we have reached the machine precision
# come back to previous solution and quit loop
print('Warning: numerical errors at iteration', cpt)
@@ -382,13 +373,12 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
if cpt % 10 == 0:
# we can speed up the process by checking for the error only all
# the 10th iterations
- if nbb:
- err = np.sum((u - uprev)**2) / np.sum((u)**2) + \
- np.sum((v - vprev)**2) / np.sum((v)**2)
+ if n_hists:
+ np.einsum('ik,ij,jk->jk', u, K, v, out=tmp2)
else:
# compute right marginal tmp2= (diag(u)Kdiag(v))^T1
np.einsum('i,ij,j->j', u, K, v, out=tmp2)
- err = np.linalg.norm(tmp2 - b)**2 # violation of marginal
+ err = np.linalg.norm(tmp2 - b) # violation of marginal
if log:
log['err'].append(err)
@@ -402,7 +392,7 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
log['u'] = u
log['v'] = v
- if nbb: # return only loss
+ if n_hists: # return only loss
res = np.einsum('ik,ij,jk,ij->k', u, K, v, M)
if log:
return res, log
@@ -417,8 +407,9 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000,
return u.reshape((-1, 1)) * K * v.reshape((1, -1))
-def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log=False):
- """
+def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False,
+ log=False):
+ r"""
Solve the entropic regularization optimal transport problem and return the OT matrix
The algorithm used is based on the paper
@@ -441,20 +432,20 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log=
\gamma\geq 0
where :
- - M is the (ns,nt) metric cost matrix
+ - M is the (dim_a, dim_b) metric cost matrix
- :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are source and target weights (sum to 1)
+ - a and b are source and target weights (histograms, both sum to 1)
Parameters
----------
- a : np.ndarray (ns,)
+ a : ndarray, shape (dim_a,)
samples weights in the source domain
- b : np.ndarray (nt,) or np.ndarray (nt,nbb)
+ b : ndarray, shape (dim_b,) or ndarray, shape (dim_b, n_hists)
samples weights in the target domain; compute sinkhorn with multiple targets
and a fixed M if b is a matrix (return OT loss + dual variables in log)
- M : np.ndarray (ns,nt)
+ M : ndarray, shape (dim_a, dim_b)
loss matrix
reg : float
Regularization term >0
@@ -465,10 +456,9 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log=
log : bool, optional
record log if True
-
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : ndarray, shape (dim_a, dim_b)
Optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
@@ -477,12 +467,12 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log=
--------
>>> import ot
- >>> a=[.5,.5]
- >>> b=[.5,.5]
- >>> M=[[0.,1.],[1.,0.]]
- >>> ot.bregman.greenkhorn(a,b,M,1)
- array([[ 0.36552929, 0.13447071],
- [ 0.13447071, 0.36552929]])
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.], [1., 0.]]
+ >>> ot.bregman.greenkhorn(a, b, M, 1)
+ array([[0.36552929, 0.13447071],
+ [0.13447071, 0.36552929]])
References
@@ -499,22 +489,33 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log=
"""
- n = a.shape[0]
- m = b.shape[0]
+ a = np.asarray(a, dtype=np.float64)
+ b = np.asarray(b, dtype=np.float64)
+ M = np.asarray(M, dtype=np.float64)
+
+ if len(a) == 0:
+ a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
+ if len(b) == 0:
+ b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
+
+ dim_a = a.shape[0]
+ dim_b = b.shape[0]
# Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute
K = np.empty_like(M)
np.divide(M, -reg, out=K)
np.exp(K, out=K)
- u = np.full(n, 1. / n)
- v = np.full(m, 1. / m)
+ u = np.full(dim_a, 1. / dim_a)
+ v = np.full(dim_b, 1. / dim_b)
G = u[:, np.newaxis] * K * v[np.newaxis, :]
viol = G.sum(1) - a
viol_2 = G.sum(0) - b
stopThr_val = 1
+
if log:
+ log = dict()
log['u'] = u
log['v'] = v
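The greedy inner loop of greenkhorn is not part of this hunk; roughly, each iteration rescales only the single row or column with the largest marginal violation. A condensed sketch of one such update, using the variable names introduced above (illustrative, not the verbatim implementation):

    import numpy as np

    def greenkhorn_step(G, K, u, v, a, b, viol, viol_2):
        # pick the worst violated row or column and rescale only that one
        i_1 = np.argmax(np.abs(viol))
        i_2 = np.argmax(np.abs(viol_2))
        if np.abs(viol[i_1]) > np.abs(viol_2[i_2]):
            old_u = u[i_1]
            u[i_1] = a[i_1] / K[i_1, :].dot(v)            # fix row i_1 marginal
            G[i_1, :] = u[i_1] * K[i_1, :] * v
            viol[i_1] = u[i_1] * K[i_1, :].dot(v) - a[i_1]
            viol_2 += K[i_1, :] * (u[i_1] - old_u) * v    # update column violations
        else:
            old_v = v[i_2]
            v[i_2] = b[i_2] / K[:, i_2].dot(u)            # fix column i_2 marginal
            G[:, i_2] = u * K[:, i_2] * v[i_2]
            viol += (v[i_2] - old_v) * K[:, i_2] * u      # update row violations
            viol_2[i_2] = v[i_2] * K[:, i_2].dot(u) - b[i_2]
        return G, u, v, viol, viol_2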
@@ -560,8 +561,9 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False, log=
def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
- warmstart=None, verbose=False, print_period=20, log=False, **kwargs):
- """
+ warmstart=None, verbose=False, print_period=20,
+ log=False, **kwargs):
+ r"""
Solve the entropic regularization OT problem with log stabilization
The function solves the following optimization problem:
@@ -576,9 +578,10 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
\gamma\geq 0
where :
- - M is the (ns,nt) metric cost matrix
+ - M is the (dim_a, dim_b) metric cost matrix
- :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are source and target weights (sum to 1)
+ - a and b are source and target weights (histograms, both sum to 1)
+
The algorithm used for solving the problem is the Sinkhorn-Knopp matrix
scaling algorithm as proposed in [2]_ but with the log stabilization
@@ -587,11 +590,11 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
Parameters
----------
- a : np.ndarray (ns,)
+ a : ndarray, shape (dim_a,)
samples weights in the source domain
- b : np.ndarray (nt,)
+ b : ndarray, shape (dim_b,)
samples in the target domain
- M : np.ndarray (ns,nt)
+ M : ndarray, shape (dim_a, dim_b)
loss matrix
reg : float
Regularization term >0
@@ -608,10 +611,9 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
log : bool, optional
record log if True
-
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : ndarray, shape (dim_a, dim_b)
Optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
@@ -623,9 +625,9 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
>>> a=[.5,.5]
>>> b=[.5,.5]
>>> M=[[0.,1.],[1.,0.]]
- >>> ot.bregman.sinkhorn_stabilized(a,b,M,1)
- array([[ 0.36552929, 0.13447071],
- [ 0.13447071, 0.36552929]])
+ >>> ot.bregman.sinkhorn_stabilized(a, b, M, 1)
+ array([[0.36552929, 0.13447071],
+ [0.13447071, 0.36552929]])
References
@@ -656,14 +658,14 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
# test if multiple target
if len(b.shape) > 1:
- nbb = b.shape[1]
+ n_hists = b.shape[1]
a = a[:, np.newaxis]
else:
- nbb = 0
+ n_hists = 0
# init data
- na = len(a)
- nb = len(b)
+ dim_a = len(a)
+ dim_b = len(b)
cpt = 0
if log:
@@ -672,24 +674,25 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
# we assume that no distances are null except those of the diagonal of
# distances
if warmstart is None:
- alpha, beta = np.zeros(na), np.zeros(nb)
+ alpha, beta = np.zeros(dim_a), np.zeros(dim_b)
else:
alpha, beta = warmstart
- if nbb:
- u, v = np.ones((na, nbb)) / na, np.ones((nb, nbb)) / nb
+ if n_hists:
+ u = np.ones((dim_a, n_hists)) / dim_a
+ v = np.ones((dim_b, n_hists)) / dim_b
else:
- u, v = np.ones(na) / na, np.ones(nb) / nb
+ u, v = np.ones(dim_a) / dim_a, np.ones(dim_b) / dim_b
def get_K(alpha, beta):
"""log space computation"""
- return np.exp(-(M - alpha.reshape((na, 1)) -
- beta.reshape((1, nb))) / reg)
+ return np.exp(-(M - alpha.reshape((dim_a, 1))
+ - beta.reshape((1, dim_b))) / reg)
def get_Gamma(alpha, beta, u, v):
"""log space gamma computation"""
- return np.exp(-(M - alpha.reshape((na, 1)) - beta.reshape((1, nb))) /
- reg + np.log(u.reshape((na, 1))) + np.log(v.reshape((1, nb))))
+ return np.exp(-(M - alpha.reshape((dim_a, 1)) - beta.reshape((1, dim_b)))
+ / reg + np.log(u.reshape((dim_a, 1))) + np.log(v.reshape((1, dim_b))))
# print(np.min(K))
@@ -709,26 +712,29 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
# remove numerical problems and store them in K
if np.abs(u).max() > tau or np.abs(v).max() > tau:
- if nbb:
+ if n_hists:
alpha, beta = alpha + reg * \
np.max(np.log(u), 1), beta + reg * np.max(np.log(v))
else:
alpha, beta = alpha + reg * np.log(u), beta + reg * np.log(v)
- if nbb:
- u, v = np.ones((na, nbb)) / na, np.ones((nb, nbb)) / nb
+ if n_hists:
+ u, v = np.ones((dim_a, n_hists)) / dim_a, np.ones((dim_b, n_hists)) / dim_b
else:
- u, v = np.ones(na) / na, np.ones(nb) / nb
+ u, v = np.ones(dim_a) / dim_a, np.ones(dim_b) / dim_b
K = get_K(alpha, beta)
if cpt % print_period == 0:
# we can speed up the process by checking for the error only all
# the 10th iterations
- if nbb:
- err = np.sum((u - uprev)**2) / np.sum((u)**2) + \
- np.sum((v - vprev)**2) / np.sum((v)**2)
+ if n_hists:
+ err_u = abs(u - uprev).max()
+ err_u /= max(abs(u).max(), abs(uprev).max(), 1.)
+ err_v = abs(v - vprev).max()
+ err_v /= max(abs(v).max(), abs(vprev).max(), 1.)
+ err = 0.5 * (err_u + err_v)
else:
transp = get_Gamma(alpha, beta, u, v)
- err = np.linalg.norm((np.sum(transp, axis=0) - b))**2
+ err = np.linalg.norm((np.sum(transp, axis=0) - b))
if log:
log['err'].append(err)
@@ -754,34 +760,40 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
cpt = cpt + 1
- # print('err=',err,' cpt=',cpt)
if log:
- log['logu'] = alpha / reg + np.log(u)
- log['logv'] = beta / reg + np.log(v)
+ if n_hists:
+ alpha = alpha[:, None]
+ beta = beta[:, None]
+ logu = alpha / reg + np.log(u)
+ logv = beta / reg + np.log(v)
+ log['logu'] = logu
+ log['logv'] = logv
log['alpha'] = alpha + reg * np.log(u)
log['beta'] = beta + reg * np.log(v)
log['warmstart'] = (log['alpha'], log['beta'])
- if nbb:
- res = np.zeros((nbb))
- for i in range(nbb):
+ if n_hists:
+ res = np.zeros((n_hists))
+ for i in range(n_hists):
res[i] = np.sum(get_Gamma(alpha, beta, u[:, i], v[:, i]) * M)
return res, log
else:
return get_Gamma(alpha, beta, u, v), log
else:
- if nbb:
- res = np.zeros((nbb))
- for i in range(nbb):
+ if n_hists:
+ res = np.zeros((n_hists))
+ for i in range(n_hists):
res[i] = np.sum(get_Gamma(alpha, beta, u[:, i], v[:, i]) * M)
return res
else:
return get_Gamma(alpha, beta, u, v)
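The stabilized solver above hinges on one trick: whenever a scaling vector exceeds tau, its logarithm is absorbed into the dual potentials alpha, beta and the scalings are reset to ones, keeping every stored quantity bounded. A standalone sketch of that absorption step for the single-histogram case (same notation as the hunks above):

    import numpy as np

    def absorb(alpha, beta, u, v, reg):
        # fold the current scalings into the log-domain duals, then reset them
        alpha = alpha + reg * np.log(u)
        beta = beta + reg * np.log(v)
        return alpha, beta, np.ones_like(u), np.ones_like(v)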
-def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInnerItermax=100,
- tau=1e3, stopThr=1e-9, warmstart=None, verbose=False, print_period=10, log=False, **kwargs):
- """
+def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4,
+ numInnerItermax=100, tau=1e3, stopThr=1e-9,
+ warmstart=None, verbose=False, print_period=10,
+ log=False, **kwargs):
+ r"""
Solve the entropic regularization optimal transport problem with log
stabilization and epsilon scaling.
@@ -797,9 +809,10 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne
\gamma\geq 0
where :
- - M is the (ns,nt) metric cost matrix
+ - M is the (dim_a, dim_b) metric cost matrix
- :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - a and b are source and target weights (sum to 1)
+ - a and b are source and target weights (histograms, both sum to 1)
+
The algorithm used for solving the problem is the Sinkhorn-Knopp matrix
scaling algorithm as proposed in [2]_ but with the log stabilization
@@ -808,19 +821,17 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne
Parameters
----------
- a : np.ndarray (ns,)
+ a : ndarray, shape (dim_a,)
samples weights in the source domain
- b : np.ndarray (nt,)
+ b : ndarray, shape (dim_b,)
samples in the target domain
- M : np.ndarray (ns,nt)
+ M : ndarray, shape (dim_a, dim_b)
loss matrix
reg : float
Regularization term >0
tau : float
threshold for max value in u or v for log scaling
- tau : float
- thershold for max value in u or v for log scaling
- warmstart : tible of vectors
+ warmstart : tuple of vectors
if given then starting values for alpha and beta log scalings
numItermax : int, optional
Max number of iterations
@@ -835,10 +846,9 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne
log : bool, optional
record log if True
-
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : ndarray, shape (dim_a, dim_b)
Optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
@@ -847,12 +857,12 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne
--------
>>> import ot
- >>> a=[.5,.5]
- >>> b=[.5,.5]
- >>> M=[[0.,1.],[1.,0.]]
- >>> ot.bregman.sinkhorn_epsilon_scaling(a,b,M,1)
- array([[ 0.36552929, 0.13447071],
- [ 0.13447071, 0.36552929]])
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.], [1., 0.]]
+ >>> ot.bregman.sinkhorn_epsilon_scaling(a, b, M, 1)
+ array([[0.36552929, 0.13447071],
+ [0.13447071, 0.36552929]])
References
@@ -879,8 +889,8 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne
b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
# init data
- na = len(a)
- nb = len(b)
+ dim_a = len(a)
+ dim_b = len(b)
# relative numerical precision with 64 bits
numItermin = 35
@@ -893,14 +903,14 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne
# we assume that no distances are null except those of the diagonal of
# distances
if warmstart is None:
- alpha, beta = np.zeros(na), np.zeros(nb)
+ alpha, beta = np.zeros(dim_a), np.zeros(dim_b)
else:
alpha, beta = warmstart
def get_K(alpha, beta):
"""log space computation"""
- return np.exp(-(M - alpha.reshape((na, 1)) -
- beta.reshape((1, nb))) / reg)
+ return np.exp(-(M - alpha.reshape((dim_a, 1))
+ - beta.reshape((1, dim_b))) / reg)
# print(np.min(K))
def get_reg(n): # exponential decreasing
@@ -913,8 +923,10 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4, numInne
regi = get_reg(cpt)
- G, logi = sinkhorn_stabilized(a, b, M, regi, numItermax=numInnerItermax, stopThr=1e-9, warmstart=(
- alpha, beta), verbose=False, print_period=20, tau=tau, log=True)
+ G, logi = sinkhorn_stabilized(a, b, M, regi,
+ numItermax=numInnerItermax, stopThr=1e-9,
+ warmstart=(alpha, beta), verbose=False,
+ print_period=20, tau=tau, log=True)
alpha = logi['alpha']
beta = logi['beta']
@@ -972,9 +984,9 @@ def projC(gamma, q):
return np.multiply(gamma, q / np.maximum(np.sum(gamma, axis=0), 1e-10))
-def barycenter(A, M, reg, weights=None, numItermax=1000,
- stopThr=1e-4, verbose=False, log=False):
- """Compute the entropic regularized wasserstein barycenter of distributions A
+def barycenter(A, M, reg, weights=None, method="sinkhorn", numItermax=10000,
+ stopThr=1e-4, verbose=False, log=False, **kwargs):
+ r"""Compute the entropic regularized wasserstein barycenter of distributions A
The function solves the following optimization problem:
@@ -991,13 +1003,15 @@ def barycenter(A, M, reg, weights=None, numItermax=1000,
Parameters
----------
- A : np.ndarray (d,n)
- n training distributions a_i of size d
- M : np.ndarray (d,d)
- loss matrix for OT
+ A : ndarray, shape (dim, n_hists)
+ n_hists training distributions a_i of size dim
+ M : ndarray, shape (dim, dim)
+ loss matrix for OT
reg : float
- Regularization term >0
- weights : np.ndarray (n,)
+ Regularization term > 0
+ method : str (optional)
+        method used for the solver, either 'sinkhorn' or 'sinkhorn_stabilized'
+    weights : ndarray, shape (n_hists,)
Weights of each histogram a_i on the simplex (barycentric coordinates)
numItermax : int, optional
Max number of iterations
@@ -1011,7 +1025,7 @@ def barycenter(A, M, reg, weights=None, numItermax=1000,
Returns
-------
- a : (d,) ndarray
+ a : (dim,) ndarray
Wasserstein barycenter
log : dict
log dictionary return only if log==True in parameters
@@ -1022,7 +1036,71 @@ def barycenter(A, M, reg, weights=None, numItermax=1000,
.. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G. (2015). Iterative Bregman projections for regularized transportation problems. SIAM Journal on Scientific Computing, 37(2), A1111-A1138.
+ """
+ if method.lower() == 'sinkhorn':
+ return barycenter_sinkhorn(A, M, reg, weights=weights,
+ numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose, log=log,
+ **kwargs)
+ elif method.lower() == 'sinkhorn_stabilized':
+ return barycenter_stabilized(A, M, reg, weights=weights,
+ numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose,
+ log=log, **kwargs)
+ else:
+ raise ValueError("Unknown method '%s'." % method)
+
+
+def barycenter_sinkhorn(A, M, reg, weights=None, numItermax=1000,
+ stopThr=1e-4, verbose=False, log=False):
+ r"""Compute the entropic regularized wasserstein barycenter of distributions A
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \mathbf{a} = arg\min_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
+
+ where :
+
+ - :math:`W_{reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance (see ot.bregman.sinkhorn)
+ - :math:`\mathbf{a}_i` are training distributions in the columns of matrix :math:`\mathbf{A}`
+ - reg and :math:`\mathbf{M}` are respectively the regularization term and the cost matrix for OT
+
+ The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [3]_
+
+ Parameters
+ ----------
+ A : ndarray, shape (dim, n_hists)
+ n_hists training distributions a_i of size dim
+ M : ndarray, shape (dim, dim)
+ loss matrix for OT
+ reg : float
+ Regularization term > 0
+ weights : ndarray, shape (n_hists,)
+      Weights of each histogram a_i on the simplex (barycentric coordinates)
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+        Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ a : (dim,) ndarray
+ Wasserstein barycenter
+ log : dict
+ log dictionary return only if log==True in parameters
+
+
+ References
+ ----------
+
+ .. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G. (2015). Iterative Bregman projections for regularized transportation problems. SIAM Journal on Scientific Computing, 37(2), A1111-A1138.
"""
@@ -1068,8 +1146,139 @@ def barycenter(A, M, reg, weights=None, numItermax=1000,
return geometricBar(weights, UKv)
-def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1e-9, stabThr=1e-30, verbose=False, log=False):
- """Compute the entropic regularized wasserstein barycenter of distributions A
+def barycenter_stabilized(A, M, reg, tau=1e10, weights=None, numItermax=1000,
+ stopThr=1e-4, verbose=False, log=False):
+ r"""Compute the entropic regularized wasserstein barycenter of distributions A
+ with stabilization.
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \mathbf{a} = arg\min_\mathbf{a} \sum_i W_{reg}(\mathbf{a},\mathbf{a}_i)
+
+ where :
+
+ - :math:`W_{reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance (see ot.bregman.sinkhorn)
+ - :math:`\mathbf{a}_i` are training distributions in the columns of matrix :math:`\mathbf{A}`
+ - reg and :math:`\mathbf{M}` are respectively the regularization term and the cost matrix for OT
+
+ The algorithm used for solving the problem is the Sinkhorn-Knopp matrix scaling algorithm as proposed in [3]_
+
+ Parameters
+ ----------
+ A : ndarray, shape (dim, n_hists)
+ n_hists training distributions a_i of size dim
+ M : ndarray, shape (dim, dim)
+ loss matrix for OT
+ reg : float
+ Regularization term > 0
+ tau : float
+        threshold for max value in u or v for log scaling
+ weights : ndarray, shape (n_hists,)
+        Weights of each histogram a_i on the simplex (barycentric coordinates)
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+        Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ a : (dim,) ndarray
+ Wasserstein barycenter
+ log : dict
+ log dictionary return only if log==True in parameters
+
+
+ References
+ ----------
+
+ .. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G. (2015). Iterative Bregman projections for regularized transportation problems. SIAM Journal on Scientific Computing, 37(2), A1111-A1138.
+
+ """
+
+ dim, n_hists = A.shape
+ if weights is None:
+ weights = np.ones(n_hists) / n_hists
+ else:
+ assert(len(weights) == A.shape[1])
+
+ if log:
+ log = {'err': []}
+
+ u = np.ones((dim, n_hists)) / dim
+ v = np.ones((dim, n_hists)) / dim
+
+ # print(reg)
+ # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute
+ K = np.empty(M.shape, dtype=M.dtype)
+ np.divide(M, -reg, out=K)
+ np.exp(K, out=K)
+
+ cpt = 0
+ err = 1.
+ alpha = np.zeros(dim)
+ beta = np.zeros(dim)
+ q = np.ones(dim) / dim
+ while (err > stopThr and cpt < numItermax):
+ qprev = q
+ Kv = K.dot(v)
+ u = A / (Kv + 1e-16)
+ Ktu = K.T.dot(u)
+ q = geometricBar(weights, Ktu)
+ Q = q[:, None]
+ v = Q / (Ktu + 1e-16)
+ absorbing = False
+ if (u > tau).any() or (v > tau).any():
+ absorbing = True
+ alpha = alpha + reg * np.log(np.max(u, 1))
+ beta = beta + reg * np.log(np.max(v, 1))
+ K = np.exp((alpha[:, None] + beta[None, :] -
+ M) / reg)
+ v = np.ones_like(v)
+ Kv = K.dot(v)
+ if (np.any(Ktu == 0.)
+ or np.any(np.isnan(u)) or np.any(np.isnan(v))
+ or np.any(np.isinf(u)) or np.any(np.isinf(v))):
+ # we have reached the machine precision
+ # come back to previous solution and quit loop
+ warnings.warn('Numerical errors at iteration %s' % cpt)
+ q = qprev
+ break
+ if (cpt % 10 == 0 and not absorbing) or cpt == 0:
+ # we can speed up the process by checking for the error only all
+ # the 10th iterations
+ err = abs(u * Kv - A).max()
+ if log:
+ log['err'].append(err)
+ if verbose:
+ if cpt % 50 == 0:
+ print(
+ '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err))
+
+ cpt += 1
+ if err > stopThr:
+ warnings.warn("Stabilized Unbalanced Sinkhorn did not converge." +
+ "Try a larger entropy `reg`" +
+ "Or a larger absorption threshold `tau`.")
+ if log:
+ log['niter'] = cpt
+ log['logu'] = np.log(u + 1e-16)
+ log['logv'] = np.log(v + 1e-16)
+ return q, log
+ else:
+ return q
+
+
+def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000,
+ stopThr=1e-9, stabThr=1e-30, verbose=False,
+ log=False):
+ r"""Compute the entropic regularized wasserstein barycenter of distributions A
where A is a collection of 2D images.
The function solves the following optimization problem:
@@ -1087,16 +1296,16 @@ def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1
Parameters
----------
- A : np.ndarray (n,w,h)
- n distributions (2D images) of size w x h
+ A : ndarray, shape (n_hists, width, height)
+ n distributions (2D images) of size width x height
reg : float
Regularization term >0
- weights : np.ndarray (n,)
+ weights : ndarray, shape (n_hists,)
Weights of each image on the simplex (barycentric coordinates)
numItermax : int, optional
Max number of iterations
stopThr : float, optional
- Stop threshol on error (>0)
+        Stop threshold on error (> 0)
stabThr : float, optional
Stabilization threshold to avoid numerical precision issue
verbose : bool, optional
@@ -1104,15 +1313,13 @@ def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1
log : bool, optional
record log if True
-
Returns
-------
- a : (w,h) ndarray
+ a : ndarray, shape (width, height)
2D Wasserstein barycenter
log : dict
log dictionary return only if log==True in parameters
-
References
----------
@@ -1180,7 +1387,7 @@ def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000, stopThr=1
def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000,
stopThr=1e-3, verbose=False, log=False):
- """
+ r"""
Compute the unmixing of an observation with a given dictionary using Wasserstein distance
The function solve the following optimization problem:
@@ -1192,9 +1399,12 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000,
where :
- :math:`W_{M,reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance with M loss matrix (see ot.bregman.sinkhorn)
- - :math:`\mathbf{a}` is an observed distribution, :math:`\mathbf{h}_0` is aprior on unmixing
- - reg and :math:`\mathbf{M}` are respectively the regularization term and the cost matrix for OT data fitting
- - reg0 and :math:`\mathbf{M0}` are respectively the regularization term and the cost matrix for regularization
+    - :math:`\mathbf{D}` is a dictionary of `n_atoms` atoms of dimension `dim_a`, its expected shape is `(dim_a, n_atoms)`
+ - :math:`\mathbf{h}` is the estimated unmixing of dimension `n_atoms`
+ - :math:`\mathbf{a}` is an observed distribution of dimension `dim_a`
+ - :math:`\mathbf{h}_0` is a prior on `h` of dimension `dim_prior`
+ - reg and :math:`\mathbf{M}` are respectively the regularization term and the cost matrix (dim_a, dim_a) for OT data fitting
+    - reg0 and :math:`\mathbf{M0}` are respectively the regularization term and the cost matrix (dim_prior, n_atoms) for regularization
+    - :math:`\alpha` weights data fitting and regularization
The optimization problem is solved using the algorithm described in [4]
@@ -1202,16 +1412,16 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000,
Parameters
----------
- a : np.ndarray (d)
- observed distribution
- D : np.ndarray (d,n)
+ a : ndarray, shape (dim_a)
+ observed distribution (histogram, sums to 1)
+ D : ndarray, shape (dim_a, n_atoms)
dictionary matrix
- M : np.ndarray (d,d)
+ M : ndarray, shape (dim_a, dim_a)
loss matrix
- M0 : np.ndarray (n,n)
+ M0 : ndarray, shape (n_atoms, dim_prior)
loss matrix
- h0 : np.ndarray (n,)
- prior on h
+ h0 : ndarray, shape (n_atoms,)
+ prior on the estimated unmixing h
reg : float
Regularization term >0 (Wasserstein data fitting)
reg0 : float
@@ -1230,7 +1440,7 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000,
Returns
-------
- a : (d,) ndarray
+ h : ndarray, shape (n_atoms,)
Estimated unmixing h of the observation a
log : dict
log dictionary return only if log==True in parameters
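An illustrative call with a hypothetical toy dictionary and costs, just to fix the shapes expected by the documented signature:

    import numpy as np
    import ot

    D = np.array([[.8, .1],
                  [.1, .1],
                  [.1, .8]])           # (dim_a, n_atoms) dictionary, toy values
    a = np.array([.6, .1, .3])         # observed histogram
    bins = np.arange(3, dtype=np.float64).reshape(-1, 1)
    atoms = np.arange(2, dtype=np.float64).reshape(-1, 1)
    M = ot.dist(bins, bins)            # (dim_a, dim_a) data-fitting cost
    M0 = ot.dist(atoms, atoms)         # cost for the regularization term
    h0 = ot.unif(2)                    # flat prior on the unmixing

    h = ot.bregman.unmix(a, D, M, M0, h0, reg=1e-1, reg0=1e-1, alpha=.5)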
@@ -1284,3 +1494,635 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000,
return np.sum(K0, axis=1), log
else:
return np.sum(K0, axis=1)
+
+
+def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean',
+ numIterMax=10000, stopThr=1e-9, verbose=False,
+ log=False, **kwargs):
+ r'''
+ Solve the entropic regularization optimal transport problem and return the
+ OT matrix from empirical data
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+ s.t. \gamma 1 = a
+
+ \gamma^T 1= b
+
+ \gamma\geq 0
+ where :
+
+ - :math:`M` is the (n_samples_a, n_samples_b) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`a` and :math:`b` are source and target weights (sum to 1)
+
+
+ Parameters
+ ----------
+ X_s : ndarray, shape (n_samples_a, dim)
+ samples in the source domain
+ X_t : ndarray, shape (n_samples_b, dim)
+ samples in the target domain
+ reg : float
+ Regularization term >0
+ a : ndarray, shape (n_samples_a,)
+ samples weights in the source domain
+ b : ndarray, shape (n_samples_b,)
+ samples weights in the target domain
+    numIterMax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+        Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ gamma : ndarray, shape (n_samples_a, n_samples_b)
+ Regularized optimal transportation matrix for the given parameters
+ log : dict
+ log dictionary return only if log==True in parameters
+
+ Examples
+ --------
+
+ >>> n_samples_a = 2
+ >>> n_samples_b = 2
+ >>> reg = 0.1
+ >>> X_s = np.reshape(np.arange(n_samples_a), (n_samples_a, 1))
+ >>> X_t = np.reshape(np.arange(0, n_samples_b), (n_samples_b, 1))
+ >>> empirical_sinkhorn(X_s, X_t, reg, verbose=False) # doctest: +NORMALIZE_WHITESPACE
+ array([[4.99977301e-01, 2.26989344e-05],
+ [2.26989344e-05, 4.99977301e-01]])
+
+
+ References
+ ----------
+
+ .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
+
+ .. [9] Schmitzer, B. (2016). Stabilized Sparse Scaling Algorithms for Entropy Regularized Transport Problems. arXiv preprint arXiv:1610.06519.
+
+ .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
+ '''
+
+ if a is None:
+ a = unif(np.shape(X_s)[0])
+ if b is None:
+ b = unif(np.shape(X_t)[0])
+
+ M = dist(X_s, X_t, metric=metric)
+
+ if log:
+ pi, log = sinkhorn(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=True, **kwargs)
+ return pi, log
+ else:
+ pi = sinkhorn(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=False, **kwargs)
+ return pi
+
+
+def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs):
+ r'''
+ Solve the entropic regularization optimal transport problem from empirical
+ data and return the OT loss
+
+
+ The function solves the following optimization problem:
+
+ .. math::
+ W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+ s.t. \gamma 1 = a
+
+ \gamma^T 1= b
+
+ \gamma\geq 0
+ where :
+
+ - :math:`M` is the (n_samples_a, n_samples_b) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`a` and :math:`b` are source and target weights (sum to 1)
+
+
+ Parameters
+ ----------
+ X_s : ndarray, shape (n_samples_a, dim)
+ samples in the source domain
+ X_t : ndarray, shape (n_samples_b, dim)
+ samples in the target domain
+ reg : float
+ Regularization term >0
+ a : ndarray, shape (n_samples_a,)
+ samples weights in the source domain
+ b : ndarray, shape (n_samples_b,)
+ samples weights in the target domain
+    numIterMax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+        Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+    W : (n_hists,) ndarray or float
+        Regularized optimal transport loss for the given parameters
+ log : dict
+ log dictionary return only if log==True in parameters
+
+ Examples
+ --------
+
+ >>> n_samples_a = 2
+ >>> n_samples_b = 2
+ >>> reg = 0.1
+ >>> X_s = np.reshape(np.arange(n_samples_a), (n_samples_a, 1))
+ >>> X_t = np.reshape(np.arange(0, n_samples_b), (n_samples_b, 1))
+ >>> empirical_sinkhorn2(X_s, X_t, reg, verbose=False)
+ array([4.53978687e-05])
+
+
+ References
+ ----------
+
+ .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal Transport, Advances in Neural Information Processing Systems (NIPS) 26, 2013
+
+ .. [9] Schmitzer, B. (2016). Stabilized Sparse Scaling Algorithms for Entropy Regularized Transport Problems. arXiv preprint arXiv:1610.06519.
+
+ .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016). Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
+ '''
+
+ if a is None:
+ a = unif(np.shape(X_s)[0])
+ if b is None:
+ b = unif(np.shape(X_t)[0])
+
+ M = dist(X_s, X_t, metric=metric)
+
+ if log:
+ sinkhorn_loss, log = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ return sinkhorn_loss, log
+ else:
+ sinkhorn_loss = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ return sinkhorn_loss
+
+
+def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs):
+ r'''
+ Compute the sinkhorn divergence loss from empirical data
+
+ The function solves the following optimization problems and return the
+ sinkhorn divergence :math:`S`:
+
+ .. math::
+
+ W &= \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+ W_a &= \min_{\gamma_a} <\gamma_a,M_a>_F + reg\cdot\Omega(\gamma_a)
+
+ W_b &= \min_{\gamma_b} <\gamma_b,M_b>_F + reg\cdot\Omega(\gamma_b)
+
+ S &= W - 1/2 * (W_a + W_b)
+
+ .. math::
+ s.t. \gamma 1 = a
+
+ \gamma^T 1= b
+
+ \gamma\geq 0
+
+ \gamma_a 1 = a
+
+ \gamma_a^T 1= a
+
+ \gamma_a\geq 0
+
+ \gamma_b 1 = b
+
+ \gamma_b^T 1= b
+
+ \gamma_b\geq 0
+ where :
+
+ - :math:`M` (resp. :math:`M_a, M_b`) is the (n_samples_a, n_samples_b) metric cost matrix (resp (n_samples_a, n_samples_a) and (n_samples_b, n_samples_b))
+ - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`a` and :math:`b` are source and target weights (sum to 1)
+
+
+ Parameters
+ ----------
+ X_s : ndarray, shape (n_samples_a, dim)
+ samples in the source domain
+ X_t : ndarray, shape (n_samples_b, dim)
+ samples in the target domain
+ reg : float
+ Regularization term >0
+ a : ndarray, shape (n_samples_a,)
+ samples weights in the source domain
+ b : ndarray, shape (n_samples_b,)
+ samples weights in the target domain
+    numIterMax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+        Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+ Returns
+ -------
+    W : (1,) ndarray
+        Sinkhorn divergence value
+ log : dict
+ log dictionary return only if log==True in parameters
+
+ Examples
+ --------
+ >>> n_samples_a = 2
+ >>> n_samples_b = 4
+ >>> reg = 0.1
+ >>> X_s = np.reshape(np.arange(n_samples_a), (n_samples_a, 1))
+ >>> X_t = np.reshape(np.arange(0, n_samples_b), (n_samples_b, 1))
+ >>> empirical_sinkhorn_divergence(X_s, X_t, reg) # doctest: +ELLIPSIS
+ array([1.499...])
+
+
+ References
+ ----------
.. [23] Aude Genevay, Gabriel Peyré, Marco Cuturi, Learning Generative Models with Sinkhorn Divergences, Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics, (AISTATS) 21, 2018
+ '''
+ if log:
+        sinkhorn_loss_ab, log_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+
+        sinkhorn_loss_a, log_a = empirical_sinkhorn2(X_s, X_s, reg, a, a, metric=metric, numIterMax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+
+        sinkhorn_loss_b, log_b = empirical_sinkhorn2(X_t, X_t, reg, b, b, metric=metric, numIterMax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+
+ sinkhorn_div = sinkhorn_loss_ab - 1 / 2 * (sinkhorn_loss_a + sinkhorn_loss_b)
+
+ log = {}
+ log['sinkhorn_loss_ab'] = sinkhorn_loss_ab
+ log['sinkhorn_loss_a'] = sinkhorn_loss_a
+ log['sinkhorn_loss_b'] = sinkhorn_loss_b
+ log['log_sinkhorn_ab'] = log_ab
+ log['log_sinkhorn_a'] = log_a
+ log['log_sinkhorn_b'] = log_b
+
+ return max(0, sinkhorn_div), log
+
+ else:
+        sinkhorn_loss_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+
+        sinkhorn_loss_a = empirical_sinkhorn2(X_s, X_s, reg, a, a, metric=metric, numIterMax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+
+        sinkhorn_loss_b = empirical_sinkhorn2(X_t, X_t, reg, b, b, metric=metric, numIterMax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+
+ sinkhorn_div = sinkhorn_loss_ab - 1 / 2 * (sinkhorn_loss_a + sinkhorn_loss_b)
+ return max(0, sinkhorn_div)
+
+
+def screenkhorn(a, b, M, reg, ns_budget=None, nt_budget=None, uniform=False, restricted=True,
+ maxiter=10000, maxfun=10000, pgtol=1e-09, verbose=False, log=False):
+ r""""
+ Screening Sinkhorn Algorithm for Regularized Optimal Transport
+
+ The function solves an approximate dual of Sinkhorn divergence [2] which is written as the following optimization problem:
+
+    .. math::
+        (u, v) = \argmin_{u, v} 1_{ns}^T B(u,v) 1_{nt} - <\kappa u, a> - <v/\kappa, b>
+
+    where :math:`B(u,v) = \mathrm{diag}(e^u) K \mathrm{diag}(e^v)`, with :math:`K = e^{-M/reg}`, and
+
+    s.t. :math:`e^{u_i} \geq \epsilon / \kappa` for all :math:`i \in \{1, ..., ns\}`
+
+    :math:`e^{v_j} \geq \epsilon \kappa` for all :math:`j \in \{1, ..., nt\}`
+
+    The parameters :math:`\kappa` and :math:`\epsilon` are determined w.r.t. the chosen budget of points (ns_budget, nt_budget); see Equation (5) in [26]
+
+
+ Parameters
+ ----------
+ a : `numpy.ndarray`, shape=(ns,)
+ samples weights in the source domain
+
+ b : `numpy.ndarray`, shape=(nt,)
+ samples weights in the target domain
+
+ M : `numpy.ndarray`, shape=(ns, nt)
+ Cost matrix
+
+ reg : `float`
+ Level of the entropy regularisation
+
+    ns_budget : `int`, default=None
+        Number budget of points to be kept in the source domain
+        If it is None then 50% of the source sample points will be kept
+
+    nt_budget : `int`, default=None
+        Number budget of points to be kept in the target domain
+        If it is None then 50% of the target sample points will be kept
+
+ uniform : `bool`, default=False
+ If `True`, the source and target distribution are supposed to be uniform, i.e., a_i = 1 / ns and b_j = 1 / nt
+
+ restricted : `bool`, default=True
+ If `True`, a warm-start initialization for the L-BFGS-B solver
+ using a restricted Sinkhorn algorithm with at most 5 iterations
+
+ maxiter : `int`, default=10000
+ Maximum number of iterations in LBFGS solver
+
+ maxfun : `int`, default=10000
+ Maximum number of function evaluations in LBFGS solver
+
+ pgtol : `float`, default=1e-09
+ Final objective function accuracy in LBFGS solver
+
+ verbose : `bool`, default=False
+        If `True`, display information about the cardinality of the active sets and the parameters kappa
+        and epsilon
+
+ Dependency
+ ----------
+ To gain more efficiency, screenkhorn needs to call the "Bottleneck" package (https://pypi.org/project/Bottleneck/)
+    in the screening pre-processing step. If Bottleneck isn't installed, the following warning is raised:
+    "Bottleneck module is not installed. Install it from https://pypi.org/project/Bottleneck/ for better performance."
+
+
+ Returns
+ -------
+ gamma : `numpy.ndarray`, shape=(ns, nt)
+ Screened optimal transportation matrix for the given parameters
+
+ log : `dict`, default=False
+ Log dictionary return only if log==True in parameters
+
+
+ References
+ -----------
+ .. [26] Alaya M. Z., Bérar M., Gasso G., Rakotomamonjy A. (2019). Screening Sinkhorn Algorithm for Regularized Optimal Transport (NIPS) 33, 2019
+
+ """
+ # check if bottleneck module exists
+ try:
+ import bottleneck
+ except ImportError:
+ warnings.warn("Bottleneck module is not installed. Install it from https://pypi.org/project/Bottleneck/ for better performance.")
+ bottleneck = np
+
+ a = np.asarray(a, dtype=np.float64)
+ b = np.asarray(b, dtype=np.float64)
+ M = np.asarray(M, dtype=np.float64)
+ ns, nt = M.shape
+
+ # by default, we keep only 50% of the sample data points
+ if ns_budget is None:
+ ns_budget = int(np.floor(0.5 * ns))
+ if nt_budget is None:
+ nt_budget = int(np.floor(0.5 * nt))
+
+ # calculate the Gibbs kernel
+ K = np.empty_like(M)
+ np.divide(M, -reg, out=K)
+ np.exp(K, out=K)
+
+ def projection(u, epsilon):
+ u[u <= epsilon] = epsilon
+ return u
+
+ # ----------------------------------------------------------------------------------------------------------------#
+ # Step 1: Screening pre-processing #
+ # ----------------------------------------------------------------------------------------------------------------#
+
+ if ns_budget == ns and nt_budget == nt:
+ # full number of budget points (ns, nt) = (ns_budget, nt_budget)
+ Isel = np.ones(ns, dtype=bool)
+ Jsel = np.ones(nt, dtype=bool)
+ epsilon = 0.0
+ kappa = 1.0
+
+ cst_u = 0.
+ cst_v = 0.
+
+ bounds_u = [(0.0, np.inf)] * ns
+ bounds_v = [(0.0, np.inf)] * nt
+
+ a_I = a
+ b_J = b
+ K_IJ = K
+ K_IJc = []
+ K_IcJ = []
+
+ vec_eps_IJc = np.zeros(nt)
+ vec_eps_IcJ = np.zeros(ns)
+
+ else:
+ # sum of rows and columns of K
+ K_sum_cols = K.sum(axis=1)
+ K_sum_rows = K.sum(axis=0)
+
+ if uniform:
+ if ns / ns_budget < 4:
+ aK_sort = np.sort(K_sum_cols)
+ epsilon_u_square = a[0] / aK_sort[ns_budget - 1]
+ else:
+ aK_sort = bottleneck.partition(K_sum_cols, ns_budget - 1)[ns_budget - 1]
+ epsilon_u_square = a[0] / aK_sort
+
+ if nt / nt_budget < 4:
+ bK_sort = np.sort(K_sum_rows)
+ epsilon_v_square = b[0] / bK_sort[nt_budget - 1]
+ else:
+ bK_sort = bottleneck.partition(K_sum_rows, nt_budget - 1)[nt_budget - 1]
+ epsilon_v_square = b[0] / bK_sort
+ else:
+ aK = a / K_sum_cols
+ bK = b / K_sum_rows
+
+ aK_sort = np.sort(aK)[::-1]
+ epsilon_u_square = aK_sort[ns_budget - 1]
+
+ bK_sort = np.sort(bK)[::-1]
+ epsilon_v_square = bK_sort[nt_budget - 1]
+
+ # active sets I and J (see Lemma 1 in [26])
+ Isel = a >= epsilon_u_square * K_sum_cols
+ Jsel = b >= epsilon_v_square * K_sum_rows
+
+ if sum(Isel) != ns_budget:
+ if uniform:
+ aK = a / K_sum_cols
+ aK_sort = np.sort(aK)[::-1]
+ epsilon_u_square = aK_sort[ns_budget - 1:ns_budget + 1].mean()
+ Isel = a >= epsilon_u_square * K_sum_cols
+ ns_budget = sum(Isel)
+
+ if sum(Jsel) != nt_budget:
+ if uniform:
+ bK = b / K_sum_rows
+ bK_sort = np.sort(bK)[::-1]
+ epsilon_v_square = bK_sort[nt_budget - 1:nt_budget + 1].mean()
+ Jsel = b >= epsilon_v_square * K_sum_rows
+ nt_budget = sum(Jsel)
+
+ epsilon = (epsilon_u_square * epsilon_v_square) ** (1 / 4)
+ kappa = (epsilon_v_square / epsilon_u_square) ** (1 / 2)
+
+ if verbose:
+ print("epsilon = %s\n" % epsilon)
+ print("kappa = %s\n" % kappa)
+ print('Cardinality of selected points: |Isel| = %s \t |Jsel| = %s \n' % (sum(Isel), sum(Jsel)))
+
+ # Ic, Jc: complementary of the active sets I and J
+ Ic = ~Isel
+ Jc = ~Jsel
+
+ K_IJ = K[np.ix_(Isel, Jsel)]
+ K_IcJ = K[np.ix_(Ic, Jsel)]
+ K_IJc = K[np.ix_(Isel, Jc)]
+
+ K_min = K_IJ.min()
+ if K_min == 0:
+ K_min = np.finfo(float).tiny
+
+ # a_I, b_J, a_Ic, b_Jc
+ a_I = a[Isel]
+ b_J = b[Jsel]
+ if not uniform:
+ a_I_min = a_I.min()
+ a_I_max = a_I.max()
+ b_J_max = b_J.max()
+ b_J_min = b_J.min()
+ else:
+ a_I_min = a_I[0]
+ a_I_max = a_I[0]
+ b_J_max = b_J[0]
+ b_J_min = b_J[0]
+
+ # box constraints in L-BFGS-B (see Proposition 1 in [26])
+ bounds_u = [(max(a_I_min / ((nt - nt_budget) * epsilon + nt_budget * (b_J_max / (
+ ns * epsilon * kappa * K_min))), epsilon / kappa), a_I_max / (nt * epsilon * K_min))] * ns_budget
+
+ bounds_v = [(max(b_J_min / ((ns - ns_budget) * epsilon + ns_budget * (kappa * a_I_max / (nt * epsilon * K_min))),
+ epsilon * kappa), b_J_max / (ns * epsilon * K_min))] * nt_budget
+
+ # pre-calculated constants for the objective
+ vec_eps_IJc = epsilon * kappa * (K_IJc * np.ones(nt - nt_budget).reshape((1, -1))).sum(axis=1)
+ vec_eps_IcJ = (epsilon / kappa) * (np.ones(ns - ns_budget).reshape((-1, 1)) * K_IcJ).sum(axis=0)
+
+ # initialisation
+ u0 = np.full(ns_budget, (1. / ns_budget) + epsilon / kappa)
+ v0 = np.full(nt_budget, (1. / nt_budget) + epsilon * kappa)
+
+    # pre-calculated constants for Restricted Sinkhorn (see Algorithm 1 in supplementary of [26])
+ if restricted:
+ if ns_budget != ns or nt_budget != nt:
+ cst_u = kappa * epsilon * K_IJc.sum(axis=1)
+ cst_v = epsilon * K_IcJ.sum(axis=0) / kappa
+
+ cpt = 1
+        while cpt < 5:  # 4 warm-start iterations (cpt runs from 1 to 4)
+ K_IJ_v = np.dot(K_IJ.T, u0) + cst_v
+ v0 = b_J / (kappa * K_IJ_v)
+ KIJ_u = np.dot(K_IJ, v0) + cst_u
+ u0 = (kappa * a_I) / KIJ_u
+ cpt += 1
+
+ u0 = projection(u0, epsilon / kappa)
+ v0 = projection(v0, epsilon * kappa)
+
+    else:
+        # without the restricted warm-start, keep the default u0, v0
+        pass
+
+ def restricted_sinkhorn(usc, vsc, max_iter=5):
+ """
+        Restricted Sinkhorn algorithm used as a warm-start initialization for L-BFGS-B (see Algorithm 1 in the supplementary of [26])
+ """
+ cpt = 1
+ while cpt < max_iter:
+ K_IJ_v = np.dot(K_IJ.T, usc) + cst_v
+ vsc = b_J / (kappa * K_IJ_v)
+ KIJ_u = np.dot(K_IJ, vsc) + cst_u
+ usc = (kappa * a_I) / KIJ_u
+ cpt += 1
+
+ usc = projection(usc, epsilon / kappa)
+ vsc = projection(vsc, epsilon * kappa)
+
+ return usc, vsc
+
+ def screened_obj(usc, vsc):
+ part_IJ = np.dot(np.dot(usc, K_IJ), vsc) - kappa * np.dot(a_I, np.log(usc)) - (1. / kappa) * np.dot(b_J, np.log(vsc))
+ part_IJc = np.dot(usc, vec_eps_IJc)
+ part_IcJ = np.dot(vec_eps_IcJ, vsc)
+ psi_epsilon = part_IJ + part_IJc + part_IcJ
+ return psi_epsilon
+
+ def screened_grad(usc, vsc):
+ # gradients of Psi_(kappa,epsilon) w.r.t u and v
+ grad_u = np.dot(K_IJ, vsc) + vec_eps_IJc - kappa * a_I / usc
+ grad_v = np.dot(K_IJ.T, usc) + vec_eps_IcJ - (1. / kappa) * b_J / vsc
+ return grad_u, grad_v
+
+ def bfgspost(theta):
+ u = theta[:ns_budget]
+ v = theta[ns_budget:]
+ # objective
+ f = screened_obj(u, v)
+ # gradient
+ g_u, g_v = screened_grad(u, v)
+ g = np.hstack([g_u, g_v])
+ return f, g
+
+ #----------------------------------------------------------------------------------------------------------------#
+ # Step 2: L-BFGS-B solver #
+ #----------------------------------------------------------------------------------------------------------------#
+
+ u0, v0 = restricted_sinkhorn(u0, v0)
+ theta0 = np.hstack([u0, v0])
+
+ bounds = bounds_u + bounds_v # constraint bounds
+
+    theta, _, _ = fmin_l_bfgs_b(func=bfgspost,
+ x0=theta0,
+ bounds=bounds,
+ maxfun=maxfun,
+ pgtol=pgtol,
+ maxiter=maxiter)
+
+ usc = theta[:ns_budget]
+ vsc = theta[ns_budget:]
+
+ usc_full = np.full(ns, epsilon / kappa)
+ vsc_full = np.full(nt, epsilon * kappa)
+ usc_full[Isel] = usc
+ vsc_full[Jsel] = vsc
+
+ if log:
+ log = {}
+ log['u'] = usc_full
+ log['v'] = vsc_full
+ log['Isel'] = Isel
+ log['Jsel'] = Jsel
+
+ gamma = usc_full[:, None] * K * vsc_full[None, :]
+ gamma = gamma / gamma.sum()
+
+ if log:
+ return gamma, log
+ else:
+ return gamma
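+
+
+# A minimal usage sketch for the screened solver above. The public name and
+# exact signature are assumptions of this sketch (something like
+# ``screenkhorn(a, b, M, reg, ns_budget, nt_budget)``); only the screening
+# parameters visible in the code above are taken as given.
+#
+#     import numpy as np
+#     import ot
+#     rng = np.random.RandomState(0)
+#     xs, xt = rng.randn(100, 2), rng.randn(80, 2)
+#     a, b = ot.unif(100), ot.unif(80)
+#     M = ot.dist(xs, xt)
+#     # keep only half of the points on each side during the screening step
+#     G = ot.bregman.screenkhorn(a, b, M, reg=1., ns_budget=50, nt_budget=40)
+#     # G is a (100, 80) coupling supported mostly on the selected points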
diff --git a/ot/da.py b/ot/da.py
index bc09e3c..108a38d 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -6,6 +6,7 @@ Domain adaptation with optimal transport
# Author: Remi Flamary <remi.flamary@unice.fr>
# Nicolas Courty <ncourty@irisa.fr>
# Michael Perrot <michael.perrot@univ-st-etienne.fr>
+# Nathalie Gayraud <nat.gayraud@gmail.com>
#
# License: MIT License
@@ -16,6 +17,7 @@ from .bregman import sinkhorn
from .lp import emd
from .utils import unif, dist, kernel, cost_normalization
from .utils import check_params, BaseEstimator
+from .unbalanced import sinkhorn_unbalanced
from .optim import cg
from .optim import gcg
@@ -41,15 +43,15 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
where :
- M is the (ns,nt) metric cost matrix
- - :math:`\Omega_e` is the entropic regularization term
- :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - :math:`\Omega_g` is the group lasso regulaization term
+ - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e
+ (\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`\Omega_g` is the group lasso regularization term
:math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1`
where :math:`\mathcal{I}_c` are the index of samples from class c
in the source domain.
- a and b are source and target weights (sum to 1)
- The algorithm used for solving the problem is the generalised conditional
+ The algorithm used for solving the problem is the generalized conditional
gradient as proposed in [5]_ [7]_
@@ -473,22 +475,24 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
Weight for the linear OT loss (>0)
eta : float, optional
Regularization term for the linear mapping L (>0)
- bias : bool,optional
- Estimate linear mapping with constant bias
kerneltype : str,optional
kernel used by calling function ot.utils.kernel (gaussian by default)
sigma : float, optional
Gaussian kernel bandwidth.
+ bias : bool,optional
+ Estimate linear mapping with constant bias
+ verbose : bool, optional
+ Print information along iterations
+ verbose2 : bool, optional
+ Print information along iterations
numItermax : int, optional
Max number of BCD iterations
- stopThr : float, optional
- Stop threshold on relative loss decrease (>0)
numInnerItermax : int, optional
Max number of iterations (inner CG solver)
stopInnerThr : float, optional
Stop threshold on error (inner CG solver) (>0)
- verbose : bool, optional
- Print information along iterations
+ stopThr : float, optional
+ Stop threshold on relative loss decrease (>0)
log : bool, optional
record log if True
@@ -643,7 +647,8 @@ def OT_mapping_linear(xs, xt, reg=1e-6, ws=None,
The function estimates the optimal linear operator that aligns the two
empirical distributions. This is equivalent to estimating the closed
form mapping between two Gaussian distributions :math:`N(\mu_s,\Sigma_s)`
- and :math:`N(\mu_t,\Sigma_t)` as proposed in [14] and discussed in remark 2.29 in [15].
+ and :math:`N(\mu_t,\Sigma_t)` as proposed in [14] and discussed in remark
+ 2.29 in [15].
The linear operator from source to target :math:`M`
@@ -1184,25 +1189,25 @@ class SinkhornTransport(BaseTransport):
        algorithm if it has not converged
tol : float, optional (default=10e-9)
The precision required to stop the optimization algorithm.
- mapping : string, optional (default="barycentric")
- The kind of mapping to apply to transport samples from a domain into
- another one.
- if "barycentric" only the samples used to estimate the coupling can
- be transported from a domain to another one.
+ verbose : bool, optional (default=False)
+ Controls the verbosity of the optimization algorithm
+    log : bool, optional (default=False)
+ Controls the logs of the optimization algorithm
metric : string, optional (default="sqeuclidean")
The ground metric for the Wasserstein problem
norm : string, optional (default=None)
If given, normalize the ground metric to avoid numerical errors that
can occur with large metric values.
- distribution : string, optional (default="uniform")
+ distribution_estimation : callable, optional (defaults to the uniform)
The kind of distribution estimation to employ
- verbose : int, optional (default=0)
- Controls the verbosity of the optimization algorithm
- log : int, optional (default=0)
- Controls the logs of the optimization algorithm
+ out_of_sample_map : string, optional (default="ferradans")
+ The kind of out of sample mapping to apply to transport samples
+ from a domain into another one. Currently the only possible option is
+ "ferradans" which uses the method proposed in [6].
limit_max: float, optional (defaul=np.infty)
Controls the semi supervised mode. Transport between labeled source
- and target samples of different classes will exhibit an infinite cost
+        and target samples of different classes will exhibit a cost defined
+        by this variable
Attributes
----------
@@ -1287,22 +1292,19 @@ class EMDTransport(BaseTransport):
Parameters
----------
- mapping : string, optional (default="barycentric")
- The kind of mapping to apply to transport samples from a domain into
- another one.
- if "barycentric" only the samples used to estimate the coupling can
- be transported from a domain to another one.
metric : string, optional (default="sqeuclidean")
The ground metric for the Wasserstein problem
norm : string, optional (default=None)
If given, normalize the ground metric to avoid numerical errors that
can occur with large metric values.
- distribution : string, optional (default="uniform")
- The kind of distribution estimation to employ
- verbose : int, optional (default=0)
- Controls the verbosity of the optimization algorithm
- log : int, optional (default=0)
+    log : bool, optional (default=False)
Controls the logs of the optimization algorithm
+ distribution_estimation : callable, optional (defaults to the uniform)
+ The kind of distribution estimation to employ
+ out_of_sample_map : string, optional (default="ferradans")
+ The kind of out of sample mapping to apply to transport samples
+ from a domain into another one. Currently the only possible option is
+ "ferradans" which uses the method proposed in [6].
limit_max: float, optional (default=10)
Controls the semi supervised mode. Transport between labeled source
and target samples of different classes will exhibit an infinite cost
@@ -1387,28 +1389,32 @@ class SinkhornLpl1Transport(BaseTransport):
Entropic regularization parameter
reg_cl : float, optional (default=0.1)
Class regularization parameter
- mapping : string, optional (default="barycentric")
- The kind of mapping to apply to transport samples from a domain into
- another one.
- if "barycentric" only the samples used to estimate the coupling can
- be transported from a domain to another one.
- metric : string, optional (default="sqeuclidean")
- The ground metric for the Wasserstein problem
- norm : string, optional (default=None)
- If given, normalize the ground metric to avoid numerical errors that
- can occur with large metric values.
- distribution : string, optional (default="uniform")
- The kind of distribution estimation to employ
max_iter : int, float, optional (default=10)
        The minimum number of iterations before stopping the optimization
        algorithm if it has not converged
max_inner_iter : int, float, optional (default=200)
The number of iteration in the inner loop
- verbose : int, optional (default=0)
+ log : bool, optional (default=False)
+ Controls the logs of the optimization algorithm
+ tol : float, optional (default=10e-9)
+ Stop threshold on error (inner sinkhorn solver) (>0)
+ verbose : bool, optional (default=False)
Controls the verbosity of the optimization algorithm
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ norm : string, optional (default=None)
+ If given, normalize the ground metric to avoid numerical errors that
+ can occur with large metric values.
+ distribution_estimation : callable, optional (defaults to the uniform)
+ The kind of distribution estimation to employ
+ out_of_sample_map : string, optional (default="ferradans")
+ The kind of out of sample mapping to apply to transport samples
+ from a domain into another one. Currently the only possible option is
+ "ferradans" which uses the method proposed in [6].
limit_max: float, optional (defaul=np.infty)
Controls the semi supervised mode. Transport between labeled source
- and target samples of different classes will exhibit an infinite cost
+ and target samples of different classes will exhibit a cost defined by
+ limit_max.
Attributes
----------
@@ -1504,27 +1510,28 @@ class SinkhornL1l2Transport(BaseTransport):
Entropic regularization parameter
reg_cl : float, optional (default=0.1)
Class regularization parameter
- mapping : string, optional (default="barycentric")
- The kind of mapping to apply to transport samples from a domain into
- another one.
- if "barycentric" only the samples used to estimate the coupling can
- be transported from a domain to another one.
- metric : string, optional (default="sqeuclidean")
- The ground metric for the Wasserstein problem
- norm : string, optional (default=None)
- If given, normalize the ground metric to avoid numerical errors that
- can occur with large metric values.
- distribution : string, optional (default="uniform")
- The kind of distribution estimation to employ
max_iter : int, float, optional (default=10)
        The minimum number of iterations before stopping the optimization
        algorithm if it has not converged
max_inner_iter : int, float, optional (default=200)
The number of iteration in the inner loop
- verbose : int, optional (default=0)
+ tol : float, optional (default=10e-9)
+ Stop threshold on error (inner sinkhorn solver) (>0)
+ verbose : bool, optional (default=False)
Controls the verbosity of the optimization algorithm
- log : int, optional (default=0)
+ log : bool, optional (default=False)
Controls the logs of the optimization algorithm
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ norm : string, optional (default=None)
+ If given, normalize the ground metric to avoid numerical errors that
+ can occur with large metric values.
+ distribution_estimation : callable, optional (defaults to the uniform)
+ The kind of distribution estimation to employ
+ out_of_sample_map : string, optional (default="ferradans")
+ The kind of out of sample mapping to apply to transport samples
+ from a domain into another one. Currently the only possible option is
+ "ferradans" which uses the method proposed in [6].
limit_max: float, optional (default=10)
Controls the semi supervised mode. Transport between labeled source
and target samples of different classes will exhibit an infinite cost
@@ -1646,10 +1653,12 @@ class MappingTransport(BaseEstimator):
Max number of iterations (inner CG solver)
inner_tol : float, optional (default=1e-6)
Stop threshold on error (inner CG solver) (>0)
- verbose : bool, optional (default=False)
- Print information along iterations
log : bool, optional (default=False)
record log if True
+ verbose : bool, optional (default=False)
+ Print information along iterations
+ verbose2 : bool, optional (default=False)
+ Print information along iterations
Attributes
----------
@@ -1786,3 +1795,122 @@ class MappingTransport(BaseEstimator):
transp_Xs = K.dot(self.mapping_)
return transp_Xs
+
+
+class UnbalancedSinkhornTransport(BaseTransport):
+
+    """Domain Adaptation unbalanced OT method based on the Sinkhorn algorithm
+
+ Parameters
+ ----------
+ reg_e : float, optional (default=1)
+ Entropic regularization parameter
+ reg_m : float, optional (default=0.1)
+ Mass regularization parameter
+ method : str
+        Method used for the solver, either 'sinkhorn', 'sinkhorn_stabilized' or
+        'sinkhorn_epsilon_scaling'; see those functions for specific parameters
+ max_iter : int, float, optional (default=10)
+        The minimum number of iterations before stopping the optimization
+        algorithm if it has not converged
+ tol : float, optional (default=10e-9)
+ Stop threshold on error (inner sinkhorn solver) (>0)
+ verbose : bool, optional (default=False)
+ Controls the verbosity of the optimization algorithm
+ log : bool, optional (default=False)
+ Controls the logs of the optimization algorithm
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ norm : string, optional (default=None)
+ If given, normalize the ground metric to avoid numerical errors that
+ can occur with large metric values.
+ distribution_estimation : callable, optional (defaults to the uniform)
+ The kind of distribution estimation to employ
+ out_of_sample_map : string, optional (default="ferradans")
+ The kind of out of sample mapping to apply to transport samples
+ from a domain into another one. Currently the only possible option is
+ "ferradans" which uses the method proposed in [6].
+ limit_max: float, optional (default=10)
+        Controls the semi supervised mode. Transport between labeled source
+        and target samples of different classes will exhibit a cost of
+        limit_max times the maximum value of the cost matrix (default: 10)
+
+ Attributes
+ ----------
+ coupling_ : array-like, shape (n_source_samples, n_target_samples)
+ The optimal coupling
+ log_ : dictionary
+        The dictionary of log, empty dict if parameter log is not True
+
+ References
+ ----------
+
+ .. [1] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016).
+ Scaling algorithms for unbalanced transport problems. arXiv preprint
+ arXiv:1607.05816.
+
+ """
+
+ def __init__(self, reg_e=1., reg_m=0.1, method='sinkhorn',
+ max_iter=10, tol=1e-9, verbose=False, log=False,
+ metric="sqeuclidean", norm=None,
+ distribution_estimation=distribution_estimation_uniform,
+ out_of_sample_map='ferradans', limit_max=10):
+
+ self.reg_e = reg_e
+ self.reg_m = reg_m
+ self.method = method
+ self.max_iter = max_iter
+ self.tol = tol
+ self.verbose = verbose
+ self.log = log
+ self.metric = metric
+ self.norm = norm
+ self.distribution_estimation = distribution_estimation
+ self.out_of_sample_map = out_of_sample_map
+ self.limit_max = limit_max
+
+ def fit(self, Xs, ys=None, Xt=None, yt=None):
+ """Build a coupling matrix from source and target sets of samples
+ (Xs, ys) and (Xt, yt)
+
+ Parameters
+ ----------
+ Xs : array-like, shape (n_source_samples, n_features)
+ The training input samples.
+ ys : array-like, shape (n_source_samples,)
+ The class labels
+ Xt : array-like, shape (n_target_samples, n_features)
+ The training input samples.
+ yt : array-like, shape (n_target_samples,)
+ The class labels. If some target samples are unlabeled, fill the
+ yt's elements with -1.
+
+        Warning: Note that, due to this convention, -1 cannot be used as a
+        class label
+
+ Returns
+ -------
+ self : object
+ Returns self.
+ """
+
+ # check the necessary inputs parameters are here
+ if check_params(Xs=Xs, Xt=Xt):
+
+ super(UnbalancedSinkhornTransport, self).fit(Xs, ys, Xt, yt)
+
+ returned_ = sinkhorn_unbalanced(
+ a=self.mu_s, b=self.mu_t, M=self.cost_,
+ reg=self.reg_e, reg_m=self.reg_m, method=self.method,
+ numItermax=self.max_iter, stopThr=self.tol,
+ verbose=self.verbose, log=self.log)
+
+ # deal with the value of log
+ if self.log:
+ self.coupling_, self.log_ = returned_
+ else:
+ self.coupling_ = returned_
+ self.log_ = dict()
+
+ return self
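+
+
+# A minimal usage sketch for the class above (illustrative data; ``transform``
+# is inherited from BaseTransport and is assumed to perform the barycentric
+# mapping as in the other transport classes):
+#
+#     import numpy as np
+#     rng = np.random.RandomState(42)
+#     Xs = rng.randn(50, 2)
+#     Xt = rng.randn(40, 2) + 1.
+#     ot_da = UnbalancedSinkhornTransport(reg_e=1., reg_m=0.1)
+#     ot_da.fit(Xs=Xs, Xt=Xt)
+#     transp_Xs = ot_da.transform(Xs=Xs)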
diff --git a/ot/datasets.py b/ot/datasets.py
index e76e75d..ba0cfd9 100644
--- a/ot/datasets.py
+++ b/ot/datasets.py
@@ -17,7 +17,6 @@ def make_1D_gauss(n, m, s):
Parameters
----------
-
n : int
number of bins in the histogram
m : float
@@ -25,12 +24,10 @@ def make_1D_gauss(n, m, s):
s : float
        standard deviation of the gaussian distribution
-
Returns
-------
- h : np.array (n,)
- 1D histogram for a gaussian distribution
-
+ h : ndarray (n,)
+ 1D histogram for a gaussian distribution
"""
x = np.arange(n, dtype=np.float64)
h = np.exp(-(x - m)**2 / (2 * s**2))
@@ -44,16 +41,15 @@ def get_1D_gauss(n, m, sigma):
def make_2D_samples_gauss(n, m, sigma, random_state=None):
- """return n samples drawn from 2D gaussian N(m,sigma)
+ """Return n samples drawn from 2D gaussian N(m,sigma)
Parameters
----------
-
n : int
number of samples to make
- m : np.array (2,)
+ m : ndarray, shape (2,)
mean value of the gaussian distribution
- sigma : np.array (2,2)
+ sigma : ndarray, shape (2, 2)
covariance matrix of the gaussian distribution
random_state : int, RandomState instance or None, optional (default=None)
If int, random_state is the seed used by the random number generator;
@@ -63,9 +59,8 @@ def make_2D_samples_gauss(n, m, sigma, random_state=None):
Returns
-------
- X : np.array (n,2)
- n samples drawn from N(m,sigma)
-
+ X : ndarray, shape (n, 2)
+ n samples drawn from N(m, sigma).
"""
generator = check_random_state(random_state)
@@ -86,11 +81,10 @@ def get_2D_samples_gauss(n, m, sigma, random_state=None):
def make_data_classif(dataset, n, nz=.5, theta=0, random_state=None, **kwargs):
- """ dataset generation for classification problems
+ """Dataset generation for classification problems
Parameters
----------
-
dataset : str
type of classification problem (see code)
n : int
@@ -105,13 +99,11 @@ def make_data_classif(dataset, n, nz=.5, theta=0, random_state=None, **kwargs):
Returns
-------
- X : np.array (n,d)
- n observation of size d
- y : np.array (n,)
- labels of the samples
-
+ X : ndarray, shape (n, d)
+        n observations of size d
+ y : ndarray, shape (n,)
+ labels of the samples.
"""
-
generator = check_random_state(random_state)
if dataset.lower() == '3gauss':
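+        # e.g. for this branch (illustrative call, names as defined above):
+        #     X, y = make_data_classif('3gauss', n=100, nz=.5, random_state=0)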
diff --git a/ot/dr.py b/ot/dr.py
index d30ab30..680dabf 100644
--- a/ot/dr.py
+++ b/ot/dr.py
@@ -1,6 +1,12 @@
# -*- coding: utf-8 -*-
"""
Dimension reduction with optimal transport
+
+
+.. warning::
+    Note that by default the module is not imported in :mod:`ot`. In order to
+    use it you need to explicitly import :mod:`ot.dr`
+
"""
# Author: Remi Flamary <remi.flamary@unice.fr>
@@ -43,30 +49,25 @@ def split_classes(X, y):
def fda(X, y, p=2, reg=1e-16):
- """
- Fisher Discriminant Analysis
-
+ """Fisher Discriminant Analysis
Parameters
----------
- X : numpy.ndarray (n,d)
- Training samples
- y : np.ndarray (n,)
- labels for training samples
+ X : ndarray, shape (n, d)
+ Training samples.
+ y : ndarray, shape (n,)
+ Labels for training samples.
p : int, optional
- size of dimensionnality reduction
+        Size of the dimensionality reduction.
reg : float, optional
Regularization term >0 (ridge regularization)
-
Returns
-------
- P : (d x p) ndarray
+ P : ndarray, shape (d, p)
Optimal transportation matrix for the given parameters
- proj : fun
+ proj : callable
projection function including mean centering
-
-
"""
mx = np.mean(X)
@@ -124,37 +125,33 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None):
Parameters
----------
- X : numpy.ndarray (n,d)
- Training samples
- y : np.ndarray (n,)
- labels for training samples
+ X : ndarray, shape (n, d)
+ Training samples.
+ y : ndarray, shape (n,)
+ Labels for training samples.
p : int, optional
- size of dimensionnality reduction
+        Size of the dimensionality reduction.
reg : float, optional
Regularization term >0 (entropic regularization)
- solver : str, optional
- None for steepest decsent or 'TrustRegions' for trust regions algorithm
- else shoudl be a pymanopt.solvers
- P0 : numpy.ndarray (d,p)
- Initial starting point for projection
+ solver : None | str, optional
+ None for steepest descent or 'TrustRegions' for trust regions algorithm
+ else should be a pymanopt.solvers
+ P0 : ndarray, shape (d, p)
+ Initial starting point for projection.
verbose : int, optional
- Print information along iterations
-
-
+ Print information along iterations.
Returns
-------
- P : (d x p) ndarray
+ P : ndarray, shape (d, p)
Optimal transportation matrix for the given parameters
- proj : fun
- projection function including mean centering
-
+ proj : callable
+ Projection function including mean centering.
References
----------
-
- .. [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016). Wasserstein Discriminant Analysis. arXiv preprint arXiv:1608.08063.
-
+ .. [11] Flamary, R., Cuturi, M., Courty, N., & Rakotomamonjy, A. (2016).
+ Wasserstein Discriminant Analysis. arXiv preprint arXiv:1608.08063.
""" # noqa
mx = np.mean(X)
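+    # A minimal usage sketch for wda (needs the optional pymanopt/autograd
+    # dependencies noted in the module warning; data is illustrative):
+    #     rng = np.random.RandomState(0)
+    #     X = rng.randn(60, 10)
+    #     y = rng.randint(0, 2, 60)
+    #     Pwda, projwda = wda(X, y, p=2, reg=1, k=10, maxiter=50)
+    #     Xp = projwda(X)  # samples projected on the discriminant subspace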
diff --git a/ot/externals/funcsigs.py b/ot/externals/funcsigs.py
index c73fdc9..106bde7 100644
--- a/ot/externals/funcsigs.py
+++ b/ot/externals/funcsigs.py
@@ -126,8 +126,8 @@ def signature(obj):
new_params[arg_name] = param.replace(default=arg_value,
_partial_kwarg=True)
- elif (param.kind not in (_VAR_KEYWORD, _VAR_POSITIONAL) and
- not param._partial_kwarg):
+ elif (param.kind not in (_VAR_KEYWORD, _VAR_POSITIONAL)
+ and not param._partial_kwarg):
new_params.pop(arg_name)
return sig.replace(parameters=new_params.values())
@@ -333,11 +333,11 @@ class Parameter(object):
raise TypeError(msg)
def __eq__(self, other):
- return (issubclass(other.__class__, Parameter) and
- self._name == other._name and
- self._kind == other._kind and
- self._default == other._default and
- self._annotation == other._annotation)
+ return (issubclass(other.__class__, Parameter)
+ and self._name == other._name
+ and self._kind == other._kind
+ and self._default == other._default
+ and self._annotation == other._annotation)
def __ne__(self, other):
return not self.__eq__(other)
@@ -372,8 +372,8 @@ class BoundArguments(object):
def args(self):
args = []
for param_name, param in self._signature.parameters.items():
- if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) or
- param._partial_kwarg):
+ if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY)
+ or param._partial_kwarg):
# Keyword arguments mapped by 'functools.partial'
# (Parameter._partial_kwarg is True) are mapped
# in 'BoundArguments.kwargs', along with VAR_KEYWORD &
@@ -402,8 +402,8 @@ class BoundArguments(object):
kwargs_started = False
for param_name, param in self._signature.parameters.items():
if not kwargs_started:
- if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY) or
- param._partial_kwarg):
+ if (param.kind in (_VAR_KEYWORD, _KEYWORD_ONLY)
+ or param._partial_kwarg):
kwargs_started = True
else:
if param_name not in self.arguments:
@@ -432,9 +432,9 @@ class BoundArguments(object):
raise TypeError(msg)
def __eq__(self, other):
- return (issubclass(other.__class__, BoundArguments) and
- self.signature == other.signature and
- self.arguments == other.arguments)
+ return (issubclass(other.__class__, BoundArguments)
+ and self.signature == other.signature
+ and self.arguments == other.arguments)
def __ne__(self, other):
return not self.__eq__(other)
@@ -612,9 +612,9 @@ class Signature(object):
raise TypeError(msg)
def __eq__(self, other):
- if (not issubclass(type(other), Signature) or
- self.return_annotation != other.return_annotation or
- len(self.parameters) != len(other.parameters)):
+ if (not issubclass(type(other), Signature)
+ or self.return_annotation != other.return_annotation
+ or len(self.parameters) != len(other.parameters)):
return False
other_positions = dict((param, idx)
@@ -635,8 +635,8 @@ class Signature(object):
except KeyError:
return False
else:
- if (idx != other_idx or
- param != other.parameters[param_name]):
+ if (idx != other_idx
+ or param != other.parameters[param_name]):
return False
return True
@@ -688,8 +688,8 @@ class Signature(object):
raise TypeError(msg)
parameters_ex = (param,)
break
- elif (param.kind == _VAR_KEYWORD or
- param.default is not _empty):
+ elif (param.kind == _VAR_KEYWORD
+ or param.default is not _empty):
# That's fine too - we have a default value for this
# parameter. So, lets start parsing `kwargs`, starting
# with the current parameter
@@ -755,8 +755,8 @@ class Signature(object):
# if it has a default value, or it is an '*args'-like
# parameter, left alone by the processing of positional
# arguments.
- if (not partial and param.kind != _VAR_POSITIONAL and
- param.default is _empty):
+ if (not partial and param.kind != _VAR_POSITIONAL
+ and param.default is _empty):
raise TypeError('{arg!r} parameter lacking default value'.
format(arg=param_name))
diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py
index deda6b1..1ab95bb 100644
--- a/ot/gpu/__init__.py
+++ b/ot/gpu/__init__.py
@@ -5,11 +5,15 @@ This module provides GPU implementation for several OT solvers and utility
functions. The GPU backend in handled by `cupy
<https://cupy.chainer.org/>`_.
+.. warning::
+    Note that by default the module is not imported in :mod:`ot`. In order to
+    use it you need to explicitly import :mod:`ot.gpu`.
+
By default, the functions in this module accept and return numpy arrays
in order to provide drop-in replacements for the other POT functions, but
the transfer between CPU and GPU comes with a significant overhead.
-In order to get the best erformances, we recommend to give only cupy
+In order to get the best performance, we recommend giving only cupy
arrays to the functions and deactivating the conversion to numpy of the
result of the function with parameter ``to_numpy=False``.
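+
+A minimal sketch of the recommended pattern (``sinkhorn_knopp`` is the GPU
+solver defined in :mod:`ot.gpu.bregman`; the ``to_numpy`` keyword is the one
+described above)::
+
+    import cupy as cp
+    from ot.gpu.bregman import sinkhorn_knopp
+
+    a = cp.ones(100) / 100
+    b = cp.ones(100) / 100
+    M = cp.random.rand(100, 100)
+    # keep the result on the GPU to avoid the CPU/GPU transfer overhead
+    G = sinkhorn_knopp(a, b, M, reg=1., to_numpy=False)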
diff --git a/ot/gpu/bregman.py b/ot/gpu/bregman.py
index 978b307..2e2df83 100644
--- a/ot/gpu/bregman.py
+++ b/ot/gpu/bregman.py
@@ -70,17 +70,6 @@ def sinkhorn_knopp(a, b, M, reg, numItermax=1000, stopThr=1e-9,
log : dict
log dictionary return only if log==True in parameters
- Examples
- --------
-
- >>> import ot
- >>> a=[.5,.5]
- >>> b=[.5,.5]
- >>> M=[[0.,1.],[1.,0.]]
- >>> ot.sinkhorn(a,b,M,1)
- array([[ 0.36552929, 0.13447071],
- [ 0.13447071, 0.36552929]])
-
References
----------
diff --git a/ot/gromov.py b/ot/gromov.py
index 0278e99..9869341 100644
--- a/ot/gromov.py
+++ b/ot/gromov.py
@@ -1,26 +1,25 @@
-
# -*- coding: utf-8 -*-
"""
Gromov-Wasserstein transport method
-
-
"""
# Author: Erwan Vautier <erwan.vautier@gmail.com>
# Nicolas Courty <ncourty@irisa.fr>
# Rémi Flamary <remi.flamary@unice.fr>
+# Titouan Vayer <titouan.vayer@irisa.fr>
#
# License: MIT License
import numpy as np
+
from .bregman import sinkhorn
-from .utils import dist
+from .utils import dist, UndefinedParameter
from .optim import cg
-def init_matrix(C1, C2, T, p, q, loss_fun='square_loss'):
- """ Return loss matrices and tensors for Gromov-Wasserstein fast computation
+def init_matrix(C1, C2, p, q, loss_fun='square_loss'):
+ """Return loss matrices and tensors for Gromov-Wasserstein fast computation
Returns the value of \mathcal{L}(C1,C2) \otimes T with the selected loss
function as the loss function of Gromow-Wasserstein discrepancy.
@@ -32,14 +31,14 @@ def init_matrix(C1, C2, T, p, q, loss_fun='square_loss'):
* C2 : Metric cost matrix in the target space
* T : A coupling between those two spaces
- The square-loss function L(a,b)=(1/2)*|a-b|^2 is read as :
+ The square-loss function L(a,b)=|a-b|^2 is read as :
L(a,b) = f1(a)+f2(b)-h1(a)*h2(b) with :
- * f1(a)=(a^2)/2
- * f2(b)=(b^2)/2
+ * f1(a)=(a^2)
+ * f2(b)=(b^2)
* h1(a)=a
- * h2(b)=b
+ * h2(b)=2*b
- The kl-loss function L(a,b)=(1/2)*|a-b|^2 is read as :
+ The kl-loss function L(a,b)=a*log(a/b)-a+b is read as :
L(a,b) = f1(a)+f2(b)-h1(a)*h2(b) with :
* f1(a)=a*log(a)-a
* f2(b)=b
@@ -49,44 +48,42 @@ def init_matrix(C1, C2, T, p, q, loss_fun='square_loss'):
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
+        Metric cost matrix in the target space
T : ndarray, shape (ns, nt)
- Coupling between source and target spaces
+ Coupling between source and target spaces
p : ndarray, shape (ns,)
-
Returns
-------
-
constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
+ Constant C matrix in Eq. (6)
hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
+ h1(C1) matrix in Eq. (6)
hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
+ h2(C) matrix in Eq. (6)
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
if loss_fun == 'square_loss':
def f1(a):
- return (a**2) / 2
+ return (a**2)
def f2(b):
- return (b**2) / 2
+ return (b**2)
def h1(a):
return a
def h2(b):
- return b
+ return 2 * b
elif loss_fun == 'kl_loss':
def f1(a):
return a * np.log(a + 1e-15) - a
@@ -112,31 +109,29 @@ def init_matrix(C1, C2, T, p, q, loss_fun='square_loss'):
def tensor_product(constC, hC1, hC2, T):
- """ Return the tensor for Gromov-Wasserstein fast computation
+ """Return the tensor for Gromov-Wasserstein fast computation
The tensor is computed as described in Proposition 1 Eq. (6) in [12].
Parameters
----------
constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
+ Constant C matrix in Eq. (6)
hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
+ h1(C1) matrix in Eq. (6)
hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
-
+ h2(C) matrix in Eq. (6)
Returns
-------
-
tens : ndarray, shape (ns, nt)
- \mathcal{L}(C1,C2) \otimes T tensor-matrix multiplication result
+ \mathcal{L}(C1,C2) \otimes T tensor-matrix multiplication result
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
A = -np.dot(hC1, T).dot(hC2.T)
@@ -146,32 +141,31 @@ def tensor_product(constC, hC1, hC2, T):
def gwloss(constC, hC1, hC2, T):
- """ Return the Loss for Gromov-Wasserstein
+ """Return the Loss for Gromov-Wasserstein
The loss is computed as described in Proposition 1 Eq. (6) in [12].
Parameters
----------
constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
+ Constant C matrix in Eq. (6)
hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
+ h1(C1) matrix in Eq. (6)
hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
+ h2(C) matrix in Eq. (6)
T : ndarray, shape (ns, nt)
- Current value of transport matrix T
+ Current value of transport matrix T
Returns
-------
-
loss : float
- Gromov Wasserstein loss
+ Gromov Wasserstein loss
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
@@ -181,32 +175,31 @@ def gwloss(constC, hC1, hC2, T):
def gwggrad(constC, hC1, hC2, T):
- """ Return the gradient for Gromov-Wasserstein
+ """Return the gradient for Gromov-Wasserstein
The gradient is computed as described in Proposition 2 in [12].
Parameters
----------
constC : ndarray, shape (ns, nt)
- Constant C matrix in Eq. (6)
+ Constant C matrix in Eq. (6)
hC1 : ndarray, shape (ns, ns)
- h1(C1) matrix in Eq. (6)
+ h1(C1) matrix in Eq. (6)
hC2 : ndarray, shape (nt, nt)
- h2(C) matrix in Eq. (6)
+ h2(C) matrix in Eq. (6)
T : ndarray, shape (ns, nt)
- Current value of transport matrix T
+ Current value of transport matrix T
Returns
-------
-
grad : ndarray, shape (ns, nt)
Gromov Wasserstein gradient
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
return 2 * tensor_product(constC, hC1, hC2,
@@ -220,19 +213,19 @@ def update_square_loss(p, lambdas, T, Cs):
Parameters
----------
- p : ndarray, shape (N,)
- masses in the targeted barycenter
+ p : ndarray, shape (N,)
+ Masses in the targeted barycenter.
lambdas : list of float
- list of the S spaces' weights
- T : list of S np.ndarray(ns,N)
- the S Ts couplings calculated at each iteration
+ List of the S spaces' weights.
+ T : list of S np.ndarray of shape (ns,N)
+ The S Ts couplings calculated at each iteration.
Cs : list of S ndarray, shape(ns,ns)
- Metric cost matrices
+ Metric cost matrices.
Returns
----------
- C : ndarray, shape (nt,nt)
- updated C matrix
+ C : ndarray, shape (nt, nt)
+ Updated C matrix.
"""
tmpsum = sum([lambdas[s] * np.dot(T[s].T, Cs[s]).dot(T[s])
for s in range(len(T))])
@@ -249,12 +242,12 @@ def update_kl_loss(p, lambdas, T, Cs):
Parameters
----------
p : ndarray, shape (N,)
- weights in the targeted barycenter
+ Weights in the targeted barycenter.
lambdas : list of the S spaces' weights
- T : list of S np.ndarray(ns,N)
- the S Ts couplings calculated at each iteration
+ T : list of S np.ndarray of shape (ns,N)
+ The S Ts couplings calculated at each iteration.
Cs : list of S ndarray, shape(ns,ns)
- Metric cost matrices
+ Metric cost matrices.
Returns
----------
@@ -268,34 +261,33 @@ def update_kl_loss(p, lambdas, T, Cs):
return np.exp(np.divide(tmpsum, ppt))
-def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, **kwargs):
+def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs):
"""
Returns the gromov-wasserstein transport between (C1,p) and (C2,q)
The function solves the following optimization problem:
.. math::
- \GW_Dist = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- p : distribution in the source space
- q : distribution in the target space
- L : loss function to account for the misfit between the similarity matrices
- H : entropy
+ - C1 : Metric cost matrix in the source space
+ - C2 : Metric cost matrix in the target space
+ - p : distribution in the source space
+ - q : distribution in the target space
+ - L : loss function to account for the misfit between the similarity matrices
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
- p : ndarray, shape (ns,)
- distribution in the source space
- q : ndarray, shape (nt,)
- distribution in the target space
- loss_fun : string
+        Metric cost matrix in the target space
+ p : ndarray, shape (ns,)
+ Distribution in the source space
+ q : ndarray, shape (nt,)
+ Distribution in the target space
+ loss_fun : str
loss function used for the solver either 'square_loss' or 'kl_loss'
max_iter : int, optional
@@ -306,16 +298,19 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, **kwargs):
Print information along iterations
log : bool, optional
record log if True
+ armijo : bool, optional
+        If True, the step of the line-search is found via an Armijo line search. Else the closed form is used.
+        If there are convergence issues, use False.
**kwargs : dict
- parameters can be directly pased to the ot.optim.cg solver
+ parameters can be directly passed to the ot.optim.cg solver
Returns
-------
T : ndarray, shape (ns, nt)
- coupling between the two spaces that minimizes :
+        Coupling between the two spaces that minimizes:
\sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
log : dict
- convergence information and loss
+ Convergence information and loss.
References
----------
@@ -329,9 +324,7 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, **kwargs):
"""
- T = np.eye(len(p), len(q))
-
- constC, hC1, hC2 = init_matrix(C1, C2, T, p, q, loss_fun)
+ constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun)
G0 = p[:, None] * q[None, :]
@@ -342,43 +335,41 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, **kwargs):
return gwggrad(constC, hC1, hC2, G)
if log:
- res, log = cg(p, q, 0, 1, f, df, G0, log=True, **kwargs)
+ res, log = cg(p, q, 0, 1, f, df, G0, log=True, armijo=armijo, C1=C1, C2=C2, constC=constC, **kwargs)
log['gw_dist'] = gwloss(constC, hC1, hC2, res)
return res, log
else:
- return cg(p, q, 0, 1, f, df, G0, **kwargs)
+ return cg(p, q, 0, 1, f, df, G0, armijo=armijo, C1=C1, C2=C2, constC=constC, **kwargs)
-def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, **kwargs):
+def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs):
"""
Returns the gromov-wasserstein discrepancy between (C1,p) and (C2,q)
The function solves the following optimization problem:
.. math::
- \GW_Dist = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+ GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- p : distribution in the source space
- q : distribution in the target space
- L : loss function to account for the misfit between the similarity matrices
- H : entropy
+ - C1 : Metric cost matrix in the source space
+ - C2 : Metric cost matrix in the target space
+ - p : distribution in the source space
+ - q : distribution in the target space
+ - L : loss function to account for the misfit between the similarity matrices
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
- p : ndarray, shape (ns,)
- distribution in the source space
+ Metric cost matrix in the target space
+ p : ndarray, shape (ns,)
+ Distribution in the source space.
q : ndarray, shape (nt,)
- distribution in the target space
- loss_fun : string
+ Distribution in the target space.
+ loss_fun : str
loss function used for the solver either 'square_loss' or 'kl_loss'
-
max_iter : int, optional
Max number of iterations
tol : float, optional
@@ -387,6 +378,9 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, **kwargs):
Print information along iterations
log : bool, optional
record log if True
+ armijo : bool, optional
+        If True, the step of the line-search is found via an Armijo line search. Else the closed form is used.
+        If there are convergence issues, use False.
Returns
-------
@@ -407,9 +401,88 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, **kwargs):
"""
- T = np.eye(len(p), len(q))
+ constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun)
+
+ G0 = p[:, None] * q[None, :]
+
+ def f(G):
+ return gwloss(constC, hC1, hC2, G)
+
+ def df(G):
+ return gwggrad(constC, hC1, hC2, G)
+ res, log_gw = cg(p, q, 0, 1, f, df, G0, log=True, armijo=armijo, C1=C1, C2=C2, constC=constC, **kwargs)
+ log_gw['gw_dist'] = gwloss(constC, hC1, hC2, res)
+ log_gw['T'] = res
+ if log:
+ return log_gw['gw_dist'], log_gw
+ else:
+ return log_gw['gw_dist']
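+
+
+# A small usage sketch for the two GW solvers above (illustrative data):
+#
+#     import numpy as np
+#     import ot
+#     rng = np.random.RandomState(0)
+#     xs, xt = rng.randn(20, 2), rng.randn(30, 2)
+#     C1, C2 = ot.dist(xs, xs), ot.dist(xt, xt)
+#     C1 /= C1.max()
+#     C2 /= C2.max()
+#     p, q = ot.unif(20), ot.unif(30)
+#     T = ot.gromov.gromov_wasserstein(C1, C2, p, q, 'square_loss')
+#     gw_dist = ot.gromov.gromov_wasserstein2(C1, C2, p, q, 'square_loss')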
+
+
+def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, armijo=False, log=False, **kwargs):
+ """
+    Computes the FGW transport between two graphs (see [24])
+
+ .. math::
+ \gamma = arg\min_\gamma (1-\\alpha)*<\gamma,M>_F + \\alpha* \sum_{i,j,k,l}
+ L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+
+ s.t. \gamma 1 = p
+ \gamma^T 1= q
+ \gamma\geq 0
+
+ where :
+ - M is the (ns,nt) metric cost matrix
+        - :math:`f` is the regularization term (and df is its gradient)
+        - p and q are source and target weights (sum to 1)
+ - L is a loss function to account for the misfit between the similarity matrices
- constC, hC1, hC2 = init_matrix(C1, C2, T, p, q, loss_fun)
+ The algorithm used for solving the problem is conditional gradient as discussed in [24]_
+
+ Parameters
+ ----------
+ M : ndarray, shape (ns, nt)
+ Metric cost matrix between features across domains
+ C1 : ndarray, shape (ns, ns)
+ Metric cost matrix representative of the structure in the source space
+ C2 : ndarray, shape (nt, nt)
+ Metric cost matrix representative of the structure in the target space
+ p : ndarray, shape (ns,)
+ Distribution in the source space
+ q : ndarray, shape (nt,)
+ Distribution in the target space
+ loss_fun : str, optional
+ Loss function used for the solver
+ max_iter : int, optional
+ Max number of iterations
+ tol : float, optional
+ Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+ armijo : bool, optional
+        If True, the step of the line-search is found via an Armijo line search. Else the closed form is used.
+        If there are convergence issues, use False.
+ **kwargs : dict
+ parameters can be directly passed to the ot.optim.cg solver
+
+ Returns
+ -------
+ gamma : ndarray, shape (ns, nt)
+ Optimal transportation matrix for the given parameters.
+ log : dict
+ Log dictionary return only if log==True in parameters.
+
+ References
+ ----------
+ .. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain
+ and Courty Nicolas "Optimal Transport for structured data with
+ application on graphs", International Conference on Machine Learning
+ (ICML). 2019.
+ """
+
+ constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun)
G0 = p[:, None] * q[None, :]
@@ -418,13 +491,95 @@ def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, **kwargs):
def df(G):
return gwggrad(constC, hC1, hC2, G)
- res, log = cg(p, q, 0, 1, f, df, G0, log=True, **kwargs)
- log['gw_dist'] = gwloss(constC, hC1, hC2, res)
- log['T'] = res
+
if log:
- return log['gw_dist'], log
+ res, log = cg(p, q, M, alpha, f, df, G0, armijo=armijo, C1=C1, C2=C2, constC=constC, log=True, **kwargs)
+        log['fgw_dist'] = log['loss'][-1]
+ return res, log
else:
- return log['gw_dist']
+ return cg(p, q, M, alpha, f, df, G0, armijo=armijo, C1=C1, C2=C2, constC=constC, **kwargs)
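+
+
+# Usage sketch for the FGW solver above; alpha balances the feature cost M
+# against the structure cost (illustrative data):
+#
+#     import numpy as np
+#     import ot
+#     rng = np.random.RandomState(0)
+#     ys, yt = rng.randn(20, 3), rng.randn(30, 3)  # node features
+#     xs, xt = rng.randn(20, 2), rng.randn(30, 2)  # node positions
+#     C1, C2 = ot.dist(xs, xs), ot.dist(xt, xt)    # structure matrices
+#     M = ot.dist(ys, yt)                          # feature cost matrix
+#     p, q = ot.unif(20), ot.unif(30)
+#     gamma = ot.gromov.fused_gromov_wasserstein(
+#         M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5)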
+
+
+def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, armijo=False, log=False, **kwargs):
+ """
+    Computes the FGW distance between two graphs (see [24])
+
+ .. math::
+ \min_\gamma (1-\\alpha)*<\gamma,M>_F + \\alpha* \sum_{i,j,k,l}
+ L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+
+
+ s.t. \gamma 1 = p
+ \gamma^T 1= q
+ \gamma\geq 0
+
+ where :
+ - M is the (ns,nt) metric cost matrix
+        - :math:`f` is the regularization term (and df is its gradient)
+        - p and q are source and target weights (sum to 1)
+ - L is a loss function to account for the misfit between the similarity matrices
+    The algorithm used for solving the problem is conditional gradient as discussed in [24]_
+
+ Parameters
+ ----------
+ M : ndarray, shape (ns, nt)
+ Metric cost matrix between features across domains
+ C1 : ndarray, shape (ns, ns)
+        Metric cost matrix representative of the structure in the source space.
+ C2 : ndarray, shape (nt, nt)
+        Metric cost matrix representative of the structure in the target space.
+ p : ndarray, shape (ns,)
+ Distribution in the source space.
+ q : ndarray, shape (nt,)
+ Distribution in the target space.
+ loss_fun : str, optional
+ Loss function used for the solver.
+ max_iter : int, optional
+ Max number of iterations
+ tol : float, optional
+ Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ Record log if True.
+ armijo : bool, optional
+        If True, the step of the line-search is found via an Armijo line search.
+        Else the closed form is used. If there are convergence issues, use False.
+ **kwargs : dict
+        Parameters can be directly passed to the ot.optim.cg solver.
+
+ Returns
+ -------
+ gamma : ndarray, shape (ns, nt)
+ Optimal transportation matrix for the given parameters.
+ log : dict
+ Log dictionary return only if log==True in parameters.
+
+ References
+ ----------
+ .. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain
+ and Courty Nicolas
+ "Optimal Transport for structured data with application on graphs"
+ International Conference on Machine Learning (ICML). 2019.
+ """
+
+ constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun)
+
+ G0 = p[:, None] * q[None, :]
+
+ def f(G):
+ return gwloss(constC, hC1, hC2, G)
+
+ def df(G):
+ return gwggrad(constC, hC1, hC2, G)
+
+    # do not shadow the log parameter with the solver's log dictionary,
+    # otherwise the test below is always true and the no-log branch fails
+    res, log_fgw = cg(p, q, M, alpha, f, df, G0, armijo=armijo, C1=C1, C2=C2, constC=constC, log=True, **kwargs)
+    log_fgw['fgw_dist'] = log_fgw['loss'][-1]
+    log_fgw['T'] = res
+    if log:
+        return log_fgw['fgw_dist'], log_fgw
+    else:
+        return log_fgw['fgw_dist']
def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
@@ -437,56 +592,55 @@ def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
The function solves the following optimization problem:
.. math::
- \GW = arg\min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
+ GW = arg\min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
- s.t. \GW 1 = p
+ s.t. T 1 = p
- \GW^T 1= q
+ T^T 1= q
- \GW\geq 0
+ T\geq 0
Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- p : distribution in the source space
- q : distribution in the target space
- L : loss function to account for the misfit between the similarity matrices
- H : entropy
+ - C1 : Metric cost matrix in the source space
+ - C2 : Metric cost matrix in the target space
+ - p : distribution in the source space
+ - q : distribution in the target space
+ - L : loss function to account for the misfit between the similarity matrices
+ - H : entropy
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
+        Metric cost matrix in the target space
p : ndarray, shape (ns,)
- distribution in the source space
+ Distribution in the source space
q : ndarray, shape (nt,)
- distribution in the target space
+ Distribution in the target space
loss_fun : string
- loss function used for the solver either 'square_loss' or 'kl_loss'
+ Loss function used for the solver either 'square_loss' or 'kl_loss'
epsilon : float
Regularization term >0
max_iter : int, optional
- Max number of iterations
+ Max number of iterations
tol : float, optional
Stop threshold on error (>0)
verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
+ Record log if True.
Returns
-------
T : ndarray, shape (ns, nt)
- coupling between the two spaces that minimizes :
- \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
+ Optimal coupling between the two spaces
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
@@ -495,7 +649,7 @@ def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
T = np.outer(p, q) # Initialization
- constC, hC1, hC2 = init_matrix(C1, C2, T, p, q, loss_fun)
+ constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun)
cpt = 0
err = 1
@@ -545,28 +699,28 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
The function solves the following optimization problem:
.. math::
- \GW_Dist = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
+ GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}-\epsilon(H(T))
Where :
- C1 : Metric cost matrix in the source space
- C2 : Metric cost matrix in the target space
- p : distribution in the source space
- q : distribution in the target space
- L : loss function to account for the misfit between the similarity matrices
- H : entropy
+ - C1 : Metric cost matrix in the source space
+ - C2 : Metric cost matrix in the target space
+ - p : distribution in the source space
+ - q : distribution in the target space
+ - L : loss function to account for the misfit between the similarity matrices
+ - H : entropy
Parameters
----------
C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
+ Metric cost matrix in the source space
C2 : ndarray, shape (nt, nt)
- Metric costfr matrix in the target space
+        Metric cost matrix in the target space
p : ndarray, shape (ns,)
- distribution in the source space
+ Distribution in the source space
q : ndarray, shape (nt,)
- distribution in the target space
- loss_fun : string
- loss function used for the solver either 'square_loss' or 'kl_loss'
+ Distribution in the target space
+ loss_fun : str
+ Loss function used for the solver either 'square_loss' or 'kl_loss'
epsilon : float
Regularization term >0
max_iter : int, optional
@@ -576,7 +730,7 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
verbose : bool, optional
Print information along iterations
log : bool, optional
- record log if True
+ Record log if True.
Returns
-------
@@ -586,11 +740,10 @@ def entropic_gromov_wasserstein2(C1, C2, p, q, loss_fun, epsilon,
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
-
gw, logv = entropic_gromov_wasserstein(
C1, C2, p, q, loss_fun, epsilon, max_iter, tol, verbose, log=True)
@@ -612,29 +765,31 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
The function solves the following optimization problem:
.. math::
- C = argmin_C\in R^{NxN} \sum_s \lambda_s GW(C,Cs,p,ps)
+ C = argmin_{C\in R^{NxN}} \sum_s \lambda_s GW(C,C_s,p,p_s)
Where :
- Cs : metric cost matrix
- ps : distribution
+ - :math:`C_s` : metric cost matrix
+ - :math:`p_s` : distribution
Parameters
----------
- N : Integer
- Size of the targeted barycenter
- Cs : list of S np.ndarray(ns,ns)
- Metric cost matrices
- ps : list of S np.ndarray(ns,)
- sample weights in the S spaces
- p : ndarray, shape(N,)
- weights in the targeted barycenter
+ N : int
+ Size of the targeted barycenter
+ Cs : list of S np.ndarray of shape (ns,ns)
+ Metric cost matrices
+ ps : list of S np.ndarray of shape (ns,)
+ Sample weights in the S spaces
+ p : ndarray, shape(N,)
+ Weights in the targeted barycenter
lambdas : list of float
- list of the S spaces' weights
- loss_fun : tensor-matrix multiplication function based on specific loss function
- update : function(p,lambdas,T,Cs) that updates C according to a specific Kernel
- with the S Ts couplings calculated at each iteration
+ List of the S spaces' weights.
+ loss_fun : callable
+ Tensor-matrix multiplication function based on specific loss function.
+ update : callable
+ function(p,lambdas,T,Cs) that updates C according to a specific Kernel
+ with the S Ts couplings calculated at each iteration
epsilon : float
Regularization term >0
max_iter : int, optional
@@ -642,11 +797,11 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
tol : float, optional
        Stop threshold on error (>0)
verbose : bool, optional
- Print information along iterations
+ Print information along iterations.
log : bool, optional
- record log if True
- init_C : bool, ndarray, shape(N,N)
- random initial value for the C matrix provided by user
+ Record log if True.
+ init_C : bool | ndarray, shape (N, N)
+        Initial value for the C matrix, if provided by the user (otherwise random).
Returns
-------
@@ -656,9 +811,8 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
-
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
S = len(Cs)
@@ -668,6 +822,7 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
# Initialization of C : random SPD matrix (if not provided by user)
if init_C is None:
+ # XXX use random state
xalea = np.random.randn(N, 2)
C = dist(xalea, xalea)
C /= C.max()
@@ -679,7 +834,7 @@ def entropic_gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun, epsilon,
error = []
- while(err > tol and cpt < max_iter):
+ while (err > tol) and (cpt < max_iter):
Cprev = C
T = [entropic_gromov_wasserstein(Cs[s], C, ps[s], p, loss_fun, epsilon,
@@ -723,37 +878,36 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
.. math::
        C = argmin_{C\in R^{NxN}} \sum_s \lambda_s GW(C,C_s,p,p_s)
-
Where :
- Cs : metric cost matrix
- ps : distribution
+ - Cs : metric cost matrix
+ - ps : distribution
Parameters
----------
- N : Integer
- Size of the targeted barycenter
- Cs : list of S np.ndarray(ns,ns)
- Metric cost matrices
- ps : list of S np.ndarray(ns,)
- sample weights in the S spaces
- p : ndarray, shape(N,)
- weights in the targeted barycenter
+ N : int
+ Size of the targeted barycenter
+ Cs : list of S np.ndarray of shape (ns, ns)
+ Metric cost matrices
+ ps : list of S np.ndarray of shape (ns,)
+ Sample weights in the S spaces
+ p : ndarray, shape (N,)
+ Weights in the targeted barycenter
lambdas : list of float
- list of the S spaces' weights
+ List of the S spaces' weights
loss_fun : tensor-matrix multiplication function based on specific loss function
update : function(p,lambdas,T,Cs) that updates C according to a specific Kernel
with the S Ts couplings calculated at each iteration
max_iter : int, optional
Max number of iterations
tol : float, optional
- Stop threshol on error (>0)
+        Stop threshold on error (>0).
verbose : bool, optional
- Print information along iterations
+ Print information along iterations.
log : bool, optional
- record log if True
- init_C : bool, ndarray, shape(N,N)
- random initial value for the C matrix provided by user
+ Record log if True.
+ init_C : bool | ndarray, shape(N,N)
+        Initial value for the C matrix, if provided by the user (otherwise random).
Returns
-------
@@ -763,11 +917,10 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
References
----------
.. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
"""
-
S = len(Cs)
Cs = [np.asarray(Cs[s], dtype=np.float64) for s in range(S)]
@@ -775,6 +928,7 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
# Initialization of C : random SPD matrix (if not provided by user)
if init_C is None:
+ # XXX : should use a random state and not use the global seed
xalea = np.random.randn(N, 2)
C = dist(xalea, xalea)
C /= C.max()
@@ -815,3 +969,209 @@ def gromov_barycenters(N, Cs, ps, p, lambdas, loss_fun,
cpt += 1
return C
+
+
+def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_features=False,
+ p=None, loss_fun='square_loss', max_iter=100, tol=1e-9,
+ verbose=False, log=False, init_C=None, init_X=None):
+ """Compute the fgw barycenter as presented eq (5) in [24].
+
+ Parameters
+ ----------
+ N : integer
+ Desired number of samples of the target barycenter
+ Ys: list of ndarray, each element has shape (ns,d)
+ Features of all samples
+ Cs : list of ndarray, each element has shape (ns,ns)
+ Structure matrices of all samples
+ ps : list of ndarray, each element has shape (ns,)
+ Masses of all samples.
+ lambdas : list of float
+ List of the S spaces' weights
+ alpha : float
+ Alpha parameter for the fgw distance
+ fixed_structure : bool
+ Whether to fix the structure of the barycenter during the updates
+ fixed_features : bool
+ Whether to fix the feature of the barycenter during the updates
+ p : ndarray, shape (N,), optional
+ Weights in the targeted barycenter (uniform weights if None).
+ loss_fun : str, optional
+ Loss function used ('square_loss' by default).
+ max_iter : int, optional
+ Max number of iterations.
+ tol : float, optional
+ Stop threshold on error (>0).
+ verbose : bool, optional
+ Print information along iterations.
+ log : bool, optional
+ Record log if True.
+ init_C : ndarray, shape (N,N), optional
+ Initialization for the barycenters' structure matrix. If not set
+ a random init is used.
+ init_X : ndarray, shape (N,d), optional
+ Initialization for the barycenters' features. If not set a
+ random init is used.
+
+ Returns
+ -------
+ X : ndarray, shape (N, d)
+ Barycenters' features
+ C : ndarray, shape (N, N)
+ Barycenters' structure matrix
+ log_: dict
+ Only returned when log=True. It contains the keys:
+ T : list of (N,ns) transport matrices
+ Ms : all distance matrices between the features of the barycenter and
+ the input features dist(X, Ys), each of shape (N, ns)
+
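+ Examples
+ --------
+ A minimal sketch on random toy data (shapes and values are illustrative,
+ not an official doctest):
+
+ >>> import numpy as np
+ >>> import ot
+ >>> np.random.seed(0)
+ >>> Ys = [np.random.randn(ns, 3) for ns in [5, 6]] # node features
+ >>> Cs = [ot.dist(np.random.randn(ns, 2)) for ns in [5, 6]] # structures
+ >>> ps = [ot.unif(5), ot.unif(6)]
+ >>> X, C = ot.gromov.fgw_barycenters(4, Ys, Cs, ps, [0.5, 0.5], alpha=0.5)
+ >>> X.shape, C.shape
+ ((4, 3), (4, 4))
+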
+ References
+ ----------
+ .. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain
+ and Courty Nicolas
+ "Optimal Transport for structured data with application on graphs"
+ International Conference on Machine Learning (ICML). 2019.
+ """
+ S = len(Cs)
+ d = Ys[0].shape[1] # dimension of the node features
+ if p is None:
+ p = np.ones(N) / N
+
+ Cs = [np.asarray(Cs[s], dtype=np.float64) for s in range(S)]
+ Ys = [np.asarray(Ys[s], dtype=np.float64) for s in range(S)]
+
+ lambdas = np.asarray(lambdas, dtype=np.float64)
+
+ if fixed_structure:
+ if init_C is None:
+ raise UndefinedParameter('If C is fixed it must be initialized')
+ else:
+ C = init_C
+ else:
+ if init_C is None:
+ xalea = np.random.randn(N, 2)
+ C = dist(xalea, xalea)
+ else:
+ C = init_C
+
+ if fixed_features:
+ if init_X is None:
+ raise UndefinedParameter('If X is fixed it must be initialized')
+ else:
+ X = init_X
+ else:
+ if init_X is None:
+ X = np.zeros((N, d))
+ else:
+ X = init_X
+
+ T = [np.outer(p, q) for q in ps]
+
+ Ms = [np.asarray(dist(X, Ys[s]), dtype=np.float64) for s in range(len(Ys))] # Ms is N,ns
+
+ cpt = 0
+ err_feature = 1
+ err_structure = 1
+
+ if log:
+ log_ = {}
+ log_['err_feature'] = []
+ log_['err_structure'] = []
+ log_['Ts_iter'] = []
+
+ while((err_feature > tol or err_structure > tol) and cpt < max_iter):
+ Cprev = C
+ Xprev = X
+
+ if not fixed_features:
+ Ys_temp = [y.T for y in Ys]
+ X = update_feature_matrix(lambdas, Ys_temp, T, p).T
+
+ Ms = [np.asarray(dist(X, Ys[s]), dtype=np.float64) for s in range(len(Ys))]
+
+ if not fixed_structure:
+ if loss_fun == 'square_loss':
+ T_temp = [t.T for t in T]
+ C = update_sructure_matrix(p, lambdas, T_temp, Cs)
+
+ T = [fused_gromov_wasserstein((1 - alpha) * Ms[s], C, Cs[s], p, ps[s], loss_fun, alpha,
+ numItermax=max_iter, stopThr=1e-5, verbose=verbose) for s in range(S)]
+
+ # T is N,ns
+ err_feature = np.linalg.norm(X - Xprev.reshape(N, d))
+ err_structure = np.linalg.norm(C - Cprev)
+
+ if log:
+ log_['err_feature'].append(err_feature)
+ log_['err_structure'].append(err_structure)
+ log_['Ts_iter'].append(T)
+
+ if verbose:
+ if cpt % 200 == 0:
+ print('{:5s}|{:12s}'.format(
+ 'It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err_structure))
+ print('{:5d}|{:8e}|'.format(cpt, err_feature))
+
+ cpt += 1
+
+ if log:
+ log_['T'] = T # from target to Ys
+ log_['p'] = p
+ log_['Ms'] = Ms
+
+ if log:
+ return X, C, log_
+ else:
+ return X, C
+
+
+def update_sructure_matrix(p, lambdas, T, Cs):
+ """Updates C according to the L2 Loss kernel with the S Ts couplings.
+
+ It is calculated at each iteration
+
+ Parameters
+ ----------
+ p : ndarray, shape (N,)
+ Masses in the targeted barycenter.
+ lambdas : list of float
+ List of the S spaces' weights.
+ T : list of S ndarray of shape (ns, N)
+ The S Ts couplings calculated at each iteration.
+ Cs : list of S ndarray, shape (ns, ns)
+ Metric cost matrices.
+
+ Returns
+ -------
+ C : ndarray, shape (N, N)
+ Updated C matrix.
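+
+ For the square loss this computes, with entrywise division (a direct
+ reading of the code below, not a formula quoted from [12]):
+
+ .. math::
+ C = \frac{\sum_s \lambda_s T_s^T C_s T_s}{p p^T}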
+ """
+ tmpsum = sum([lambdas[s] * np.dot(T[s].T, Cs[s]).dot(T[s]) for s in range(len(T))])
+ ppt = np.outer(p, p)
+
+ return np.divide(tmpsum, ppt)
+
+
+def update_feature_matrix(lambdas, Ys, Ts, p):
+ """Updates the feature with respect to the S Ts couplings.
+
+
+ See "Solving the barycenter problem with Block Coordinate Descent (BCD)"
+ in [24] calculated at each iteration
+
+ Parameters
+ ----------
+ p : ndarray, shape (N,)
+ Masses in the targeted barycenter.
+ lambdas : list of float
+ List of the S spaces' weights.
+ Ts : list of S ndarray, shape (N, ns)
+ The S Ts couplings calculated at each iteration.
+ Ys : list of S ndarray, shape (d, ns)
+ The features.
+
+ Returns
+ -------
+ X : ndarray, shape (d, N)
+
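+ Notes
+ -----
+ In matrix form the update reads (a transcription of the code below):
+
+ .. math::
+ X = \left(\sum_s \lambda_s Y_s T_s^T\right) \mathrm{diag}(1 / p)
+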
+ References
+ ----------
+ .. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain
+ and Courty Nicolas
+ "Optimal Transport for structured data with application on graphs"
+ International Conference on Machine Learning (ICML). 2019.
+ """
+ p = np.array(1. / p).reshape(-1,)
+
+ tmpsum = sum([lambdas[s] * np.dot(Ys[s], Ts[s].T) * p[None, :] for s in range(len(Ts))])
+
+ return tmpsum
diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h
index f42e222..2adaace 100644
--- a/ot/lp/EMD.h
+++ b/ot/lp/EMD.h
@@ -32,4 +32,9 @@ enum ProblemType {
int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter);
+int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D,
+ long *iG, long *jG, double *G, long * nG,
+ double* alpha, double* beta, double *cost, int maxIter);
+
+
#endif
diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp
index fc7ca63..28e4af2 100644
--- a/ot/lp/EMD_wrapper.cpp
+++ b/ot/lp/EMD_wrapper.cpp
@@ -17,13 +17,13 @@
int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G,
double* alpha, double* beta, double *cost, int maxIter) {
-// beware M and C anre strored in row major C style!!!
- int n, m, i, cur;
+ // beware M and C are stored in row major C style!!!
+ int n, m, i, cur;
typedef FullBipartiteDigraph Digraph;
- DIGRAPH_TYPEDEFS(FullBipartiteDigraph);
+ DIGRAPH_TYPEDEFS(FullBipartiteDigraph);
- // Get the number of non zero coordinates for r and c
+ // Get the number of non zero coordinates for r and c
n=0;
for (int i=0; i<n1; i++) {
double val=*(X+i);
@@ -105,3 +105,186 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G,
return ret;
}
+
+
+int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D,
+ long *iG, long *jG, double *G, long * nG,
+ double* alpha, double* beta, double *cost, int maxIter) {
+ // beware M and C are stored in row major C style!!!
+
+ // Get the number of non zero coordinates for r and c and vectors
+ int n, m, i, cur;
+
+ typedef FullBipartiteDigraph Digraph;
+ DIGRAPH_TYPEDEFS(FullBipartiteDigraph);
+
+ // Get the number of non zero coordinates for r and c
+ n=0;
+ for (int i=0; i<n1; i++) {
+ double val=*(X+i);
+ if (val>0) {
+ n++;
+ }else if(val<0){
+ return INFEASIBLE;
+ }
+ }
+ m=0;
+ for (int i=0; i<n2; i++) {
+ double val=*(Y+i);
+ if (val>0) {
+ m++;
+ }else if(val<0){
+ return INFEASIBLE;
+ }
+ }
+
+ // Define the graph
+
+ std::vector<int> indI(n), indJ(m);
+ std::vector<double> weights1(n), weights2(m);
+ Digraph di(n, m);
+ NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m, maxIter);
+
+ // Set supply and demand, don't account for 0 values (faster)
+
+ cur=0;
+ for (int i=0; i<n1; i++) {
+ double val=*(X+i);
+ if (val>0) {
+ weights1[ cur ] = val;
+ indI[cur++]=i;
+ }
+ }
+
+ // Demand is actually negative supply...
+
+ cur=0;
+ for (int i=0; i<n2; i++) {
+ double val=*(Y+i);
+ if (val>0) {
+ weights2[ cur ] = -val;
+ indJ[cur++]=i;
+ }
+ }
+
+ // Define the graph
+ net.supplyMap(&weights1[0], n, &weights2[0], m);
+
+ // Set the cost of each edge
+ for (int i=0; i<n; i++) {
+ for (int j=0; j<m; j++) {
+ double val=*(D+indI[i]*n2+indJ[j]);
+ net.setCost(di.arcFromId(i*m+j), val);
+ }
+ }
+
+
+ // Solve the problem with the network simplex algorithm
+
+ int ret=net.run();
+ if (ret==(int)net.OPTIMAL || ret==(int)net.MAX_ITER_REACHED) {
+ *cost = 0;
+ Arc a; di.first(a);
+ cur=0;
+ for (; a != INVALID; di.next(a)) {
+ int i = di.source(a);
+ int j = di.target(a);
+ double flow = net.flow(a);
+ if (flow>0)
+ {
+ *cost += flow * (*(D+indI[i]*n2+indJ[j-n]));
+
+ *(G+cur) = flow;
+ *(iG+cur) = indI[i];
+ *(jG+cur) = indJ[j-n];
+ *(alpha + indI[i]) = -net.potential(i);
+ *(beta + indJ[j-n]) = net.potential(j);
+ cur++;
+ }
+ }
+ *nG=cur; // number of non zero values stored in the sparse plan
+
+ }
+
+
+ return ret;
+}
+
+int EMD_wrap_all_sparse(int n1, int n2, double *X, double *Y,
+ long *iD, long *jD, double *D, long nD,
+ long *iG, long *jG, double *G, long * nG,
+ double* alpha, double* beta, double *cost, int maxIter) {
+ // beware M and C are stored in row major C style!!!
+
+ // Get the number of non zero coordinates for r and c and vectors
+ int n, m, cur;
+
+ typedef FullBipartiteDigraph Digraph;
+ DIGRAPH_TYPEDEFS(FullBipartiteDigraph);
+
+ n=n1;
+ m=n2;
+
+
+ // Define the graph
+
+
+ std::vector<double> weights2(m);
+ Digraph di(n, m);
+ NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m, maxIter);
+
+ // Set supply and demand, don't account for 0 values (faster)
+
+
+ // Demand is actually negative supply...
+
+ cur=0;
+ for (int i=0; i<n2; i++) {
+ double val=*(Y+i);
+ if (val>0) {
+ // index directly by i: all m=n2 demand slots are kept (no compaction)
+ weights2[ i ] = -val;
+ }
+ }
+
+ // Define the graph
+ net.supplyMap(X, n, &weights2[0], m);
+
+ // Set the cost of each edge
+ for (int k=0; k<nD; k++) {
+ int i = iD[k];
+ int j = jD[k];
+ net.setCost(di.arcFromId(i*m+j), D[k]);
+
+ }
+
+
+ // Solve the problem with the network simplex algorithm
+
+ int ret=net.run();
+ if (ret==(int)net.OPTIMAL || ret==(int)net.MAX_ITER_REACHED) {
+ *cost = net.totalCost();
+ Arc a; di.first(a);
+ cur=0;
+ for (; a != INVALID; di.next(a)) {
+ int i = di.source(a);
+ int j = di.target(a);
+ double flow = net.flow(a);
+ if (flow>0)
+ {
+
+ *(G+cur) = flow;
+ *(iG+cur) = i;
+ *(jG+cur) = j-n;
+ *(alpha + i) = -net.potential(i);
+ *(beta + j-n) = net.potential(j);
+ cur++;
+ }
+ }
+ *nG=cur; // number of non zero values stored in the sparse plan
+
+ }
+
+
+ return ret;
+}
+
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 02cbd8c..cdd505d 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -1,6 +1,9 @@
# -*- coding: utf-8 -*-
"""
Solvers for the original linear program OT problem
+
+
+
"""
# Author: Remi Flamary <remi.flamary@unice.fr>
@@ -8,22 +11,171 @@ Solvers for the original linear program OT problem
# License: MIT License
import multiprocessing
-
+import sys
import numpy as np
+from scipy.sparse import coo_matrix
from .import cvx
# import compiled emd
-from .emd_wrap import emd_c, check_result
+from .emd_wrap import emd_c, check_result, emd_1d_sorted
from ..utils import parmap
from .cvx import barycenter
from ..utils import dist
-__all__=['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx']
+__all__ = ['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx',
+ 'emd_1d', 'emd2_1d', 'wasserstein_1d']
-def emd(a, b, M, numItermax=100000, log=False):
- """Solves the Earth Movers distance problem and returns the OT matrix
+def center_ot_dual(alpha0, beta0, a=None, b=None):
+ r"""Center dual OT potentials w.r.t. theirs weights
+
+ The main idea of this function is to find unique dual potentials
+ that ensure some kind of centering/fairness. It selects the dual
+ potentials that lead to the same final objective value for both
+ source and target (see below for more details). This helps to get
+ stable results when the OT solver is called several times with
+ small changes in the inputs.
+
+ Basically we add another constraint to the potentials that will not
+ change the objective value but will ensure uniqueness. The constraint
+ is the following:
+
+ .. math::
+ \alpha^T a= \beta^T b
+
+ in addition to the OT problem constraints.
+
+ Since :math:`\sum_i a_i=\sum_j b_j`, this can be done by adding a
+ constant to :math:`\alpha_0` and subtracting it from :math:`\beta_0`.
+
+ .. math::
+ c = \frac{\beta_0^T b - \alpha_0^T a}{1^T a + 1^T b}
+
+ \alpha = \alpha_0 + c
+
+ \beta = \beta_0 - c
+
+ Parameters
+ ----------
+ alpha0 : (ns,) numpy.ndarray, float64
+ Source dual potential
+ beta0 : (nt,) numpy.ndarray, float64
+ Target dual potential
+ a : (ns,) numpy.ndarray, float64
+ Source histogram (uniform weight if empty list)
+ b : (nt,) numpy.ndarray, float64
+ Target histogram (uniform weight if empty list)
+
+ Returns
+ -------
+ alpha : (ns,) numpy.ndarray, float64
+ Source centered dual potential
+ beta : (nt,) numpy.ndarray, float64
+ Target centered dual potential
+
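+ Examples
+ --------
+ A quick numerical sketch (toy values; the centered potentials satisfy
+ :math:`\alpha^T a = \beta^T b`):
+
+ >>> import numpy as np
+ >>> from ot.lp import center_ot_dual
+ >>> a, b = np.array([0.5, 0.5]), np.array([1.])
+ >>> alpha0, beta0 = np.array([0., 1.]), np.array([2.])
+ >>> alpha, beta = center_ot_dual(alpha0, beta0, a, b)
+ >>> float(alpha.dot(a)), float(beta.dot(b))
+ (1.25, 1.25)
+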
+ """
+ # if no weights are provided, use uniform
+ if a is None:
+ a = np.ones(alpha0.shape[0]) / alpha0.shape[0]
+ if b is None:
+ b = np.ones(beta0.shape[0]) / beta0.shape[0]
+
+ # compute constant that balances the weighted sums of the duals
+ c = (b.dot(beta0) - a.dot(alpha0)) / (a.sum() + b.sum())
+
+ # update duals
+ alpha = alpha0 + c
+ beta = beta0 - c
+
+ return alpha, beta
+
+
+def estimate_dual_null_weights(alpha0, beta0, a, b, M):
+ r"""Estimate feasible values for 0-weighted dual potentials
+
+ The feasible values are computed efficiently but rather coarsely.
+
+ .. warning::
+ This function is necessary because the C++ solver in emd_c
+ discards all samples in the distributions with
+ zeros weights. This means that while the primal variable (transport
+ matrix) is exact, the solver only returns feasible dual potentials
+ on the samples with weights different from zero.
+
+ First we compute the constraints violations:
+
+ .. math::
+ V=\alpha+\beta^T-M
+
+ Next we compute the max amount of violation per row (alpha) and
+ columns (beta)
+
+ .. math::
+ v^a_i=\max_j V_{i,j}
+
+ v^b_j=\max_i V_{i,j}
+
+ Finally we update the dual potentials of the samples with zero
+ weight whenever a constraint is violated
+
+ .. math::
+ \alpha_i = \alpha_i -v^a_i \quad \text{ if } a_i=0 \text{ and } v^a_i>0
+
+ \beta_j = \beta_j -v^b_j \quad \text{ if } b_j=0 \text{ and } v^b_j>0
+
+ In the end the dual potentials are centered using function
+ :ref:`center_ot_dual`.
+
+ Note that all those updates do not change the objective value of the
+ solution but provide dual potentials that do not violate the constraints.
+
+ Parameters
+ ----------
+ alpha0 : (ns,) numpy.ndarray, float64
+ Source dual potential
+ beta0 : (nt,) numpy.ndarray, float64
+ Target dual potential
+ a : (ns,) numpy.ndarray, float64
+ Source distribution (uniform weights if empty list)
+ b : (nt,) numpy.ndarray, float64
+ Target distribution (uniform weights if empty list)
+ M : (ns,nt) numpy.ndarray, float64
+ Loss matrix (c-order array with type float64)
+
+ Returns
+ -------
+ alpha : (ns,) numpy.ndarray, float64
+ Source corrected dual potential
+ beta : (nt,) numpy.ndarray, float64
+ Target corrected dual potential
+
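+ Examples
+ --------
+ A toy sketch with one zero-weight source sample whose initial potential
+ violates the constraints (illustrative values):
+
+ >>> import numpy as np
+ >>> from ot.lp import estimate_dual_null_weights
+ >>> a = np.array([0.5, 0.5, 0.]) # last sample has zero weight
+ >>> b = np.array([0.5, 0.5])
+ >>> M = np.array([[0., 1.], [1., 0.], [0.5, 0.5]])
+ >>> alpha0 = np.array([0., 0., 1.]) # infeasible on the last row
+ >>> beta0 = np.array([0., 0.])
+ >>> alpha, beta = estimate_dual_null_weights(alpha0, beta0, a, b, M)
+ >>> bool(np.all(alpha[:, None] + beta[None, :] <= M + 1e-12))
+ True
+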
+ """
+
+ # binary indexing of non-zeros weights
+ asel = a != 0
+ bsel = b != 0
+
+ # compute dual constraints violation
+ constraint_violation = alpha0[:, None] + beta0[None, :] - M
+
+ # compute the largest violation per row and per column
+ aviol = np.max(constraint_violation, 1)
+ bviol = np.max(constraint_violation, 0)
+
+ # correction terms: only active for zero-weight samples with violations
+ alpha_up = -1 * ~asel * np.maximum(aviol, 0)
+ beta_up = -1 * ~bsel * np.maximum(bviol, 0)
+
+ alpha = alpha0 + alpha_up
+ beta = beta0 + beta_up
+
+ return center_ot_dual(alpha, beta, a, b)
+
+
+def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True):
+ r"""Solves the Earth Movers distance problem and returns the OT matrix
.. math::
@@ -37,26 +189,37 @@ def emd(a, b, M, numItermax=100000, log=False):
- M is the metric cost matrix
- a and b are the sample weights
+ .. warning::
+ Note that the M matrix needs to be a C-order numpy.array in float64
+ format.
+
Uses the algorithm proposed in [1]_
Parameters
----------
- a : (ns,) ndarray, float64
- Source histogram (uniform weigth if empty list)
- b : (nt,) ndarray, float64
- Target histogram (uniform weigth if empty list)
- M : (ns,nt) ndarray, float64
- loss matrix
+ a : (ns,) numpy.ndarray, float64
+ Source histogram (uniform weight if empty list)
+ b : (nt,) numpy.ndarray, float64
+ Target histogram (uniform weight if empty list)
+ M : (ns,nt) numpy.ndarray, float64
+ Loss matrix (c-order array with type float64)
numItermax : int, optional (default=100000)
The maximum number of iterations before stopping the optimization
algorithm if it has not converged.
- log: boolean, optional (default=False)
+ log: bool, optional (default=False)
If True, returns a dictionary containing the cost and dual
variables. Otherwise returns only the optimal transportation matrix.
+ dense: boolean, optional (default=True)
+ If True, returns :math:`\gamma` as a dense ndarray of shape (ns, nt).
+ Otherwise returns a sparse representation using scipy's `coo_matrix`
+ format.
+ center_dual: boolean, optional (default=True)
+ If True, centers the dual potential using function
+ :ref:`center_ot_dual`.
Returns
-------
- gamma: (ns x nt) ndarray
+ gamma: (ns x nt) numpy.ndarray
Optimal transportation matrix for the given parameters
log: dict
If input log is true, a dictionary containing the cost and dual
@@ -74,8 +237,8 @@ def emd(a, b, M, numItermax=100000, log=False):
>>> b=[.5,.5]
>>> M=[[0.,1.],[1.,0.]]
>>> ot.emd(a,b,M)
- array([[ 0.5, 0. ],
- [ 0. , 0.5]])
+ array([[0.5, 0. ],
+ [0. , 0.5]])
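+
+ A sparse plan can be requested instead (a sketch of the same problem;
+ the result is returned as a scipy coo_matrix):
+
+ >>> G = ot.emd(a, b, M, dense=False)
+ >>> G.toarray()
+ array([[0.5, 0. ],
+ [0. , 0.5]])
+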
References
----------
@@ -94,13 +257,37 @@ def emd(a, b, M, numItermax=100000, log=False):
b = np.asarray(b, dtype=np.float64)
M = np.asarray(M, dtype=np.float64)
- # if empty array given then use unifor distributions
+ # if empty array given then use uniform distributions
if len(a) == 0:
a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
if len(b) == 0:
b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
- G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
+ assert (a.shape[0] == M.shape[0] and b.shape[0] == M.shape[1]), \
+ "Dimension mismatch, check dimensions of M with a and b"
+
+ asel = a != 0
+ bsel = b != 0
+
+ if dense:
+ G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
+
+ if center_dual:
+ u, v = center_ot_dual(u, v, a, b)
+
+ if np.any(~asel) or np.any(~bsel):
+ u, v = estimate_dual_null_weights(u, v, a, b, M)
+
+ else:
+ Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
+ G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
+
+ if center_dual:
+ u, v = center_ot_dual(u, v, a, b)
+
+ if np.any(~asel) or np.any(~bsel):
+ u, v = estimate_dual_null_weights(u, v, a, b, M)
+
result_code_string = check_result(result_code)
if log:
log = {}
@@ -114,8 +301,9 @@ def emd(a, b, M, numItermax=100000, log=False):
def emd2(a, b, M, processes=multiprocessing.cpu_count(),
- numItermax=100000, log=False, return_matrix=False):
- """Solves the Earth Movers distance problem and returns the loss
+ numItermax=100000, log=False, dense=True, return_matrix=False,
+ center_dual=True):
+ r"""Solves the Earth Movers distance problem and returns the loss
.. math::
\gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, M \rangle_F
@@ -128,16 +316,22 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
- M is the metric cost matrix
- a and b are the sample weights
+ .. warning::
+ Note that the M matrix needs to be a C-order numpy.array in float64
+ format.
+
Uses the algorithm proposed in [1]_
Parameters
----------
- a : (ns,) ndarray, float64
- Source histogram (uniform weigth if empty list)
- b : (nt,) ndarray, float64
- Target histogram (uniform weigth if empty list)
- M : (ns,nt) ndarray, float64
- loss matrix
+ a : (ns,) numpy.ndarray, float64
+ Source histogram (uniform weight if empty list)
+ b : (nt,) numpy.ndarray, float64
+ Target histogram (uniform weight if empty list)
+ M : (ns,nt) numpy.ndarray, float64
+ Loss matrix (c-order array with type float64)
+ processes : int, optional (default=nb cpu)
+ Number of processes used for multiple emd computations (not used on Windows)
numItermax : int, optional (default=100000)
The maximum number of iterations before stopping the optimization
algorithm if it has not converged.
@@ -146,12 +340,19 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
variables. Otherwise returns only the optimal transportation cost.
return_matrix: boolean, optional (default=False)
If True, returns the optimal transportation matrix in the log.
+ dense: boolean, optional (default=True)
+ If True, returns :math:`\gamma` as a dense ndarray of shape (ns, nt).
+ Otherwise returns a sparse representation using scipy's `coo_matrix`
+ format.
+ center_dual: boolean, optional (default=True)
+ If True, centers the dual potential using function
+ :ref:`center_ot_dual`.
Returns
-------
W: float
Optimal transportation loss for the given parameters
- log: dict
+ log: dict
If input log is true, a dictionary containing the cost and dual
variables and exit status
@@ -187,27 +388,61 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
b = np.asarray(b, dtype=np.float64)
M = np.asarray(M, dtype=np.float64)
- # if empty array given then use unifor distributions
+ # problem with pickling forks on windows
+ if sys.platform.endswith('win32'):
+ processes = 1
+
+ # if empty array given then use uniform distributions
if len(a) == 0:
a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
if len(b) == 0:
b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
+ assert (a.shape[0] == M.shape[0] and b.shape[0] == M.shape[1]), \
+ "Dimension mismatch, check dimensions of M with a and b"
+
+ asel = a != 0
+
if log or return_matrix:
def f(b):
- G, cost, u, v, resultCode = emd_c(a, b, M, numItermax)
- result_code_string = check_result(resultCode)
+ bsel = b != 0
+ if dense:
+ G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
+ else:
+ Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
+ G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
+
+ if center_dual:
+ u, v = center_ot_dual(u, v, a, b)
+
+ if np.any(~asel) or np.any(~bsel):
+ u, v = estimate_dual_null_weights(u, v, a, b, M)
+
+ result_code_string = check_result(result_code)
log = {}
if return_matrix:
log['G'] = G
log['u'] = u
log['v'] = v
log['warning'] = result_code_string
- log['result_code'] = resultCode
+ log['result_code'] = result_code
return [cost, log]
else:
def f(b):
- G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
+ bsel = b != 0
+ if dense:
+ G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
+ else:
+ Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
+ G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
+
+ if center_dual:
+ u, v = center_ot_dual(u, v, a, b)
+
+ if np.any(~asel) or np.any(~bsel):
+ u, v = estimate_dual_null_weights(u, v, a, b, M)
+
+ result_code_string = check_result(result_code)
check_result(result_code)
return cost
@@ -215,9 +450,12 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
return f(b)
nb = b.shape[1]
- res = parmap(f, [b[:, i] for i in range(nb)], processes)
- return res
+ if processes > 1:
+ res = parmap(f, [b[:, i] for i in range(nb)], processes)
+ else:
+ res = list(map(f, [b[:, i].copy() for i in range(nb)]))
+ return res
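+
+
+# Example (a sketch, not an official doctest): with a 2-D ``b`` each column
+# is treated as a separate target histogram and a list of costs is returned
+# (computed in parallel when processes > 1):
+#
+# >>> import numpy as np; import ot
+# >>> B = np.array([[0.5, 1.0], [0.5, 0.0]]) # two histograms as columns
+# >>> ot.emd2(ot.unif(2), B, np.array([[0., 1.], [1., 0.]]))
+# [0.0, 0.5]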
def free_support_barycenter(measures_locations, measures_weights, X_init, b=None, weights=None, numItermax=100, stopThr=1e-7, verbose=False, log=None):
@@ -231,9 +469,9 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
Parameters
----------
- measures_locations : list of (k_i,d) np.ndarray
+ measures_locations : list of (k_i,d) numpy.ndarray
The discrete support of a measure supported on k_i locations of a d-dimensional space (k_i can be different for each element of the list)
- measures_weights : list of (k_i,) np.ndarray
+ measures_weights : list of (k_i,) numpy.ndarray
Numpy arrays where each numpy array has k_i non-negatives values summing to one representing the weights of each discrete input measure
X_init : (k,d) np.ndarray
@@ -246,7 +484,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
numItermax : int, optional
Max number of iterations
stopThr : float, optional
- Stop threshol on error (>0)
+ Stop threshold on error (>0)
verbose : bool, optional
Print information along iterations
log : bool, optional
@@ -272,7 +510,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
k = X_init.shape[0]
d = X_init.shape[1]
if b is None:
- b = np.ones((k,))/k
+ b = np.ones((k,)) / k
if weights is None:
weights = np.ones((N,)) / N
@@ -283,7 +521,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
displacement_square_norm = stopThr + 1.
- while ( displacement_square_norm > stopThr and iter_count < numItermax ):
+ while (displacement_square_norm > stopThr and iter_count < numItermax):
T_sum = np.zeros((k, d))
@@ -293,7 +531,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
T_i = emd(b, measure_weights_i, M_i)
T_sum = T_sum + weight_i * np.reshape(1. / b, (-1, 1)) * np.matmul(T_i, measure_locations_i)
- displacement_square_norm = np.sum(np.square(T_sum-X))
+ displacement_square_norm = np.sum(np.square(T_sum - X))
if log:
displacement_square_norms.append(displacement_square_norm)
@@ -308,4 +546,288 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
log_dict['displacement_square_norms'] = displacement_square_norms
return X, log_dict
else:
- return X \ No newline at end of file
+ return X
+
+
+def emd_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True,
+ log=False):
+ r"""Solves the Earth Movers distance problem between 1d measures and returns
+ the OT matrix
+
+
+ .. math::
+ \gamma = \mathop{\arg \min}_\gamma \sum_i \sum_j \gamma_{ij} d(x_a[i], x_b[j])
+
+ s.t. \gamma 1 = a,
+ \gamma^T 1= b,
+ \gamma\geq 0
+ where :
+
+ - d is the metric
+ - x_a and x_b are the samples
+ - a and b are the sample weights
+
+ When 'minkowski' is used as a metric, :math:`d(x, y) = |x - y|^p`.
+
+ Uses the algorithm detailed in [1]_
+
+ Parameters
+ ----------
+ x_a : (ns,) or (ns, 1) ndarray, float64
+ Source dirac locations (on the real line)
+ x_b : (nt,) or (nt, 1) ndarray, float64
+ Target dirac locations (on the real line)
+ a : (ns,) ndarray, float64, optional
+ Source histogram (default is uniform weight)
+ b : (nt,) ndarray, float64, optional
+ Target histogram (default is uniform weight)
+ metric: str, optional (default='sqeuclidean')
+ Metric to be used. Only strings listed in :func:`ot.dist` are accepted.
+ Due to implementation details, this function runs faster when
+ `'sqeuclidean'`, `'cityblock'`, or `'euclidean'` metrics are used.
+ p: float, optional (default=1.0)
+ The p-norm to apply when metric='minkowski'
+ dense: boolean, optional (default=True)
+ If True, returns :math:`\gamma` as a dense ndarray of shape (ns, nt).
+ Otherwise returns a sparse representation using scipy's `coo_matrix`
+ format.
+ log: boolean, optional (default=False)
+ If True, returns a dictionary containing the cost.
+ Otherwise returns only the optimal transportation matrix.
+
+ Returns
+ -------
+ gamma: (ns, nt) ndarray
+ Optimal transportation matrix for the given parameters
+ log: dict
+ If input log is True, a dictionary containing the cost
+
+
+ Examples
+ --------
+
+ Simple example with obvious solution. The function emd_1d accepts lists and
+ performs automatic conversion to numpy arrays
+
+ >>> import ot
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> x_a = [2., 0.]
+ >>> x_b = [0., 3.]
+ >>> ot.emd_1d(x_a, x_b, a, b)
+ array([[0. , 0.5],
+ [0.5, 0. ]])
+ >>> ot.emd_1d(x_a, x_b)
+ array([[0. , 0.5],
+ [0.5, 0. ]])
+
+ References
+ ----------
+
+ .. [1] Peyré, G., & Cuturi, M. (2017). "Computational Optimal
+ Transport", 2018.
+
+ See Also
+ --------
+ ot.lp.emd : EMD for multidimensional distributions
+ ot.lp.emd2_1d : EMD for 1d distributions (returns cost instead of the
+ transportation matrix)
+ """
+ a = np.asarray(a, dtype=np.float64)
+ b = np.asarray(b, dtype=np.float64)
+ x_a = np.asarray(x_a, dtype=np.float64)
+ x_b = np.asarray(x_b, dtype=np.float64)
+
+ assert (x_a.ndim == 1 or x_a.ndim == 2 and x_a.shape[1] == 1), \
+ "emd_1d should only be used with monodimensional data"
+ assert (x_b.ndim == 1 or x_b.ndim == 2 and x_b.shape[1] == 1), \
+ "emd_1d should only be used with monodimensional data"
+
+ # if empty array given then use uniform distributions
+ if a.ndim == 0 or len(a) == 0:
+ a = np.ones((x_a.shape[0],), dtype=np.float64) / x_a.shape[0]
+ if b.ndim == 0 or len(b) == 0:
+ b = np.ones((x_b.shape[0],), dtype=np.float64) / x_b.shape[0]
+
+ x_a_1d = x_a.reshape((-1, ))
+ x_b_1d = x_b.reshape((-1, ))
+ perm_a = np.argsort(x_a_1d)
+ perm_b = np.argsort(x_b_1d)
+
+ G_sorted, indices, cost = emd_1d_sorted(a, b,
+ x_a_1d[perm_a], x_b_1d[perm_b],
+ metric=metric, p=p)
+ G = coo_matrix((G_sorted, (perm_a[indices[:, 0]], perm_b[indices[:, 1]])),
+ shape=(a.shape[0], b.shape[0]))
+ if dense:
+ G = G.toarray()
+ if log:
+ log = {'cost': cost}
+ return G, log
+ return G
+
+
+def emd2_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True,
+ log=False):
+ r"""Solves the Earth Movers distance problem between 1d measures and returns
+ the loss
+
+
+ .. math::
+ \gamma = \mathop{\arg \min}_\gamma \sum_i \sum_j \gamma_{ij} d(x_a[i], x_b[j])
+
+ s.t. \gamma 1 = a,
+ \gamma^T 1= b,
+ \gamma\geq 0
+ where :
+
+ - d is the metric
+ - x_a and x_b are the samples
+ - a and b are the sample weights
+
+ When 'minkowski' is used as a metric, :math:`d(x, y) = |x - y|^p`.
+
+ Uses the algorithm detailed in [1]_
+
+ Parameters
+ ----------
+ x_a : (ns,) or (ns, 1) ndarray, float64
+ Source dirac locations (on the real line)
+ x_b : (nt,) or (nt, 1) ndarray, float64
+ Target dirac locations (on the real line)
+ a : (ns,) ndarray, float64, optional
+ Source histogram (default is uniform weight)
+ b : (nt,) ndarray, float64, optional
+ Target histogram (default is uniform weight)
+ metric: str, optional (default='sqeuclidean')
+ Metric to be used. Only strings listed in :func:`ot.dist` are accepted.
+ Due to implementation details, this function runs faster when
+ `'sqeuclidean'`, `'minkowski'`, `'cityblock'`, or `'euclidean'` metrics
+ are used.
+ p: float, optional (default=1.0)
+ The p-norm to apply when metric='minkowski'
+ dense: boolean, optional (default=True)
+ If True, returns :math:`\gamma` as a dense ndarray of shape (ns, nt).
+ Otherwise returns a sparse representation using scipy's `coo_matrix`
+ format. Only used if log is set to True. Due to implementation details,
+ this function runs faster when dense is set to False.
+ log: boolean, optional (default=False)
+ If True, returns a dictionary containing the transportation matrix.
+ Otherwise returns only the loss.
+
+ Returns
+ -------
+ loss: float
+ Cost associated to the optimal transportation
+ log: dict
+ If input log is True, a dictionary containing the Optimal transportation
+ matrix for the given parameters
+
+
+ Examples
+ --------
+
+ Simple example with obvious solution. The function emd2_1d accepts lists and
+ performs automatic conversion to numpy arrays
+
+ >>> import ot
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> x_a = [2., 0.]
+ >>> x_b = [0., 3.]
+ >>> ot.emd2_1d(x_a, x_b, a, b)
+ 0.5
+ >>> ot.emd2_1d(x_a, x_b)
+ 0.5
+
+ References
+ ----------
+
+ .. [1] Peyré, G., & Cuturi, M. (2017). "Computational Optimal
+ Transport", 2018.
+
+ See Also
+ --------
+ ot.lp.emd2 : EMD for multidimensional distributions
+ ot.lp.emd_1d : EMD for 1d distributions (returns the transportation matrix
+ instead of the cost)
+ """
+ # If we do not return G (log==False), then we should not cast it to dense
+ # (useless overhead)
+ G, log_emd = emd_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric=metric, p=p,
+ dense=dense and log, log=True)
+ cost = log_emd['cost']
+ if log:
+ log_emd = {'G': G}
+ return cost, log_emd
+ return cost
+
+
+def wasserstein_1d(x_a, x_b, a=None, b=None, p=1.):
+ r"""Solves the p-Wasserstein distance problem between 1d measures and returns
+ the distance
+
+ .. math::
+ \min_\gamma \left( \sum_i \sum_j \gamma_{ij} \|x_a[i] - x_b[j]\|^p \right)^{1/p}
+
+ s.t. \gamma 1 = a,
+ \gamma^T 1= b,
+ \gamma\geq 0
+
+ where :
+
+ - x_a and x_b are the samples
+ - a and b are the sample weights
+
+ Uses the algorithm detailed in [1]_
+
+ Parameters
+ ----------
+ x_a : (ns,) or (ns, 1) ndarray, float64
+ Source dirac locations (on the real line)
+ x_b : (nt,) or (nt, 1) ndarray, float64
+ Target dirac locations (on the real line)
+ a : (ns,) ndarray, float64, optional
+ Source histogram (default is uniform weight)
+ b : (nt,) ndarray, float64, optional
+ Target histogram (default is uniform weight)
+ p: float, optional (default=1.0)
+ The order of the p-Wasserstein distance to be computed
+
+ Returns
+ -------
+ dist: float
+ p-Wasserstein distance
+
+
+ Examples
+ --------
+
+ Simple example with obvious solution. The function wasserstein_1d accepts
+ lists and performs automatic conversion to numpy arrays
+
+ >>> import ot
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> x_a = [2., 0.]
+ >>> x_b = [0., 3.]
+ >>> ot.wasserstein_1d(x_a, x_b, a, b)
+ 0.5
+ >>> ot.wasserstein_1d(x_a, x_b)
+ 0.5
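+
+ For p=2 the distance is the square root of the Minkowski-cost EMD; on
+ the same toy data (hand computation: sqrt(0.5 * |2 - 3|^2)):
+
+ >>> ot.wasserstein_1d(x_a, x_b, p=2.)
+ 0.7071067811865476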
+
+ References
+ ----------
+
+ .. [1] Peyré, G., & Cuturi, M. (2017). "Computational Optimal
+ Transport", 2018.
+
+ See Also
+ --------
+ ot.lp.emd_1d : EMD for 1d distributions
+ """
+ cost_emd = emd2_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p,
+ dense=False, log=False)
+ return np.power(cost_emd, 1. / p)
diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
index 83ee6aa..d345fd4 100644
--- a/ot/lp/emd_wrap.pyx
+++ b/ot/lp/emd_wrap.pyx
@@ -10,13 +10,19 @@ Cython linker with C solver
import numpy as np
cimport numpy as np
+from ..utils import dist
+
cimport cython
+cimport libc.math as math
import warnings
cdef extern from "EMD.h":
int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter)
+ int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D,
+ long *iG, long *jG, double *G, long * nG,
+ double* alpha, double* beta, double *cost, int maxIter)
cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED
@@ -34,9 +40,11 @@ def check_result(result_code):
return message
+
+
@cython.boundscheck(False)
@cython.wraparound(False)
-def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int max_iter):
+def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int max_iter, bint dense):
"""
Solves the Earth Movers distance problem and returns the optimal transport matrix
@@ -55,33 +63,50 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
- M is the metric cost matrix
- a and b are the sample weights
+ .. warning::
+ Note that the M matrix needs to be a C-order :py:class:`numpy.ndarray`
+
+ .. warning::
+ The C++ solver discards all samples in the distributions with
+ zeros weights. This means that while the primal variable (transport
+ matrix) is exact, the solver only returns feasible dual potentials
+ on the samples with weights different from zero.
+
Parameters
----------
- a : (ns,) ndarray, float64
+ a : (ns,) numpy.ndarray, float64
source histogram
- b : (nt,) ndarray, float64
+ b : (nt,) numpy.ndarray, float64
target histogram
- M : (ns,nt) ndarray, float64
+ M : (ns,nt) numpy.ndarray, float64
loss matrix
max_iter : int
The maximum number of iterations before stopping the optimization
algorithm if it has not converged.
-
+ dense : bool
+ Return a sparse transport matrix if set to False
Returns
-------
- gamma: (ns x nt) ndarray
+ gamma: (ns x nt) numpy.ndarray
Optimal transportation matrix for the given parameters
"""
cdef int n1= M.shape[0]
cdef int n2= M.shape[1]
+ cdef int nmax=n1+n2-1
+ cdef int result_code = 0
+ cdef int nG=0
cdef double cost=0
- cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([n1, n2])
cdef np.ndarray[double, ndim=1, mode="c"] alpha=np.zeros(n1)
cdef np.ndarray[double, ndim=1, mode="c"] beta=np.zeros(n2)
+ cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([0, 0])
+
+ cdef np.ndarray[double, ndim=1, mode="c"] Gv=np.zeros(0)
+ cdef np.ndarray[long, ndim=1, mode="c"] iG=np.zeros(0,dtype=np.int)
+ cdef np.ndarray[long, ndim=1, mode="c"] jG=np.zeros(0,dtype=np.int)
if not len(a):
a=np.ones((n1,))/n1
@@ -89,7 +114,112 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
if not len(b):
b=np.ones((n2,))/n2
- # calling the function
- cdef int result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
+ if dense:
+ # init OT matrix
+ G=np.zeros([n1, n2])
+
+ # calling the function
+ result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
+
+ return G, cost, alpha, beta, result_code
+
+
+ else:
+
+ # init sparse OT matrix
+ Gv=np.zeros(nmax)
+ iG=np.zeros(nmax,dtype=np.int)
+ jG=np.zeros(nmax,dtype=np.int)
+
+
+ result_code = EMD_wrap_return_sparse(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <long*> iG.data, <long*> jG.data, <double*> Gv.data, <long*> &nG, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
- return G, cost, alpha, beta, result_code
+
+ return Gv[:nG], iG[:nG], jG[:nG], cost, alpha, beta, result_code
+
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def emd_1d_sorted(np.ndarray[double, ndim=1, mode="c"] u_weights,
+ np.ndarray[double, ndim=1, mode="c"] v_weights,
+ np.ndarray[double, ndim=1, mode="c"] u,
+ np.ndarray[double, ndim=1, mode="c"] v,
+ str metric='sqeuclidean',
+ double p=1.):
+ r"""
+ Solves the Earth Movers distance problem between sorted 1d measures and
+ returns the OT matrix and the associated cost
+
+ Parameters
+ ----------
+ u_weights : (ns,) ndarray, float64
+ Source histogram
+ v_weights : (nt,) ndarray, float64
+ Target histogram
+ u : (ns,) ndarray, float64
+ Source dirac locations (on the real line)
+ v : (nt,) ndarray, float64
+ Target dirac locations (on the real line)
+ metric: str, optional (default='sqeuclidean')
+ Metric to be used. Only strings listed in :func:`ot.dist` are accepted.
+ Due to implementation details, this function runs faster when
+ `'sqeuclidean'`, `'minkowski'`, `'cityblock'`, or `'euclidean'` metrics
+ are used.
+ p: float, optional (default=1.0)
+ The p-norm to apply when metric='minkowski'
+
+ Returns
+ -------
+ gamma: (n, ) ndarray, float64
+ Values in the Optimal transportation matrix
+ indices: (n, 2) ndarray, int64
+ Indices of the values stored in gamma for the Optimal transportation
+ matrix
+ cost : float
+ Cost associated to the optimal transportation
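+
+ Examples
+ --------
+ A tiny hand-checkable instance (a sketch; mass is matched from left to
+ right, splitting the lighter of the two current atoms):
+
+ >>> import numpy as np
+ >>> from ot.lp.emd_wrap import emd_1d_sorted
+ >>> w = np.array([0.5, 0.5])
+ >>> u = np.array([0., 2.]) # sorted source locations
+ >>> v = np.array([0., 3.]) # sorted target locations
+ >>> G, indices, cost = emd_1d_sorted(w, w, u, v)
+ >>> cost # 0.5 * (0 - 0)**2 + 0.5 * (2 - 3)**2
+ 0.5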
+ """
+ cdef double cost = 0.
+ cdef int n = u_weights.shape[0]
+ cdef int m = v_weights.shape[0]
+
+ cdef int i = 0
+ cdef double w_i = u_weights[0]
+ cdef int j = 0
+ cdef double w_j = v_weights[0]
+
+ cdef double m_ij = 0.
+
+ cdef np.ndarray[double, ndim=1, mode="c"] G = np.zeros((n + m - 1, ),
+ dtype=np.float64)
+ cdef np.ndarray[long, ndim=2, mode="c"] indices = np.zeros((n + m - 1, 2),
+ dtype=np.int)
+ cdef int cur_idx = 0
+ while i < n and j < m:
+ if metric == 'sqeuclidean':
+ m_ij = (u[i] - v[j]) * (u[i] - v[j])
+ elif metric == 'cityblock' or metric == 'euclidean':
+ m_ij = math.fabs(u[i] - v[j])
+ elif metric == 'minkowski':
+ m_ij = math.pow(math.fabs(u[i] - v[j]), p)
+ else:
+ m_ij = dist(u[i].reshape((1, 1)), v[j].reshape((1, 1)),
+ metric=metric)[0, 0]
+ if w_i < w_j or j == m - 1:
+ cost += m_ij * w_i
+ G[cur_idx] = w_i
+ indices[cur_idx, 0] = i
+ indices[cur_idx, 1] = j
+ i += 1
+ w_j -= w_i
+ if i < n: # avoid reading past the end of u_weights on the last atom
+ w_i = u_weights[i]
+ else:
+ cost += m_ij * w_j
+ G[cur_idx] = w_j
+ indices[cur_idx, 0] = i
+ indices[cur_idx, 1] = j
+ j += 1
+ w_i -= w_j
+ if j < m: # avoid reading past the end of v_weights on the last atom
+ w_j = v_weights[j]
+ cur_idx += 1
+ return G[:cur_idx], indices[:cur_idx], cost
diff --git a/ot/lp/network_simplex_simple.h b/ot/lp/network_simplex_simple.h
index 7c6a4ce..498e921 100644
--- a/ot/lp/network_simplex_simple.h
+++ b/ot/lp/network_simplex_simple.h
@@ -686,7 +686,7 @@ namespace lemon {
/// \see resetParams(), reset()
ProblemType run() {
#if DEBUG_LVL>0
- std::cout << "OPTIMAL = " << OPTIMAL << "\nINFEASIBLE = " << INFEASIBLE << "\nUNBOUNDED = " << UNBOUNDED << "\nMAX_ITER_REACHED" << MAX_ITER_REACHED\n";
+ std::cout << "OPTIMAL = " << OPTIMAL << "\nINFEASIBLE = " << INFEASIBLE << "\nUNBOUNDED = " << UNBOUNDED << "\nMAX_ITER_REACHED" << MAX_ITER_REACHED << "\n" ;
#endif
if (!init()) return INFEASIBLE;
diff --git a/ot/optim.py b/ot/optim.py
index f31fae2..4012e0d 100644
--- a/ot/optim.py
+++ b/ot/optim.py
@@ -4,6 +4,7 @@ Optimization algorithms for OT
"""
# Author: Remi Flamary <remi.flamary@unice.fr>
+# Titouan Vayer <titouan.vayer@irisa.fr>
#
# License: MIT License
@@ -25,14 +26,13 @@ def line_search_armijo(f, xk, pk, gfk, old_fval,
Parameters
----------
-
- f : function
+ f : callable
loss function
- xk : np.ndarray
+ xk : ndarray
initial position
- pk : np.ndarray
+ pk : ndarray
descent direction
- gfk : np.ndarray
+ gfk : ndarray
gradient of f at xk
old_fval : float
loss value at xk
@@ -72,8 +72,70 @@ def line_search_armijo(f, xk, pk, gfk, old_fval,
return alpha, fc[0], phi1
-def cg(a, b, M, reg, f, df, G0=None, numItermax=200,
- stopThr=1e-9, verbose=False, log=False):
+def solve_linesearch(cost, G, deltaG, Mi, f_val,
+ armijo=True, C1=None, C2=None, reg=None, Gc=None, constC=None, M=None):
+ """
+ Solve the linesearch in the FW iterations
+ Parameters
+ ----------
+ cost : method
+ Cost in the FW for the linesearch
+ G : ndarray, shape(ns,nt)
+ The transport map at a given iteration of the FW
+ deltaG : ndarray (ns,nt)
+ Difference between the optimal map found by linearization in the FW algorithm and the value at a given iteration
+ Mi : ndarray (ns,nt)
+ Cost matrix of the linearized transport problem. Corresponds to the gradient of the cost
+ f_val : float
+ Value of the cost at G
+ armijo : bool, optional
+ If True the step of the line-search is found via an Armijo search. Otherwise the closed form is used.
+ If there are convergence issues, use False.
+ C1 : ndarray (ns,ns), optional
+ Structure matrix in the source domain. Only used and necessary when armijo=False
+ C2 : ndarray (nt,nt), optional
+ Structure matrix in the target domain. Only used and necessary when armijo=False
+ reg : float, optional
+ Regularization parameter. Only used and necessary when armijo=False
+ Gc : ndarray (ns,nt)
+ Optimal map found by linearization in the FW algorithm. Only used and necessary when armijo=False
+ constC : ndarray (ns,nt)
+ Constant for the gromov cost. See [24]. Only used and necessary when armijo=False
+ M : ndarray (ns,nt), optional
+ Cost matrix between the features. Only used and necessary when armijo=False
+ Returns
+ -------
+ alpha : float
+ The optimal step size of the FW
+ fc : int
+ Number of function calls. Unused here.
+ f_val : float
+ The value of the cost for the next iteration
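+ Notes
+ -----
+ When armijo=False, the cost restricted to the segment :math:`G + \alpha \Delta G`
+ is quadratic in :math:`\alpha`. With :math:`\langle \cdot, \cdot \rangle` the
+ Frobenius inner product, the coefficients computed below read (a transcription
+ of the code, not a formula quoted from [24]):
+
+ .. math::
+ a = -2 \, reg \, \langle C_1 \Delta G C_2, \Delta G \rangle
+
+ b = \langle M + reg \, constC, \Delta G \rangle - 2 \, reg \left( \langle C_1 \Delta G C_2, G \rangle + \langle C_1 G C_2, \Delta G \rangle \right)
+
+ c = f(G)
+
+ and the step is obtained by minimizing this quadratic on [0, 1] via
+ solve_1d_linesearch_quad.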
+ References
+ ----------
+ .. [24] Vayer Titouan, Chapel Laetitia, Flamary R{\'e}mi, Tavenard Romain
+ and Courty Nicolas
+ "Optimal Transport for structured data with application on graphs"
+ International Conference on Machine Learning (ICML). 2019.
+ """
+ if armijo:
+ alpha, fc, f_val = line_search_armijo(cost, G, deltaG, Mi, f_val)
+ else: # requires symmetric matrices
+ dot1 = np.dot(C1, deltaG)
+ dot12 = dot1.dot(C2)
+ a = -2 * reg * np.sum(dot12 * deltaG)
+ b = np.sum((M + reg * constC) * deltaG) - 2 * reg * (np.sum(dot12 * G) + np.sum(np.dot(C1, G).dot(C2) * deltaG))
+ c = cost(G)
+
+ alpha = solve_1d_linesearch_quad(a, b, c)
+ fc = None
+ f_val = cost(G + alpha * deltaG)
+
+ return alpha, fc, f_val
+
+
+def cg(a, b, M, reg, f, df, G0=None, numItermax=200, numItermaxEmd=100000,
+ stopThr=1e-9, stopThr2=1e-9, verbose=False, log=False, **kwargs):
"""
Solve the general regularized OT problem with conditional gradient
@@ -98,24 +160,30 @@ def cg(a, b, M, reg, f, df, G0=None, numItermax=200,
Parameters
----------
- a : np.ndarray (ns,)
+ a : ndarray, shape (ns,)
samples weights in the source domain
- b : np.ndarray (nt,)
+ b : ndarray, shape (nt,)
samples in the target domain
- M : np.ndarray (ns,nt)
+ M : ndarray, shape (ns, nt)
loss matrix
reg : float
Regularization term >0
- G0 : np.ndarray (ns,nt), optional
+ G0 : ndarray, shape (ns,nt), optional
initial guess (default is indep joint density)
numItermax : int, optional
Max number of iterations
+ numItermaxEmd : int, optional
+ Max number of iterations for emd
stopThr : float, optional
- Stop threshol on error (>0)
+ Stop threshold on the relative variation (>0)
+ stopThr2 : float, optional
+ Stop threshold on the absolute variation (>0)
verbose : bool, optional
Print information along iterations
log : bool, optional
record log if True
+ **kwargs : dict
+ Parameters for linesearch
Returns
-------
@@ -157,9 +225,9 @@ def cg(a, b, M, reg, f, df, G0=None, numItermax=200,
it = 0
if verbose:
- print('{:5s}|{:12s}|{:8s}'.format(
- 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32)
- print('{:5d}|{:8e}|{:8e}'.format(it, f_val, 0))
+ print('{:5s}|{:12s}|{:8s}|{:8s}'.format(
+ 'It.', 'Loss', 'Relative loss', 'Absolute loss') + '\n' + '-' * 48)
+ print('{:5d}|{:8e}|{:8e}|{:8e}'.format(it, f_val, 0, 0))
while loop:
@@ -172,12 +240,12 @@ def cg(a, b, M, reg, f, df, G0=None, numItermax=200,
Mi += Mi.min()
# solve linear program
- Gc = emd(a, b, Mi)
+ Gc = emd(a, b, Mi, numItermax=numItermaxEmd)
deltaG = Gc - G
# line search
- alpha, fc, f_val = line_search_armijo(cost, G, deltaG, Mi, f_val)
+ alpha, fc, f_val = solve_linesearch(cost, G, deltaG, Mi, f_val, reg=reg, M=M, Gc=Gc, **kwargs)
G = G + alpha * deltaG
@@ -185,8 +253,9 @@ def cg(a, b, M, reg, f, df, G0=None, numItermax=200,
if it >= numItermax:
loop = 0
- delta_fval = (f_val - old_fval) / abs(f_val)
- if abs(delta_fval) < stopThr:
+ abs_delta_fval = abs(f_val - old_fval)
+ relative_delta_fval = abs_delta_fval / abs(f_val)
+ if relative_delta_fval < stopThr or abs_delta_fval < stopThr2:
loop = 0
if log:
@@ -194,9 +263,9 @@ def cg(a, b, M, reg, f, df, G0=None, numItermax=200,
if verbose:
if it % 20 == 0:
- print('{:5s}|{:12s}|{:8s}'.format(
- 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32)
- print('{:5d}|{:8e}|{:8e}'.format(it, f_val, delta_fval))
+ print('{:5s}|{:12s}|{:8s}|{:8s}'.format(
+ 'It.', 'Loss', 'Relative loss', 'Absolute loss') + '\n' + '-' * 48)
+ print('{:5d}|{:8e}|{:8e}|{:8e}'.format(it, f_val, relative_delta_fval, abs_delta_fval))
if log:
return G, log
@@ -205,7 +274,7 @@ def cg(a, b, M, reg, f, df, G0=None, numItermax=200,
def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10,
- numInnerItermax=200, stopThr=1e-9, verbose=False, log=False):
+ numInnerItermax=200, stopThr=1e-9, stopThr2=1e-9, verbose=False, log=False):
"""
Solve the general regularized OT problem with the generalized conditional gradient
@@ -231,24 +300,26 @@ def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10,
Parameters
----------
- a : np.ndarray (ns,)
+ a : ndarray, shape (ns,)
samples weights in the source domain
- b : np.ndarray (nt,)
+ b : ndarray, shape (nt,)
samples in the target domain
- M : np.ndarray (ns,nt)
+ M : ndarray, shape (ns, nt)
loss matrix
reg1 : float
Entropic Regularization term >0
reg2 : float
Second Regularization term >0
- G0 : np.ndarray (ns,nt), optional
+ G0 : ndarray, shape (ns, nt), optional
initial guess (default is indep joint density)
numItermax : int, optional
Max number of iterations
numInnerItermax : int, optional
Max number of iterations of Sinkhorn
stopThr : float, optional
- Stop threshol on error (>0)
+ Stop threshold on the relative variation (>0)
+ stopThr2 : float, optional
+ Stop threshold on the absolute variation (>0)
verbose : bool, optional
Print information along iterations
log : bool, optional
@@ -256,15 +327,13 @@ def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10,
Returns
-------
- gamma : (ns x nt) ndarray
+ gamma : ndarray, shape (ns, nt)
Optimal transportation matrix for the given parameters
log : dict
log dictionary return only if log==True in parameters
-
References
----------
-
.. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
.. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized conditional gradient: analysis of convergence and applications. arXiv preprint arXiv:1510.06567.
@@ -294,9 +363,9 @@ def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10,
it = 0
if verbose:
- print('{:5s}|{:12s}|{:8s}'.format(
- 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32)
- print('{:5d}|{:8e}|{:8e}'.format(it, f_val, 0))
+ print('{:5s}|{:12s}|{:8s}|{:8s}'.format(
+ 'It.', 'Loss', 'Relative loss', 'Absolute loss') + '\n' + '-' * 48)
+ print('{:5d}|{:8e}|{:8e}|{:8e}'.format(it, f_val, 0, 0))
while loop:
@@ -322,8 +391,10 @@ def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10,
if it >= numItermax:
loop = 0
- delta_fval = (f_val - old_fval) / abs(f_val)
- if abs(delta_fval) < stopThr:
+ abs_delta_fval = abs(f_val - old_fval)
+ relative_delta_fval = abs_delta_fval / abs(f_val)
+
+ if relative_delta_fval < stopThr or abs_delta_fval < stopThr2:
loop = 0
if log:
@@ -331,11 +402,41 @@ def gcg(a, b, M, reg1, reg2, f, df, G0=None, numItermax=10,
if verbose:
if it % 20 == 0:
- print('{:5s}|{:12s}|{:8s}'.format(
- 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32)
- print('{:5d}|{:8e}|{:8e}'.format(it, f_val, delta_fval))
+ print('{:5s}|{:12s}|{:8s}|{:8s}'.format(
+ 'It.', 'Loss', 'Relative loss', 'Absolute loss') + '\n' + '-' * 48)
+ print('{:5d}|{:8e}|{:8e}|{:8e}'.format(it, f_val, relative_delta_fval, abs_delta_fval))
if log:
return G, log
else:
return G
+
+
+def solve_1d_linesearch_quad(a, b, c):
+ """
+ For any convex or non-convex 1d quadratic function f, solve the following problem on [0, 1]:
+ .. math::
+ \mathop{\arg \min}_{x \in [0, 1]} \ f(x) = ax^{2} + bx + c
+
+ Parameters
+ ----------
+ a,b,c : float
+ The coefficients of the quadratic function
+
+ Returns
+ -------
+ x : float
+ The optimal value which leads to the minimal cost
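+
+ Examples
+ --------
+ With :math:`f(x) = 2x^{2} - 3x + 1` the unconstrained minimizer
+ :math:`-b / (2a) = 0.75` lies in [0, 1] and is returned as is:
+
+ >>> from ot.optim import solve_1d_linesearch_quad
+ >>> solve_1d_linesearch_quad(2., -3., 1.)
+ 0.75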
+ """
+ f0 = c
+ df0 = b
+ f1 = a + f0 + df0
+
+ if a > 0: # convex
+ minimum = min(1, max(0, np.divide(-b, 2.0 * a)))
+ return minimum
+ else: # non convex
+ if f0 > f1:
+ return 1
+ else:
+ return 0
diff --git a/ot/plot.py b/ot/plot.py
index 784a372..f403e98 100644
--- a/ot/plot.py
+++ b/ot/plot.py
@@ -1,5 +1,11 @@
"""
Functions for plotting OT matrices
+
+.. warning::
+ Note that by default the module is not imported in :mod:`ot`. In order
+ to use it you need to explicitly import :mod:`ot.plot`.
+
+
"""
# Author: Remi Flamary <remi.flamary@unice.fr>
@@ -20,11 +26,11 @@ def plot1D_mat(a, b, M, title=''):
Parameters
----------
- a : np.array, shape (na,)
+ a : ndarray, shape (na,)
Source distribution
- b : np.array, shape (nb,)
+ b : ndarray, shape (nb,)
Target distribution
- M : np.array, shape (na,nb)
+ M : ndarray, shape (na, nb)
Matrix to plot
"""
na, nb = M.shape
diff --git a/ot/stochastic.py b/ot/stochastic.py
index 4795d88..13ed9cc 100644
--- a/ot/stochastic.py
+++ b/ot/stochastic.py
@@ -1,3 +1,9 @@
+"""
+Stochastic solvers for regularized OT.
+
+
+"""
+
# Author: Kilian Fatras <kilian.fatras@gmail.com>
#
# License: MIT License
@@ -11,7 +17,7 @@ import numpy as np
def coordinate_grad_semi_dual(b, M, reg, beta, i):
- '''
+ r'''
Compute the coordinate gradient update for regularized discrete distributions for (i, :)
The function computes the gradient of the semi dual problem:
@@ -32,51 +38,49 @@ def coordinate_grad_semi_dual(b, M, reg, beta, i):
Parameters
----------
-
- b : np.ndarray(nt,)
- target measure
- M : np.ndarray(ns, nt)
- cost matrix
- reg : float nu
- Regularization term > 0
- v : np.ndarray(nt,)
- dual variable
- i : number int
- picked number i
+ b : ndarray, shape (nt,)
+ Target measure.
+ M : ndarray, shape (ns, nt)
+ Cost matrix.
+ reg : float
+ Regularization term > 0.
+ v : ndarray, shape (nt,)
+ Dual variable.
+ i : int
+ Picked number i.
Returns
-------
-
- coordinate gradient : np.ndarray(nt,)
+ coordinate gradient : ndarray, shape (nt,)
Examples
--------
-
+ >>> import ot
+ >>> np.random.seed(0)
>>> n_source = 7
>>> n_target = 4
- >>> reg = 1
- >>> numItermax = 300000
>>> a = ot.utils.unif(n_source)
>>> b = ot.utils.unif(n_target)
- >>> rng = np.random.RandomState(0)
- >>> X_source = rng.randn(n_source, 2)
- >>> Y_target = rng.randn(n_target, 2)
+ >>> X_source = np.random.randn(n_source, 2)
+ >>> Y_target = np.random.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
- >>> method = "ASGD"
- >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
- method, numItermax)
- >>> print(asgd_pi)
+ >>> ot.stochastic.solve_semi_dual_entropic(a, b, M, reg=1, method="ASGD", numItermax=300000)
+ array([[2.53942342e-02, 9.98640673e-02, 1.75945647e-02, 4.27664307e-06],
+ [1.21556999e-01, 1.26350515e-02, 1.30491795e-03, 7.36017394e-03],
+ [3.54070702e-03, 7.63581358e-02, 6.29581672e-02, 1.32812798e-07],
+ [2.60578198e-02, 3.35916645e-02, 8.28023223e-02, 4.05336238e-04],
+ [9.86808864e-03, 7.59774324e-04, 1.08702729e-02, 1.21359007e-01],
+ [2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01],
+ [4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]])
+
References
----------
-
[Genevay et al., 2016] :
- Stochastic Optimization for Large-scale Optimal Transport,
- Advances in Neural Information Processing Systems (2016),
- arXiv preprint arxiv:1605.08527.
-
+ Stochastic Optimization for Large-scale Optimal Transport,
+ Advances in Neural Information Processing Systems (2016),
+ arXiv preprint arxiv:1605.08527.
'''
-
r = M[i, :] - beta
exp_beta = np.exp(-r / reg) * b
khi = exp_beta / (np.sum(exp_beta))
@@ -84,7 +88,7 @@ def coordinate_grad_semi_dual(b, M, reg, beta, i):
def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None):
- '''
+ r'''
Compute the SAG algorithm to solve the regularized discrete measures
optimal transport max problem
@@ -112,42 +116,43 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None):
Parameters
----------
- a : np.ndarray(ns,),
- source measure
- b : np.ndarray(nt,),
- target measure
- M : np.ndarray(ns, nt),
- cost matrix
- reg : float number,
+ a : ndarray, shape (ns,)
+ Source measure.
+ b : ndarray, shape (nt,)
+ Target measure.
+ M : ndarray, shape (ns, nt)
+ Cost matrix.
+ reg : float
Regularization term > 0
- numItermax : int number
- number of iteration
- lr : float number
- learning rate
+ numItermax : int
+ Number of iterations.
+ lr : float
+ Learning rate.
Returns
-------
-
- v : np.ndarray(nt,)
- dual variable
+ v : ndarray, shape (nt,)
+ Dual variable.
Examples
--------
-
+ >>> import ot
+ >>> np.random.seed(0)
>>> n_source = 7
>>> n_target = 4
- >>> reg = 1
- >>> numItermax = 300000
>>> a = ot.utils.unif(n_source)
>>> b = ot.utils.unif(n_target)
- >>> rng = np.random.RandomState(0)
- >>> X_source = rng.randn(n_source, 2)
- >>> Y_target = rng.randn(n_target, 2)
+ >>> X_source = np.random.randn(n_source, 2)
+ >>> Y_target = np.random.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
- >>> method = "ASGD"
- >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
- method, numItermax)
- >>> print(asgd_pi)
+ >>> ot.stochastic.solve_semi_dual_entropic(a, b, M, reg=1, method="ASGD", numItermax=300000)
+ array([[2.53942342e-02, 9.98640673e-02, 1.75945647e-02, 4.27664307e-06],
+ [1.21556999e-01, 1.26350515e-02, 1.30491795e-03, 7.36017394e-03],
+ [3.54070702e-03, 7.63581358e-02, 6.29581672e-02, 1.32812798e-07],
+ [2.60578198e-02, 3.35916645e-02, 8.28023223e-02, 4.05336238e-04],
+ [9.86808864e-03, 7.59774324e-04, 1.08702729e-02, 1.21359007e-01],
+ [2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01],
+ [4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]])
References
----------
@@ -176,7 +181,7 @@ def sag_entropic_transport(a, b, M, reg, numItermax=10000, lr=None):
def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None):
- '''
+ r'''
    Compute the ASGD algorithm to solve the regularized semi-continuous measures optimal transport max problem
The function solves the following optimization problem:
@@ -202,50 +207,49 @@ def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None):
Parameters
----------
-
- b : np.ndarray(nt,)
+ b : ndarray, shape (nt,)
target measure
- M : np.ndarray(ns, nt)
+ M : ndarray, shape (ns, nt)
cost matrix
- reg : float number
+ reg : float
Regularization term > 0
- numItermax : int number
- number of iteration
- lr : float number
- learning rate
-
+ numItermax : int
+        Number of iterations.
+ lr : float
+ Learning rate.
Returns
-------
-
- ave_v : np.ndarray(nt,)
+ ave_v : ndarray, shape (nt,)
dual variable
Examples
--------
-
+ >>> import ot
+ >>> np.random.seed(0)
>>> n_source = 7
>>> n_target = 4
- >>> reg = 1
- >>> numItermax = 300000
>>> a = ot.utils.unif(n_source)
>>> b = ot.utils.unif(n_target)
- >>> rng = np.random.RandomState(0)
- >>> X_source = rng.randn(n_source, 2)
- >>> Y_target = rng.randn(n_target, 2)
+ >>> X_source = np.random.randn(n_source, 2)
+ >>> Y_target = np.random.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
- >>> method = "ASGD"
- >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
- method, numItermax)
- >>> print(asgd_pi)
+ >>> ot.stochastic.solve_semi_dual_entropic(a, b, M, reg=1, method="ASGD", numItermax=300000)
+ array([[2.53942342e-02, 9.98640673e-02, 1.75945647e-02, 4.27664307e-06],
+ [1.21556999e-01, 1.26350515e-02, 1.30491795e-03, 7.36017394e-03],
+ [3.54070702e-03, 7.63581358e-02, 6.29581672e-02, 1.32812798e-07],
+ [2.60578198e-02, 3.35916645e-02, 8.28023223e-02, 4.05336238e-04],
+ [9.86808864e-03, 7.59774324e-04, 1.08702729e-02, 1.21359007e-01],
+ [2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01],
+ [4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]])
References
----------
[Genevay et al., 2016] :
- Stochastic Optimization for Large-scale Optimal Transport,
- Advances in Neural Information Processing Systems (2016),
- arXiv preprint arxiv:1605.08527.
+ Stochastic Optimization for Large-scale Optimal Transport,
+ Advances in Neural Information Processing Systems (2016),
+ arXiv preprint arxiv:1605.08527.
'''
if lr is None:
@@ -264,7 +268,7 @@ def averaged_sgd_entropic_transport(a, b, M, reg, numItermax=300000, lr=None):
def c_transform_entropic(b, M, reg, beta):
- '''
+ r'''
The goal is to recover u from the c-transform.
The function computes the c_transform of a dual variable from the other
@@ -285,47 +289,47 @@ def c_transform_entropic(b, M, reg, beta):
Parameters
----------
-
- b : np.ndarray(nt,)
- target measure
- M : np.ndarray(ns, nt)
- cost matrix
+ b : ndarray, shape (nt,)
+        Target measure.
+ M : ndarray, shape (ns, nt)
+        Cost matrix.
reg : float
- regularization term > 0
- v : np.ndarray(nt,)
- dual variable
+        Regularization term > 0.
+    beta : ndarray, shape (nt,)
+ Dual variable.
Returns
-------
-
- u : np.ndarray(ns,)
- dual variable
+ u : ndarray, shape (ns,)
+ Dual variable.
Examples
--------
-
+ >>> import ot
+ >>> np.random.seed(0)
>>> n_source = 7
>>> n_target = 4
- >>> reg = 1
- >>> numItermax = 300000
>>> a = ot.utils.unif(n_source)
>>> b = ot.utils.unif(n_target)
- >>> rng = np.random.RandomState(0)
- >>> X_source = rng.randn(n_source, 2)
- >>> Y_target = rng.randn(n_target, 2)
+ >>> X_source = np.random.randn(n_source, 2)
+ >>> Y_target = np.random.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
- >>> method = "ASGD"
- >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
- method, numItermax)
- >>> print(asgd_pi)
+ >>> ot.stochastic.solve_semi_dual_entropic(a, b, M, reg=1, method="ASGD", numItermax=300000)
+ array([[2.53942342e-02, 9.98640673e-02, 1.75945647e-02, 4.27664307e-06],
+ [1.21556999e-01, 1.26350515e-02, 1.30491795e-03, 7.36017394e-03],
+ [3.54070702e-03, 7.63581358e-02, 6.29581672e-02, 1.32812798e-07],
+ [2.60578198e-02, 3.35916645e-02, 8.28023223e-02, 4.05336238e-04],
+ [9.86808864e-03, 7.59774324e-04, 1.08702729e-02, 1.21359007e-01],
+ [2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01],
+ [4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]])
References
----------
[Genevay et al., 2016] :
- Stochastic Optimization for Large-scale Optimal Transport,
- Advances in Neural Information Processing Systems (2016),
- arXiv preprint arxiv:1605.08527.
+ Stochastic Optimization for Large-scale Optimal Transport,
+ Advances in Neural Information Processing Systems (2016),
+ arXiv preprint arxiv:1605.08527.
'''
n_source = np.shape(M)[0]
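The loop elided by the next hunk evaluates the smoothed c-transform
u_i = -reg * log(sum_j b_j exp((beta_j - M_ij) / reg)); a vectorized sketch of
the same computation (illustrative, assuming scipy is available; not the
library's exact loop):

    import numpy as np
    from scipy.special import logsumexp

    def c_transform_sketch(b, M, reg, beta):
        # u_i = -reg * logsumexp_j(log b_j + (beta_j - M_ij) / reg)
        return -reg * logsumexp(np.log(b)[None, :] + (beta[None, :] - M) / reg,
                                axis=1)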
@@ -340,7 +344,7 @@ def c_transform_entropic(b, M, reg, beta):
def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
log=False):
- '''
+ r'''
Compute the transportation matrix to solve the regularized discrete
measures optimal transport max problem
@@ -348,8 +352,11 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
.. math::
\gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
s.t. \gamma 1 = a
+
        \gamma^T 1 = b
+
\gamma \geq 0
Where :
@@ -364,52 +371,53 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
Parameters
----------
- a : np.ndarray(ns,)
+ a : ndarray, shape (ns,)
source measure
- b : np.ndarray(nt,)
+ b : ndarray, shape (nt,)
target measure
- M : np.ndarray(ns, nt)
+ M : ndarray, shape (ns, nt)
cost matrix
- reg : float number
+ reg : float
Regularization term > 0
    method : str
used method (SAG or ASGD)
- numItermax : int number
+ numItermax : int
        number of iterations
- lr : float number
+ lr : float
learning rate
- n_source : int number
+ n_source : int
size of the source measure
- n_target : int number
+ n_target : int
size of the target measure
log : bool, optional
record log if True
Returns
-------
-
- pi : np.ndarray(ns, nt)
+ pi : ndarray, shape (ns, nt)
transportation matrix
log : dict
        log dictionary returned only if log==True in parameters
Examples
--------
-
+ >>> import ot
+ >>> np.random.seed(0)
>>> n_source = 7
>>> n_target = 4
- >>> reg = 1
- >>> numItermax = 300000
>>> a = ot.utils.unif(n_source)
>>> b = ot.utils.unif(n_target)
- >>> rng = np.random.RandomState(0)
- >>> X_source = rng.randn(n_source, 2)
- >>> Y_target = rng.randn(n_target, 2)
+ >>> X_source = np.random.randn(n_source, 2)
+ >>> Y_target = np.random.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
- >>> method = "ASGD"
- >>> asgd_pi = stochastic.solve_semi_dual_entropic(a, b, M, reg,
- method, numItermax)
- >>> print(asgd_pi)
+ >>> ot.stochastic.solve_semi_dual_entropic(a, b, M, reg=1, method="ASGD", numItermax=300000)
+ array([[2.53942342e-02, 9.98640673e-02, 1.75945647e-02, 4.27664307e-06],
+ [1.21556999e-01, 1.26350515e-02, 1.30491795e-03, 7.36017394e-03],
+ [3.54070702e-03, 7.63581358e-02, 6.29581672e-02, 1.32812798e-07],
+ [2.60578198e-02, 3.35916645e-02, 8.28023223e-02, 4.05336238e-04],
+ [9.86808864e-03, 7.59774324e-04, 1.08702729e-02, 1.21359007e-01],
+ [2.17218856e-02, 9.12931802e-04, 1.87962526e-03, 1.18342700e-01],
+ [4.14237512e-02, 2.67487857e-02, 7.23016955e-02, 2.38291052e-03]])
References
----------
@@ -448,7 +456,7 @@ def solve_semi_dual_entropic(a, b, M, reg, method, numItermax=10000, lr=None,
def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
batch_beta):
- '''
+ r'''
Computes the partial gradient of the dual optimal transport problem.
For each (i,j) in a batch of coordinates, the partial gradients are :
@@ -475,53 +483,55 @@ def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
Parameters
----------
-
- a : np.ndarray(ns,)
+ a : ndarray, shape (ns,)
source measure
- b : np.ndarray(nt,)
+ b : ndarray, shape (nt,)
target measure
- M : np.ndarray(ns, nt)
+ M : ndarray, shape (ns, nt)
cost matrix
- reg : float number
+ reg : float
Regularization term > 0
- alpha : np.ndarray(ns,)
+ alpha : ndarray, shape (ns,)
dual variable
- beta : np.ndarray(nt,)
+ beta : ndarray, shape (nt,)
dual variable
- batch_size : int number
+ batch_size : int
size of the batch
- batch_alpha : np.ndarray(bs,)
+ batch_alpha : ndarray, shape (bs,)
batch of index of alpha
- batch_beta : np.ndarray(bs,)
+ batch_beta : ndarray, shape (bs,)
batch of index of beta
Returns
-------
-
- grad : np.ndarray(ns,)
+ grad : ndarray, shape (ns,)
partial grad F
Examples
--------
-
+ >>> import ot
+ >>> np.random.seed(0)
>>> n_source = 7
>>> n_target = 4
- >>> reg = 1
- >>> numItermax = 20000
- >>> lr = 0.1
- >>> batch_size = 3
- >>> log = True
>>> a = ot.utils.unif(n_source)
>>> b = ot.utils.unif(n_target)
- >>> rng = np.random.RandomState(0)
- >>> X_source = rng.randn(n_source, 2)
- >>> Y_target = rng.randn(n_target, 2)
+ >>> X_source = np.random.randn(n_source, 2)
+ >>> Y_target = np.random.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
- >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
- batch_size,
- numItermax, lr, log)
- >>> print(log['alpha'], log['beta'])
- >>> print(sgd_dual_pi)
+ >>> sgd_dual_pi, log = ot.stochastic.solve_dual_entropic(a, b, M, reg=1, batch_size=3, numItermax=30000, lr=0.1, log=True)
+ >>> log['alpha']
+ array([0.71759102, 1.57057384, 0.85576566, 0.1208211 , 0.59190466,
+ 1.197148 , 0.17805133])
+ >>> log['beta']
+ array([0.49741367, 0.57478564, 1.40075528, 2.75890102])
+ >>> sgd_dual_pi
+ array([[2.09730063e-02, 8.38169324e-02, 7.50365455e-03, 8.72731415e-09],
+ [5.58432437e-03, 5.89881299e-04, 3.09558411e-05, 8.35469849e-07],
+ [3.26489515e-03, 7.15536035e-02, 2.99778211e-02, 3.02601593e-10],
+ [4.05390622e-02, 5.31085068e-02, 6.65191787e-02, 1.55812785e-06],
+ [7.82299812e-02, 6.12099102e-03, 4.44989098e-02, 2.37719187e-03],
+ [5.06266486e-02, 2.16230494e-03, 2.26215141e-03, 6.81514609e-04],
+ [6.06713990e-02, 3.98139808e-02, 5.46829338e-02, 8.62371424e-06]])
References
----------
@@ -530,22 +540,21 @@ def batch_grad_dual(a, b, M, reg, alpha, beta, batch_size, batch_alpha,
        International Conference on Learning Representations (2018),
arXiv preprint arxiv:1711.02283.
'''
-
G = - (np.exp((alpha[batch_alpha, None] + beta[None, batch_beta] -
M[batch_alpha, :][:, batch_beta]) / reg) *
a[batch_alpha, None] * b[None, batch_beta])
grad_beta = np.zeros(np.shape(M)[1])
grad_alpha = np.zeros(np.shape(M)[0])
- grad_beta[batch_beta] = (b[batch_beta] * len(batch_alpha) / np.shape(M)[0] +
- G.sum(0))
- grad_alpha[batch_alpha] = (a[batch_alpha] * len(batch_beta) /
- np.shape(M)[1] + G.sum(1))
+ grad_beta[batch_beta] = (b[batch_beta] * len(batch_alpha) / np.shape(M)[0]
+ + G.sum(0))
+ grad_alpha[batch_alpha] = (a[batch_alpha] * len(batch_beta)
+ / np.shape(M)[1] + G.sum(1))
return grad_alpha, grad_beta
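A hedged sketch of the dual SGD loop these batch gradients feed; the uniform
batch sampling and constant step size are illustrative assumptions, not the
exact library schedule:

    import numpy as np
    from ot.stochastic import batch_grad_dual

    def sgd_dual_sketch(a, b, M, reg, batch_size, n_iter=1000, lr=0.1):
        rng = np.random.RandomState(0)
        alpha = np.zeros(M.shape[0])
        beta = np.zeros(M.shape[1])
        for _ in range(n_iter):
            batch_alpha = rng.choice(M.shape[0], batch_size, replace=False)
            batch_beta = rng.choice(M.shape[1], batch_size, replace=False)
            g_alpha, g_beta = batch_grad_dual(a, b, M, reg, alpha, beta,
                                              batch_size, batch_alpha, batch_beta)
            alpha += lr * g_alpha   # ascend the regularized dual in both variables
            beta += lr * g_beta
        return alpha, beta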
def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
- '''
+ r'''
Compute the sgd algorithm to solve the regularized discrete measures
optimal transport dual problem
@@ -568,33 +577,31 @@ def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
Parameters
----------
-
- a : np.ndarray(ns,)
+ a : ndarray, shape (ns,)
source measure
- b : np.ndarray(nt,)
+ b : ndarray, shape (nt,)
target measure
- M : np.ndarray(ns, nt)
+ M : ndarray, shape (ns, nt)
cost matrix
- reg : float number
+ reg : float
Regularization term > 0
- batch_size : int number
+ batch_size : int
size of the batch
- numItermax : int number
+ numItermax : int
        number of iterations
- lr : float number
+ lr : float
learning rate
Returns
-------
-
- alpha : np.ndarray(ns,)
+ alpha : ndarray, shape (ns,)
dual variable
- beta : np.ndarray(nt,)
+ beta : ndarray, shape (nt,)
dual variable
Examples
--------
-
+ >>> import ot
>>> n_source = 7
>>> n_target = 4
>>> reg = 1
@@ -608,18 +615,26 @@ def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
>>> X_source = rng.randn(n_source, 2)
>>> Y_target = rng.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
- >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
- batch_size,
- numItermax, lr, log)
- >>> print(log['alpha'], log['beta'])
- >>> print(sgd_dual_pi)
+ >>> sgd_dual_pi, log = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size, numItermax, lr, log)
+ >>> log['alpha']
+ array([0.64171798, 1.27932201, 0.78132257, 0.15638935, 0.54888354,
+ 1.03663469, 0.20595781])
+ >>> log['beta']
+ array([0.51207194, 0.58033189, 1.28922676, 2.26859736])
+ >>> sgd_dual_pi
+ array([[1.97276541e-02, 7.81248547e-02, 6.22136048e-03, 4.95442423e-09],
+ [4.23494310e-03, 4.43286263e-04, 2.06927079e-05, 3.82389139e-07],
+ [3.07542414e-03, 6.67897769e-02, 2.48904999e-02, 1.72030247e-10],
+ [4.26271990e-02, 5.53375455e-02, 6.16535024e-02, 9.88812650e-07],
+ [7.60423265e-02, 5.89585256e-03, 3.81267087e-02, 1.39458256e-03],
+ [4.37557504e-02, 1.85189176e-03, 1.72335760e-03, 3.55491279e-04],
+ [6.33096109e-02, 4.11683954e-02, 5.02962051e-02, 5.43097516e-06]])
References
----------
-
[Seguy et al., 2018] :
- International Conference on Learning Representation (2018),
- arXiv preprint arxiv:1711.02283.
+        International Conference on Learning Representations (2018),
+ arXiv preprint arxiv:1711.02283.
'''
n_source = np.shape(M)[0]
@@ -641,7 +656,7 @@ def sgd_entropic_regularization(a, b, M, reg, batch_size, numItermax, lr):
def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1,
log=False):
- '''
+ r'''
Compute the transportation matrix to solve the regularized discrete measures
optimal transport dual problem
@@ -664,35 +679,33 @@ def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1,
Parameters
----------
-
- a : np.ndarray(ns,)
+ a : ndarray, shape (ns,)
source measure
- b : np.ndarray(nt,)
+ b : ndarray, shape (nt,)
target measure
- M : np.ndarray(ns, nt)
+ M : ndarray, shape (ns, nt)
cost matrix
- reg : float number
+ reg : float
Regularization term > 0
- batch_size : int number
+ batch_size : int
size of the batch
- numItermax : int number
+ numItermax : int
        number of iterations
- lr : float number
+ lr : float
learning rate
log : bool, optional
record log if True
Returns
-------
-
- pi : np.ndarray(ns, nt)
+ pi : ndarray, shape (ns, nt)
transportation matrix
log : dict
        log dictionary returned only if log==True in parameters
Examples
--------
-
+ >>> import ot
>>> n_source = 7
>>> n_target = 4
>>> reg = 1
@@ -706,18 +719,27 @@ def solve_dual_entropic(a, b, M, reg, batch_size, numItermax=10000, lr=1,
>>> X_source = rng.randn(n_source, 2)
>>> Y_target = rng.randn(n_target, 2)
>>> M = ot.dist(X_source, Y_target)
- >>> sgd_dual_pi, log = stochastic.solve_dual_entropic(a, b, M, reg,
- batch_size,
- numItermax, lr, log)
- >>> print(log['alpha'], log['beta'])
- >>> print(sgd_dual_pi)
+ >>> sgd_dual_pi, log = ot.stochastic.solve_dual_entropic(a, b, M, reg, batch_size, numItermax, lr, log)
+ >>> log['alpha']
+ array([0.64057733, 1.2683513 , 0.75610161, 0.16024284, 0.54926534,
+ 1.0514201 , 0.19958936])
+ >>> log['beta']
+ array([0.51372571, 0.58843489, 1.27993921, 2.24344807])
+ >>> sgd_dual_pi
+ array([[1.97377795e-02, 7.86706853e-02, 6.15682001e-03, 4.82586997e-09],
+ [4.19566963e-03, 4.42016865e-04, 2.02777272e-05, 3.68823708e-07],
+ [3.00379244e-03, 6.56562018e-02, 2.40462171e-02, 1.63579656e-10],
+ [4.28626062e-02, 5.60031599e-02, 6.13193826e-02, 9.67977735e-07],
+ [7.61972739e-02, 5.94609051e-03, 3.77886693e-02, 1.36046648e-03],
+ [4.44810042e-02, 1.89476742e-03, 1.73285847e-03, 3.51826036e-04],
+ [6.30118293e-02, 4.12398660e-02, 4.95148998e-02, 5.26247246e-06]])
References
----------
[Seguy et al., 2018] :
- International Conference on Learning Representation (2018),
- arXiv preprint arxiv:1711.02283.
+        International Conference on Learning Representations (2018),
+ arXiv preprint arxiv:1711.02283.
'''
opt_alpha, opt_beta = sgd_entropic_regularization(a, b, M, reg, batch_size,
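Once the optimal duals are returned, the primal plan follows in closed form
from the entropic first-order conditions; a one-line sketch of that recovery
(illustrative helper, not a library function):

    import numpy as np

    def plan_from_duals(a, b, M, reg, alpha, beta):
        # pi_ij = a_i * b_j * exp((alpha_i + beta_j - M_ij) / reg)
        return (np.exp((alpha[:, None] + beta[None, :] - M) / reg)
                * a[:, None] * b[None, :])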
diff --git a/ot/unbalanced.py b/ot/unbalanced.py
new file mode 100644
index 0000000..23f6607
--- /dev/null
+++ b/ot/unbalanced.py
@@ -0,0 +1,1023 @@
+# -*- coding: utf-8 -*-
+"""
+Regularized Unbalanced OT
+"""
+
+# Author: Hicham Janati <hicham.janati@inria.fr>
+# License: MIT License
+
+from __future__ import division
+import warnings
+import numpy as np
+from scipy.special import logsumexp
+
+
+
+def sinkhorn_unbalanced(a, b, M, reg, reg_m, method='sinkhorn', numItermax=1000,
+ stopThr=1e-6, verbose=False, log=False, **kwargs):
+ r"""
+ Solve the unbalanced entropic regularization optimal transport problem
+ and return the OT plan
+
+ The function solves the following optimization problem:
+
+ .. math::
+ W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + reg_m KL(\gamma 1, a) + reg_m KL(\gamma^T 1, b)
+
+ s.t.
+ \gamma\geq 0
+ where :
+
+ - M is the (dim_a, dim_b) metric cost matrix
+ - :math:`\Omega` is the entropic regularization
+ term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target unbalanced distributions
+ - KL is the Kullback-Leibler divergence
+
+ The algorithm used for solving the problem is the generalized
+    Sinkhorn-Knopp matrix scaling algorithm as proposed in [10, 25]_
+
+
+ Parameters
+ ----------
+ a : np.ndarray (dim_a,)
+ Unnormalized histogram of dimension dim_a
+ b : np.ndarray (dim_b,) or np.ndarray (dim_b, n_hists)
+ One or multiple unnormalized histograms of dimension dim_b
+ If many, compute all the OT distances (a, b_i)
+ M : np.ndarray (dim_a, dim_b)
+ loss matrix
+ reg : float
+ Entropy regularization term > 0
+ reg_m: float
+ Marginal relaxation term > 0
+ method : str
+        Method used for the solver: either 'sinkhorn', 'sinkhorn_stabilized' or
+        'sinkhorn_reg_scaling'; see those functions for specific parameters
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+        Stop threshold on error (> 0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ if n_hists == 1:
+ gamma : (dim_a x dim_b) ndarray
+ Optimal transportation matrix for the given parameters
+ log : dict
+ log dictionary returned only if `log` is `True`
+ else:
+ ot_distance : (n_hists,) ndarray
+ the OT distance between `a` and each of the histograms `b_i`
+ log : dict
+ log dictionary returned only if `log` is `True`
+
+ Examples
+ --------
+
+ >>> import ot
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.], [1., 0.]]
+ >>> ot.sinkhorn_unbalanced(a, b, M, 1, 1)
+ array([[0.51122823, 0.18807035],
+ [0.18807035, 0.51122823]])
+
+
+ References
+ ----------
+
+ .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal
+ Transport, Advances in Neural Information Processing Systems
+ (NIPS) 26, 2013
+
+ .. [9] Schmitzer, B. (2016). Stabilized Sparse Scaling Algorithms for
+ Entropy Regularized Transport Problems. arXiv preprint arXiv:1610.06519.
+
+ .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016).
+ Scaling algorithms for unbalanced transport problems. arXiv preprint
+ arXiv:1607.05816.
+
+ .. [25] Frogner C., Zhang C., Mobahi H., Araya-Polo M., Poggio T. :
+ Learning with a Wasserstein Loss, Advances in Neural Information
+ Processing Systems (NIPS) 2015
+
+
+ See Also
+ --------
+ ot.unbalanced.sinkhorn_knopp_unbalanced : Unbalanced Classic Sinkhorn [10]
+ ot.unbalanced.sinkhorn_stabilized_unbalanced:
+        Unbalanced Stabilized Sinkhorn [9][10]
+    ot.unbalanced.sinkhorn_reg_scaling_unbalanced:
+        Unbalanced Sinkhorn with epsilon scaling [9][10]
+
+ """
+
+ if method.lower() == 'sinkhorn':
+ return sinkhorn_knopp_unbalanced(a, b, M, reg, reg_m,
+ numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose,
+ log=log, **kwargs)
+
+ elif method.lower() == 'sinkhorn_stabilized':
+ return sinkhorn_stabilized_unbalanced(a, b, M, reg, reg_m,
+ numItermax=numItermax,
+ stopThr=stopThr,
+ verbose=verbose,
+ log=log, **kwargs)
+ elif method.lower() in ['sinkhorn_reg_scaling']:
+ warnings.warn('Method not implemented yet. Using classic Sinkhorn Knopp')
+ return sinkhorn_knopp_unbalanced(a, b, M, reg, reg_m,
+ numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose,
+ log=log, **kwargs)
+ else:
+ raise ValueError("Unknown method '%s'." % method)
+
+
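A short usage sketch of the dispatcher above, showing how reg_m controls how
strictly the marginals are enforced (toy numbers, illustrative only):

    import numpy as np
    import ot

    a = np.array([1., .5])                # unnormalized histograms
    b = np.array([.5, .5])
    M = np.array([[0., 1.], [1., 0.]])
    G_tight = ot.sinkhorn_unbalanced(a, b, M, reg=0.1, reg_m=10.)
    G_loose = ot.sinkhorn_unbalanced(a, b, M, reg=0.1, reg_m=0.1)
    # with a small reg_m, creating/destroying mass is cheap, so less is moved
    print(G_tight.sum(), G_loose.sum())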
+def sinkhorn_unbalanced2(a, b, M, reg, reg_m, method='sinkhorn',
+ numItermax=1000, stopThr=1e-6, verbose=False,
+ log=False, **kwargs):
+ r"""
+ Solve the entropic regularization unbalanced optimal transport problem and
+ return the loss
+
+ The function solves the following optimization problem:
+
+ .. math::
+ W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + reg_m KL(\gamma 1, a) + reg_m KL(\gamma^T 1, b)
+
+ s.t.
+ \gamma\geq 0
+ where :
+
+ - M is the (dim_a, dim_b) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term
+ :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target unbalanced distributions
+ - KL is the Kullback-Leibler divergence
+
+ The algorithm used for solving the problem is the generalized
+    Sinkhorn-Knopp matrix scaling algorithm as proposed in [10, 25]_
+
+
+ Parameters
+ ----------
+ a : np.ndarray (dim_a,)
+ Unnormalized histogram of dimension dim_a
+ b : np.ndarray (dim_b,) or np.ndarray (dim_b, n_hists)
+ One or multiple unnormalized histograms of dimension dim_b
+ If many, compute all the OT distances (a, b_i)
+ M : np.ndarray (dim_a, dim_b)
+ loss matrix
+ reg : float
+ Entropy regularization term > 0
+ reg_m: float
+ Marginal relaxation term > 0
+ method : str
+        Method used for the solver: either 'sinkhorn', 'sinkhorn_stabilized' or
+        'sinkhorn_reg_scaling'; see those functions for specific parameters
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+        Stop threshold on error (> 0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ ot_distance : (n_hists,) ndarray
+ the OT distance between `a` and each of the histograms `b_i`
+ log : dict
+ log dictionary returned only if `log` is `True`
+
+ Examples
+ --------
+
+ >>> import ot
+ >>> a=[.5, .10]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.],[1., 0.]]
+ >>> ot.unbalanced.sinkhorn_unbalanced2(a, b, M, 1., 1.)
+ array([0.31912866])
+
+
+
+ References
+ ----------
+
+ .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal
+ Transport, Advances in Neural Information Processing Systems
+ (NIPS) 26, 2013
+
+ .. [9] Schmitzer, B. (2016). Stabilized Sparse Scaling Algorithms for
+ Entropy Regularized Transport Problems. arXiv preprint arXiv:1610.06519.
+
+ .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016).
+ Scaling algorithms for unbalanced transport problems. arXiv preprint
+ arXiv:1607.05816.
+
+ .. [25] Frogner C., Zhang C., Mobahi H., Araya-Polo M., Poggio T. :
+ Learning with a Wasserstein Loss, Advances in Neural Information
+ Processing Systems (NIPS) 2015
+
+ See Also
+ --------
+ ot.unbalanced.sinkhorn_knopp : Unbalanced Classic Sinkhorn [10]
+    ot.unbalanced.sinkhorn_stabilized: Unbalanced Stabilized Sinkhorn [9][10]
+    ot.unbalanced.sinkhorn_reg_scaling: Unbalanced Sinkhorn with epsilon scaling [9][10]
+
+ """
+ b = np.asarray(b, dtype=np.float64)
+ if len(b.shape) < 2:
+ b = b[:, None]
+ if method.lower() == 'sinkhorn':
+ return sinkhorn_knopp_unbalanced(a, b, M, reg, reg_m,
+ numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose,
+ log=log, **kwargs)
+
+ elif method.lower() == 'sinkhorn_stabilized':
+ return sinkhorn_stabilized_unbalanced(a, b, M, reg, reg_m,
+ numItermax=numItermax,
+ stopThr=stopThr,
+ verbose=verbose,
+ log=log, **kwargs)
+ elif method.lower() in ['sinkhorn_reg_scaling']:
+ warnings.warn('Method not implemented yet. Using classic Sinkhorn Knopp')
+ return sinkhorn_knopp_unbalanced(a, b, M, reg, reg_m,
+ numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose,
+ log=log, **kwargs)
+ else:
+        raise ValueError("Unknown method '%s'." % method)
+
+
+def sinkhorn_knopp_unbalanced(a, b, M, reg, reg_m, numItermax=1000,
+ stopThr=1e-6, verbose=False, log=False, **kwargs):
+ r"""
+ Solve the entropic regularization unbalanced optimal transport problem and return the loss
+
+ The function solves the following optimization problem:
+
+ .. math::
+        W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + reg_m KL(\gamma 1, a) + reg_m KL(\gamma^T 1, b)
+
+ s.t.
+ \gamma\geq 0
+ where :
+
+ - M is the (dim_a, dim_b) metric cost matrix
+ - :math:`\Omega` is the entropic regularization term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target unbalanced distributions
+ - KL is the Kullback-Leibler divergence
+
+    The algorithm used for solving the problem is the generalized Sinkhorn-Knopp matrix scaling algorithm as proposed in [10, 25]_
+
+
+ Parameters
+ ----------
+ a : np.ndarray (dim_a,)
+ Unnormalized histogram of dimension dim_a
+ b : np.ndarray (dim_b,) or np.ndarray (dim_b, n_hists)
+ One or multiple unnormalized histograms of dimension dim_b
+ If many, compute all the OT distances (a, b_i)
+ M : np.ndarray (dim_a, dim_b)
+ loss matrix
+ reg : float
+ Entropy regularization term > 0
+ reg_m: float
+ Marginal relaxation term > 0
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+        Stop threshold on error (> 0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ if n_hists == 1:
+ gamma : (dim_a x dim_b) ndarray
+ Optimal transportation matrix for the given parameters
+ log : dict
+ log dictionary returned only if `log` is `True`
+ else:
+ ot_distance : (n_hists,) ndarray
+ the OT distance between `a` and each of the histograms `b_i`
+ log : dict
+ log dictionary returned only if `log` is `True`
+ Examples
+ --------
+
+ >>> import ot
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.],[1., 0.]]
+ >>> ot.unbalanced.sinkhorn_knopp_unbalanced(a, b, M, 1., 1.)
+ array([[0.51122823, 0.18807035],
+ [0.18807035, 0.51122823]])
+
+ References
+ ----------
+
+ .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016).
+ Scaling algorithms for unbalanced transport problems. arXiv preprint
+ arXiv:1607.05816.
+
+ .. [25] Frogner C., Zhang C., Mobahi H., Araya-Polo M., Poggio T. :
+ Learning with a Wasserstein Loss, Advances in Neural Information
+ Processing Systems (NIPS) 2015
+
+ See Also
+ --------
+ ot.lp.emd : Unregularized OT
+ ot.optim.cg : General regularized OT
+
+ """
+
+ a = np.asarray(a, dtype=np.float64)
+ b = np.asarray(b, dtype=np.float64)
+ M = np.asarray(M, dtype=np.float64)
+
+ dim_a, dim_b = M.shape
+
+ if len(a) == 0:
+ a = np.ones(dim_a, dtype=np.float64) / dim_a
+ if len(b) == 0:
+ b = np.ones(dim_b, dtype=np.float64) / dim_b
+
+ if len(b.shape) > 1:
+ n_hists = b.shape[1]
+ else:
+ n_hists = 0
+
+ if log:
+ log = {'err': []}
+
+ # we assume that no distances are null except those of the diagonal of
+ # distances
+ if n_hists:
+ u = np.ones((dim_a, 1)) / dim_a
+ v = np.ones((dim_b, n_hists)) / dim_b
+ a = a.reshape(dim_a, 1)
+ else:
+ u = np.ones(dim_a) / dim_a
+ v = np.ones(dim_b) / dim_b
+
+ # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute
+ K = np.empty(M.shape, dtype=M.dtype)
+ np.divide(M, -reg, out=K)
+ np.exp(K, out=K)
+
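+    # fi = reg_m / (reg_m + reg) damps each Sinkhorn update: it is the
+    # closed-form step for the KL-relaxed marginal subproblems (cf. [10]),
+    # and fi -> 1 recovers the balanced Sinkhorn update as reg_m -> infinity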
+ fi = reg_m / (reg_m + reg)
+
+ err = 1.
+
+ for i in range(numItermax):
+ uprev = u
+ vprev = v
+
+ Kv = K.dot(v)
+ u = (a / Kv) ** fi
+ Ktu = K.T.dot(u)
+ v = (b / Ktu) ** fi
+
+ if (np.any(Ktu == 0.)
+ or np.any(np.isnan(u)) or np.any(np.isnan(v))
+ or np.any(np.isinf(u)) or np.any(np.isinf(v))):
+ # we have reached the machine precision
+ # come back to previous solution and quit loop
+ warnings.warn('Numerical errors at iteration %s' % i)
+ u = uprev
+ v = vprev
+ break
+
+ err_u = abs(u - uprev).max() / max(abs(u).max(), abs(uprev).max(), 1.)
+ err_v = abs(v - vprev).max() / max(abs(v).max(), abs(vprev).max(), 1.)
+ err = 0.5 * (err_u + err_v)
+ if log:
+ log['err'].append(err)
+ if verbose:
+ if i % 50 == 0:
+ print(
+ '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(i, err))
+ if err < stopThr:
+ break
+
+ if log:
+ log['logu'] = np.log(u + 1e-300)
+ log['logv'] = np.log(v + 1e-300)
+
+ if n_hists: # return only loss
+ res = np.einsum('ik,ij,jk,ij->k', u, K, v, M)
+ if log:
+ return res, log
+ else:
+ return res
+
+ else: # return OT matrix
+
+ if log:
+ return u[:, None] * K * v[None, :], log
+ else:
+ return u[:, None] * K * v[None, :]
+
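A quick sanity check one can run on the returned plan: unlike balanced
Sinkhorn, the marginals only match a and b up to the KL penalty, and the gap
shrinks as reg_m grows (toy numbers, illustrative only):

    import numpy as np
    import ot

    a = np.array([1., 1.])                # total masses deliberately differ
    b = np.array([.5, .5])
    M = np.array([[0., 1.], [1., 0.]])
    G = ot.unbalanced.sinkhorn_knopp_unbalanced(a, b, M, reg=0.1, reg_m=1.)
    print(np.abs(G.sum(axis=1) - a).max())   # row marginal gap
    print(np.abs(G.sum(axis=0) - b).max())   # column marginal gap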
+
+def sinkhorn_stabilized_unbalanced(a, b, M, reg, reg_m, tau=1e5, numItermax=1000,
+ stopThr=1e-6, verbose=False, log=False,
+ **kwargs):
+ r"""
+ Solve the entropic regularization unbalanced optimal transport
+ problem and return the loss
+
+ The function solves the following optimization problem using log-domain
+ stabilization as proposed in [10]:
+
+ .. math::
+ W = \min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma) + reg_m KL(\gamma 1, a) + reg_m KL(\gamma^T 1, b)
+
+ s.t.
+ \gamma\geq 0
+ where :
+
+ - M is the (dim_a, dim_b) metric cost matrix
+ - :math:`\Omega` is the entropic regularization
+ term :math:`\Omega(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are source and target unbalanced distributions
+ - KL is the Kullback-Leibler divergence
+
+ The algorithm used for solving the problem is the generalized
+    Sinkhorn-Knopp matrix scaling algorithm as proposed in [10, 25]_
+
+
+ Parameters
+ ----------
+ a : np.ndarray (dim_a,)
+ Unnormalized histogram of dimension dim_a
+ b : np.ndarray (dim_b,) or np.ndarray (dim_b, n_hists)
+ One or multiple unnormalized histograms of dimension dim_b
+ If many, compute all the OT distances (a, b_i)
+ M : np.ndarray (dim_a, dim_b)
+ loss matrix
+ reg : float
+ Entropy regularization term > 0
+ reg_m: float
+ Marginal relaxation term > 0
+ tau : float
+        Threshold for max value in u or v for log scaling
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+        Stop threshold on error (> 0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ if n_hists == 1:
+ gamma : (dim_a x dim_b) ndarray
+ Optimal transportation matrix for the given parameters
+ log : dict
+ log dictionary returned only if `log` is `True`
+ else:
+ ot_distance : (n_hists,) ndarray
+ the OT distance between `a` and each of the histograms `b_i`
+ log : dict
+ log dictionary returned only if `log` is `True`
+ Examples
+ --------
+
+ >>> import ot
+ >>> a=[.5, .5]
+ >>> b=[.5, .5]
+ >>> M=[[0., 1.],[1., 0.]]
+ >>> ot.unbalanced.sinkhorn_stabilized_unbalanced(a, b, M, 1., 1.)
+ array([[0.51122823, 0.18807035],
+ [0.18807035, 0.51122823]])
+
+ References
+ ----------
+
+ .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016).
+ Scaling algorithms for unbalanced transport problems. arXiv preprint arXiv:1607.05816.
+
+ .. [25] Frogner C., Zhang C., Mobahi H., Araya-Polo M., Poggio T. :
+ Learning with a Wasserstein Loss, Advances in Neural Information
+ Processing Systems (NIPS) 2015
+
+ See Also
+ --------
+ ot.lp.emd : Unregularized OT
+ ot.optim.cg : General regularized OT
+
+ """
+
+ a = np.asarray(a, dtype=np.float64)
+ b = np.asarray(b, dtype=np.float64)
+ M = np.asarray(M, dtype=np.float64)
+
+ dim_a, dim_b = M.shape
+
+ if len(a) == 0:
+ a = np.ones(dim_a, dtype=np.float64) / dim_a
+ if len(b) == 0:
+ b = np.ones(dim_b, dtype=np.float64) / dim_b
+
+ if len(b.shape) > 1:
+ n_hists = b.shape[1]
+ else:
+ n_hists = 0
+
+ if log:
+ log = {'err': []}
+
+ # we assume that no distances are null except those of the diagonal of
+ # distances
+ if n_hists:
+ u = np.ones((dim_a, n_hists)) / dim_a
+ v = np.ones((dim_b, n_hists)) / dim_b
+ a = a.reshape(dim_a, 1)
+ else:
+ u = np.ones(dim_a) / dim_a
+ v = np.ones(dim_b) / dim_b
+
+ # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute
+ K = np.empty(M.shape, dtype=M.dtype)
+ np.divide(M, -reg, out=K)
+ np.exp(K, out=K)
+
+ fi = reg_m / (reg_m + reg)
+
+ cpt = 0
+ err = 1.
+ alpha = np.zeros(dim_a)
+ beta = np.zeros(dim_b)
+ while (err > stopThr and cpt < numItermax):
+ uprev = u
+ vprev = v
+
+ Kv = K.dot(v)
+ f_alpha = np.exp(- alpha / (reg + reg_m))
+ f_beta = np.exp(- beta / (reg + reg_m))
+
+ if n_hists:
+ f_alpha = f_alpha[:, None]
+ f_beta = f_beta[:, None]
+ u = ((a / (Kv + 1e-16)) ** fi) * f_alpha
+ Ktu = K.T.dot(u)
+ v = ((b / (Ktu + 1e-16)) ** fi) * f_beta
+ absorbing = False
+ if (u > tau).any() or (v > tau).any():
+ absorbing = True
+ if n_hists:
+ alpha = alpha + reg * np.log(np.max(u, 1))
+ beta = beta + reg * np.log(np.max(v, 1))
+ else:
+ alpha = alpha + reg * np.log(np.max(u))
+ beta = beta + reg * np.log(np.max(v))
+ K = np.exp((alpha[:, None] + beta[None, :] -
+ M) / reg)
+ v = np.ones_like(v)
+ Kv = K.dot(v)
+
+ if (np.any(Ktu == 0.)
+ or np.any(np.isnan(u)) or np.any(np.isnan(v))
+ or np.any(np.isinf(u)) or np.any(np.isinf(v))):
+ # we have reached the machine precision
+ # come back to previous solution and quit loop
+ warnings.warn('Numerical errors at iteration %s' % cpt)
+ u = uprev
+ v = vprev
+ break
+ if (cpt % 10 == 0 and not absorbing) or cpt == 0:
+            # we can speed up the process by checking for the error only
+            # every 10th iteration
+ err = abs(u - uprev).max() / max(abs(u).max(), abs(uprev).max(),
+ 1.)
+ if log:
+ log['err'].append(err)
+ if verbose:
+ if cpt % 200 == 0:
+ print(
+ '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err))
+ cpt = cpt + 1
+
+ if err > stopThr:
+        warnings.warn("Stabilized Unbalanced Sinkhorn did not converge. "
+                      "Try a larger entropy `reg` or a lower mass `reg_m`, "
+                      "or a larger absorption threshold `tau`.")
+ if n_hists:
+ logu = alpha[:, None] / reg + np.log(u)
+ logv = beta[:, None] / reg + np.log(v)
+ else:
+ logu = alpha / reg + np.log(u)
+ logv = beta / reg + np.log(v)
+ if log:
+ log['logu'] = logu
+ log['logv'] = logv
+ if n_hists: # return only loss
+ res = logsumexp(np.log(M + 1e-100)[:, :, None] + logu[:, None, :] +
+ logv[None, :, :] - M[:, :, None] / reg, axis=(0, 1))
+ res = np.exp(res)
+ if log:
+ return res, log
+ else:
+ return res
+
+ else: # return OT matrix
+ ot_matrix = np.exp(logu[:, None] + logv[None, :] - M / reg)
+ if log:
+ return ot_matrix, log
+ else:
+ return ot_matrix
+
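Since the absorption steps only move mass between (u, v) and the log-domain
potentials (alpha, beta), the stabilized solver targets the same optimum as
the plain one; a hedged agreement check on a well-conditioned toy problem:

    import numpy as np
    import ot

    a = b = np.array([.5, .5])
    M = np.array([[0., 1.], [1., 0.]])
    G_plain = ot.unbalanced.sinkhorn_knopp_unbalanced(a, b, M, 1., 1.)
    G_stab = ot.unbalanced.sinkhorn_stabilized_unbalanced(a, b, M, 1., 1.)
    print(np.abs(G_plain - G_stab).max())    # expected to be close to zero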
+
+def barycenter_unbalanced_stabilized(A, M, reg, reg_m, weights=None, tau=1e3,
+ numItermax=1000, stopThr=1e-6,
+ verbose=False, log=False):
+    r"""Compute the entropic unbalanced Wasserstein barycenter of A with stabilization.
+
+ The function solves the following optimization problem:
+
+ .. math::
+ \mathbf{a} = arg\min_\mathbf{a} \sum_i Wu_{reg}(\mathbf{a},\mathbf{a}_i)
+
+ where :
+
+ - :math:`Wu_{reg}(\cdot,\cdot)` is the unbalanced entropic regularized
+ Wasserstein distance (see ot.unbalanced.sinkhorn_unbalanced)
+ - :math:`\mathbf{a}_i` are training distributions in the columns of
+ matrix :math:`\mathbf{A}`
+ - reg and :math:`\mathbf{M}` are respectively the regularization term and
+ the cost matrix for OT
+    - reg_m is the marginal relaxation hyperparameter
+
+ The algorithm used for solving the problem is the generalized
+ Sinkhorn-Knopp matrix scaling algorithm as proposed in [10]_
+
+ Parameters
+ ----------
+ A : np.ndarray (dim, n_hists)
+ `n_hists` training distributions a_i of dimension dim
+ M : np.ndarray (dim, dim)
+ ground metric matrix for OT.
+ reg : float
+ Entropy regularization term > 0
+ reg_m : float
+ Marginal relaxation term > 0
+ tau : float
+ Stabilization threshold for log domain absorption.
+    weights : np.ndarray (n_hists,), optional
+        Weight of each distribution (barycentric coordinates)
+ If None, uniform weights are used.
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+        Stop threshold on error (> 0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ a : (dim,) ndarray
+ Unbalanced Wasserstein barycenter
+ log : dict
+        log dictionary returned only if log==True in parameters
+
+
+ References
+ ----------
+
+ .. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré,
+ G. (2015). Iterative Bregman projections for regularized transportation
+ problems. SIAM Journal on Scientific Computing, 37(2), A1111-A1138.
+ .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016).
+ Scaling algorithms for unbalanced transport problems. arXiv preprint
+ arXiv:1607.05816.
+
+
+ """
+ dim, n_hists = A.shape
+ if weights is None:
+ weights = np.ones(n_hists) / n_hists
+ else:
+ assert(len(weights) == A.shape[1])
+
+ if log:
+ log = {'err': []}
+
+ fi = reg_m / (reg_m + reg)
+
+ u = np.ones((dim, n_hists)) / dim
+ v = np.ones((dim, n_hists)) / dim
+
+ # Next 3 lines equivalent to K= np.exp(-M/reg), but faster to compute
+ K = np.empty(M.shape, dtype=M.dtype)
+ np.divide(M, -reg, out=K)
+ np.exp(K, out=K)
+
+ fi = reg_m / (reg_m + reg)
+
+ cpt = 0
+ err = 1.
+ alpha = np.zeros(dim)
+ beta = np.zeros(dim)
+ q = np.ones(dim) / dim
+ for i in range(numItermax):
+ qprev = q.copy()
+ Kv = K.dot(v)
+ f_alpha = np.exp(- alpha / (reg + reg_m))
+ f_beta = np.exp(- beta / (reg + reg_m))
+ f_alpha = f_alpha[:, None]
+ f_beta = f_beta[:, None]
+ u = ((A / (Kv + 1e-16)) ** fi) * f_alpha
+ Ktu = K.T.dot(u)
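+        # the next two lines aggregate the back-projected scalings into the
+        # barycenter via a weighted power mean (up to the absorption factor
+        # f_beta): q = (sum_k w_k (K^T u_k)^(1 - fi))^(1 / (1 - fi))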
+ q = (Ktu ** (1 - fi)) * f_beta
+ q = q.dot(weights) ** (1 / (1 - fi))
+ Q = q[:, None]
+ v = ((Q / (Ktu + 1e-16)) ** fi) * f_beta
+ absorbing = False
+ if (u > tau).any() or (v > tau).any():
+ absorbing = True
+ alpha = alpha + reg * np.log(np.max(u, 1))
+ beta = beta + reg * np.log(np.max(v, 1))
+ K = np.exp((alpha[:, None] + beta[None, :] -
+ M) / reg)
+ v = np.ones_like(v)
+ Kv = K.dot(v)
+ if (np.any(Ktu == 0.)
+ or np.any(np.isnan(u)) or np.any(np.isnan(v))
+ or np.any(np.isinf(u)) or np.any(np.isinf(v))):
+ # we have reached the machine precision
+ # come back to previous solution and quit loop
+ warnings.warn('Numerical errors at iteration %s' % cpt)
+ q = qprev
+ break
+ if (i % 10 == 0 and not absorbing) or i == 0:
+            # we can speed up the process by checking for the error only
+            # every 10th iteration
+ err = abs(q - qprev).max() / max(abs(q).max(),
+ abs(qprev).max(), 1.)
+ if log:
+ log['err'].append(err)
+ if verbose:
+ if i % 50 == 0:
+ print(
+ '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(i, err))
+ if err < stopThr:
+ break
+
+ if err > stopThr:
+        warnings.warn("Stabilized Unbalanced Sinkhorn did not converge. "
+                      "Try a larger entropy `reg` or a lower mass `reg_m`, "
+                      "or a larger absorption threshold `tau`.")
+ if log:
+ log['niter'] = i
+ log['logu'] = np.log(u + 1e-300)
+ log['logv'] = np.log(v + 1e-300)
+ return q, log
+ else:
+ return q
+
+
+def barycenter_unbalanced_sinkhorn(A, M, reg, reg_m, weights=None,
+ numItermax=1000, stopThr=1e-6,
+ verbose=False, log=False):
+    r"""Compute the entropic unbalanced Wasserstein barycenter of A.
+
+    The function solves the following optimization problem:
+
+ .. math::
+ \mathbf{a} = arg\min_\mathbf{a} \sum_i Wu_{reg}(\mathbf{a},\mathbf{a}_i)
+
+ where :
+
+ - :math:`Wu_{reg}(\cdot,\cdot)` is the unbalanced entropic regularized
+ Wasserstein distance (see ot.unbalanced.sinkhorn_unbalanced)
+ - :math:`\mathbf{a}_i` are training distributions in the columns of matrix
+ :math:`\mathbf{A}`
+ - reg and :math:`\mathbf{M}` are respectively the regularization term and
+ the cost matrix for OT
+    - reg_m is the marginal relaxation hyperparameter
+
+ The algorithm used for solving the problem is the generalized
+ Sinkhorn-Knopp matrix scaling algorithm as proposed in [10]_
+
+ Parameters
+ ----------
+ A : np.ndarray (dim, n_hists)
+ `n_hists` training distributions a_i of dimension dim
+ M : np.ndarray (dim, dim)
+ ground metric matrix for OT.
+ reg : float
+ Entropy regularization term > 0
+ reg_m: float
+ Marginal relaxation term > 0
+    weights : np.ndarray (n_hists,), optional
+        Weight of each distribution (barycentric coordinates)
+ If None, uniform weights are used.
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+        Stop threshold on error (> 0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ a : (dim,) ndarray
+ Unbalanced Wasserstein barycenter
+ log : dict
+        log dictionary returned only if log==True in parameters
+
+
+ References
+ ----------
+
+ .. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G.
+ (2015). Iterative Bregman projections for regularized transportation
+ problems. SIAM Journal on Scientific Computing, 37(2), A1111-A1138.
+ .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016).
+        Scaling algorithms for unbalanced transport problems. arXiv preprint
+ arXiv:1607.05816.
+
+
+ """
+ dim, n_hists = A.shape
+ if weights is None:
+ weights = np.ones(n_hists) / n_hists
+ else:
+ assert(len(weights) == A.shape[1])
+
+ if log:
+ log = {'err': []}
+
+ K = np.exp(- M / reg)
+
+ fi = reg_m / (reg_m + reg)
+
+ v = np.ones((dim, n_hists))
+ u = np.ones((dim, 1))
+ q = np.ones(dim)
+ err = 1.
+
+ for i in range(numItermax):
+ uprev = u.copy()
+ vprev = v.copy()
+ qprev = q.copy()
+
+ Kv = K.dot(v)
+ u = (A / Kv) ** fi
+ Ktu = K.T.dot(u)
+ q = ((Ktu ** (1 - fi)).dot(weights))
+ q = q ** (1 / (1 - fi))
+ Q = q[:, None]
+ v = (Q / Ktu) ** fi
+
+ if (np.any(Ktu == 0.)
+ or np.any(np.isnan(u)) or np.any(np.isnan(v))
+ or np.any(np.isinf(u)) or np.any(np.isinf(v))):
+ # we have reached the machine precision
+ # come back to previous solution and quit loop
+ warnings.warn('Numerical errors at iteration %s' % i)
+ u = uprev
+ v = vprev
+ q = qprev
+ break
+ # compute change in barycenter
+ err = abs(q - qprev).max()
+ err /= max(abs(q).max(), abs(qprev).max(), 1.)
+ if log:
+ log['err'].append(err)
+        # stop when the barycenter stops changing, after at least 10 iterations
+ if err < stopThr and i > 10:
+ break
+
+ if verbose:
+ if i % 10 == 0:
+ print(
+ '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(i, err))
+
+ if log:
+ log['niter'] = i
+ log['logu'] = np.log(u + 1e-300)
+ log['logv'] = np.log(v + 1e-300)
+ return q, log
+ else:
+ return q
+
+
+def barycenter_unbalanced(A, M, reg, reg_m, method="sinkhorn", weights=None,
+ numItermax=1000, stopThr=1e-6,
+ verbose=False, log=False, **kwargs):
+    r"""Compute the entropic unbalanced Wasserstein barycenter of A.
+
+    The function solves the following optimization problem:
+
+ .. math::
+ \mathbf{a} = arg\min_\mathbf{a} \sum_i Wu_{reg}(\mathbf{a},\mathbf{a}_i)
+
+ where :
+
+ - :math:`Wu_{reg}(\cdot,\cdot)` is the unbalanced entropic regularized
+ Wasserstein distance (see ot.unbalanced.sinkhorn_unbalanced)
+ - :math:`\mathbf{a}_i` are training distributions in the columns of matrix
+ :math:`\mathbf{A}`
+ - reg and :math:`\mathbf{M}` are respectively the regularization term and
+ the cost matrix for OT
+    - reg_m is the marginal relaxation hyperparameter
+
+ The algorithm used for solving the problem is the generalized
+ Sinkhorn-Knopp matrix scaling algorithm as proposed in [10]_
+
+ Parameters
+ ----------
+ A : np.ndarray (dim, n_hists)
+ `n_hists` training distributions a_i of dimension dim
+ M : np.ndarray (dim, dim)
+ ground metric matrix for OT.
+ reg : float
+ Entropy regularization term > 0
+ reg_m: float
+ Marginal relaxation term > 0
+    weights : np.ndarray (n_hists,), optional
+        Weight of each distribution (barycentric coordinates)
+ If None, uniform weights are used.
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+        Stop threshold on error (> 0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ a : (dim,) ndarray
+ Unbalanced Wasserstein barycenter
+ log : dict
+        log dictionary returned only if log==True in parameters
+
+
+ References
+ ----------
+
+ .. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G.
+ (2015). Iterative Bregman projections for regularized transportation
+ problems. SIAM Journal on Scientific Computing, 37(2), A1111-A1138.
+ .. [10] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016).
+        Scaling algorithms for unbalanced transport problems. arXiv preprint
+ arXiv:1607.05816.
+
+ """
+
+ if method.lower() == 'sinkhorn':
+ return barycenter_unbalanced_sinkhorn(A, M, reg, reg_m,
+ weights=weights,
+ numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose,
+ log=log, **kwargs)
+
+ elif method.lower() == 'sinkhorn_stabilized':
+ return barycenter_unbalanced_stabilized(A, M, reg, reg_m,
+ weights=weights,
+ numItermax=numItermax,
+ stopThr=stopThr,
+ verbose=verbose,
+ log=log, **kwargs)
+ elif method.lower() in ['sinkhorn_reg_scaling']:
+ warnings.warn('Method not implemented yet. Using classic Sinkhorn Knopp')
+ return barycenter_unbalanced(A, M, reg, reg_m,
+ weights=weights,
+ numItermax=numItermax,
+ stopThr=stopThr, verbose=verbose,
+ log=log, **kwargs)
+ else:
+ raise ValueError("Unknown method '%s'." % method)
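A minimal usage sketch of this dispatcher (histograms and hyperparameters are
illustrative only):

    import numpy as np
    import ot

    A = np.array([[1., 0.],
                  [0., 1.]])              # two histograms, one per column
    M = np.array([[0., 1.], [1., 0.]])    # ground cost
    bary = ot.barycenter_unbalanced(A, M, reg=0.1, reg_m=1., method='sinkhorn')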
diff --git a/ot/utils.py b/ot/utils.py
index bb21b38..b71458b 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
-Various function that can be usefull
+Various useful functions
"""
# Author: Remi Flamary <remi.flamary@unice.fr>
@@ -111,12 +111,12 @@ def dist(x1, x2=None, metric='sqeuclidean'):
Parameters
----------
- x1 : np.array (n1,d)
+ x1 : ndarray, shape (n1,d)
matrix with n1 samples of size d
- x2 : np.array (n2,d), optional
+    x2 : ndarray, shape (n2, d), optional
matrix with n2 samples of size d (if None then x2=x1)
- metric : str, fun, optional
- name of the metric to be computed (full list in the doc of scipy), If a string,
+ metric : str | callable, optional
+        Name of the metric to be computed (full list in the doc of scipy). If a string,
the distance function can be 'braycurtis', 'canberra', 'chebyshev', 'cityblock',
'correlation', 'cosine', 'dice', 'euclidean', 'hamming', 'jaccard', 'kulsinski',
'mahalanobis', 'matching', 'minkowski', 'rogerstanimoto', 'russellrao', 'seuclidean',
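A short usage sketch for dist (shapes and metrics are illustrative):

    import numpy as np
    import ot

    X = np.random.randn(5, 2)
    Y = np.random.randn(4, 2)
    M = ot.dist(X, Y)                         # squared Euclidean by default
    M1 = ot.dist(X, Y, metric='cityblock')    # any metric name accepted by scipy's cdist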
@@ -138,26 +138,21 @@ def dist(x1, x2=None, metric='sqeuclidean'):
def dist0(n, method='lin_square'):
- """Compute standard cost matrices of size (n,n) for OT problems
+ """Compute standard cost matrices of size (n, n) for OT problems
Parameters
----------
-
n : int
- size of the cost matrix
+ Size of the cost matrix.
method : str, optional
Type of loss matrix chosen from:
* 'lin_square' : linear sampling between 0 and n-1, quadratic loss
-
Returns
-------
-
- M : np.array (n1,n2)
- distance matrix computed with given metric
-
-
+    M : ndarray, shape (n, n)
+        Cost matrix computed with the given method.
"""
res = 0
if method == 'lin_square':
@@ -169,33 +164,34 @@ def dist0(n, method='lin_square'):
def cost_normalization(C, norm=None):
""" Apply normalization to the loss matrix
-
Parameters
----------
- C : np.array (n1, n2)
+ C : ndarray, shape (n1, n2)
The cost matrix to normalize.
norm : str
- type of normalization from 'median','max','log','loglog'. Any other
- value do not normalize.
-
+        Type of normalization from 'median', 'max', 'log', 'loglog'.
+        If None, no normalization is applied; any other value raises a
+        ValueError.
Returns
-------
-
- C : np.array (n1, n2)
+ C : ndarray, shape (n1, n2)
The input cost matrix normalized according to given norm.
-
"""
- if norm == "median":
+ if norm is None:
+ pass
+ elif norm == "median":
C /= float(np.median(C))
elif norm == "max":
C /= float(np.max(C))
elif norm == "log":
C = np.log(1 + C)
elif norm == "loglog":
- C = np.log(1 + np.log(1 + C))
-
+ C = np.log1p(np.log1p(C))
+ else:
+ raise ValueError('Norm %s is not a valid option.\n'
+ 'Valid options are:\n'
+ 'median, max, log, loglog' % norm)
return C
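A small usage sketch (values are illustrative):

    import numpy as np
    from ot.utils import cost_normalization

    C = np.array([[0., 2.], [4., 8.]])
    print(cost_normalization(C.copy(), 'max'))      # entries divided by 8
    print(cost_normalization(C.copy(), 'median'))   # entries divided by the median, 3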
@@ -214,23 +210,28 @@ def fun(f, q_in, q_out):
def parmap(f, X, nprocs=multiprocessing.cpu_count()):
-    """ paralell map for multiprocessing """
-    q_in = multiprocessing.Queue(1)
-    q_out = multiprocessing.Queue()
-    proc = [multiprocessing.Process(target=fun, args=(f, q_in, q_out))
-            for _ in range(nprocs)]
-    for p in proc:
-        p.daemon = True
-        p.start()
-    sent = [q_in.put((i, x)) for i, x in enumerate(X)]
-    [q_in.put((None, None)) for _ in range(nprocs)]
-    res = [q_out.get() for _ in range(len(sent))]
-    [p.join() for p in proc]
-    return [x for i, x in sorted(res)]
+    """ parallel map for multiprocessing (only map on windows)"""
+    if not sys.platform.endswith('win32'):
+        q_in = multiprocessing.Queue(1)
+        q_out = multiprocessing.Queue()
+
+        proc = [multiprocessing.Process(target=fun, args=(f, q_in, q_out))
+                for _ in range(nprocs)]
+        for p in proc:
+            p.daemon = True
+            p.start()
+
+        sent = [q_in.put((i, x)) for i, x in enumerate(X)]
+        [q_in.put((None, None)) for _ in range(nprocs)]
+        res = [q_out.get() for _ in range(len(sent))]
+
+        [p.join() for p in proc]
+
+        return [x for i, x in sorted(res)]
+    else:
+        return list(map(f, X))
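A small usage sketch; on Windows this falls back to a plain serial map, as the
platform check above makes explicit. A top-level worker function is used so it
stays picklable under spawn-based multiprocessing:

    from ot.utils import parmap

    def square(x):
        return x * x

    print(parmap(square, list(range(8)), nprocs=2))   # [0, 1, 4, 9, 16, 25, 36, 49]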
def check_params(**kwargs):
@@ -256,6 +257,7 @@ def check_params(**kwargs):
def check_random_state(seed):
"""Turn seed into a np.random.RandomState instance
+
Parameters
----------
seed : None | int | instance of RandomState
@@ -275,7 +277,6 @@ def check_random_state(seed):
class deprecated(object):
-
"""Decorator to mark a function or class as deprecated.
deprecated class from scikit-learn package
@@ -285,14 +286,14 @@ class deprecated(object):
The optional extra argument will be appended to the deprecation message
and the docstring. Note: to use this with the default value for extra, put
    in an empty pair of parentheses:
- >>> from ot.deprecation import deprecated
- >>> @deprecated()
- ... def some_function(): pass
+ >>> from ot.deprecation import deprecated # doctest: +SKIP
+ >>> @deprecated() # doctest: +SKIP
+ ... def some_function(): pass # doctest: +SKIP
Parameters
----------
- extra : string
- to be added to the deprecation messages
+ extra : str
+ To be added to the deprecation messages.
"""
# Adapted from http://wiki.python.org/moin/PythonDecoratorLibrary,
@@ -373,9 +374,9 @@ def _is_deprecated(func):
class BaseEstimator(object):
-
"""Base class for most objects in POT
- adapted from sklearn BaseEstimator class
+
+ Code adapted from sklearn BaseEstimator class
Notes
-----
@@ -417,7 +418,7 @@ class BaseEstimator(object):
Parameters
----------
- deep : boolean, optional
+ deep : bool, optional
If True, will return the parameters for this estimator and
contained subobjects that are estimators.
@@ -487,3 +488,11 @@ class BaseEstimator(object):
(key, self.__class__.__name__))
setattr(self, key, value)
return self
+
+
+class UndefinedParameter(Exception):
+ """
+    Raised when an undefined parameter is called.
+
+ """
+ pass