Diffstat (limited to 'ot')
-rw-r--r--   ot/__init__.py                    32
-rw-r--r--   ot/bregman.py                    570
-rw-r--r--   ot/da.py                         674
-rw-r--r--   ot/datasets.py                    23
-rw-r--r--   ot/dr.py                           4
-rw-r--r--   ot/gpu/__init__.py                 5
-rw-r--r--   ot/gromov.py                     206
-rw-r--r--   ot/lp/EMD.h                        2
-rw-r--r--   ot/lp/EMD_wrapper.cpp              9
-rw-r--r--   ot/lp/__init__.py                247
-rw-r--r--   ot/lp/emd_wrap.pyx                27
-rw-r--r--   ot/lp/network_simplex_simple.h     7
-rw-r--r--   ot/optim.py                        8
-rwxr-xr-x   ot/partial.py                   1062
-rw-r--r--   ot/plot.py                         3
-rw-r--r--   ot/smooth.py                       4
-rw-r--r--   ot/unbalanced.py                 107
-rw-r--r--   ot/utils.py                       28
18 files changed, 2712 insertions, 306 deletions
diff --git a/ot/__init__.py b/ot/__init__.py
index 89c7936..0e6e2e2 100644
--- a/ot/__init__.py
+++ b/ot/__init__.py
@@ -1,34 +1,5 @@
"""
-This is the main module of the POT toolbox. It provides easy access to
-a number of sub-modules and functions described below.
-
-.. note::
-
-
- Here is a list of the submodules and short description of what they contain.
-
- - :any:`ot.lp` contains OT solvers for the exact (Linear Program) OT problems.
- - :any:`ot.bregman` contains OT solvers for the entropic OT problems using
- Bregman projections.
- - :any:`ot.lp` contains OT solvers for the exact (Linear Program) OT problems.
- - :any:`ot.smooth` contains OT solvers for the regularized (l2 and kl) smooth OT
- problems.
- - :any:`ot.gromov` contains solvers for Gromov-Wasserstein and Fused Gromov
- Wasserstein problems.
- - :any:`ot.optim` contains generic solvers OT based optimization problems
- - :any:`ot.da` contains classes and function related to Monge mapping
- estimation and Domain Adaptation (DA).
- - :any:`ot.gpu` contains GPU (cupy) implementation of some OT solvers
- - :any:`ot.dr` contains Dimension Reduction (DR) methods such as Wasserstein
- Discriminant Analysis.
- - :any:`ot.utils` contains utility functions such as distance computation and
- timing.
- - :any:`ot.datasets` contains toy dataset generation functions.
- - :any:`ot.plot` contains visualization functions
- - :any:`ot.stochastic` contains stochastic solvers for regularized OT.
- - :any:`ot.unbalanced` contains solvers for regularized unbalanced OT.
-
.. warning::
The list of automatically imported sub-modules is as follows:
:py:mod:`ot.lp`, :py:mod:`ot.bregman`, :py:mod:`ot.optim`
@@ -61,6 +32,7 @@ from . import gromov
from . import smooth
from . import stochastic
from . import unbalanced
+from . import partial
# OT functions
from .lp import emd, emd2, emd_1d, emd2_1d, wasserstein_1d
@@ -71,7 +43,7 @@ from .da import sinkhorn_lpl1_mm
# utils functions
from .utils import dist, unif, tic, toc, toq
-__version__ = "0.6.0"
+__version__ = "0.7.0"
__all__ = ['emd', 'emd2', 'emd_1d', 'sinkhorn', 'sinkhorn2', 'utils', 'datasets',
'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov',
diff --git a/ot/bregman.py b/ot/bregman.py
index 2cd832b..f1f8437 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
-Bregman projections for regularized OT
+Bregman projections solvers for entropic regularized OT
"""
# Author: Remi Flamary <remi.flamary@unice.fr>
@@ -8,12 +8,16 @@ Bregman projections for regularized OT
# Kilian Fatras <kilian.fatras@irisa.fr>
# Titouan Vayer <titouan.vayer@irisa.fr>
# Hicham Janati <hicham.janati@inria.fr>
+# Mokhtar Z. Alaya <mokhtarzahdi.alaya@gmail.com>
+# Alexander Tong <alexander.tong@yale.edu>
+# Ievgen Redko <ievgen.redko@univ-st-etienne.fr>
#
# License: MIT License
import numpy as np
import warnings
from .utils import unif, dist
+from scipy.optimize import fmin_l_bfgs_b
def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
@@ -536,12 +540,12 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False,
old_v = v[i_2]
v[i_2] = b[i_2] / (K[:, i_2].T.dot(u))
G[:, i_2] = u * K[:, i_2] * v[i_2]
- #aviol = (G@one_m - a)
- #aviol_2 = (G.T@one_n - b)
+ # aviol = (G@one_m - a)
+ # aviol_2 = (G.T@one_n - b)
viol += (-old_v + v[i_2]) * K[:, i_2] * u
viol_2[i_2] = v[i_2] * K[:, i_2].dot(u) - b[i_2]
- #print('b',np.max(abs(aviol -viol)),np.max(abs(aviol_2 - viol_2)))
+ # print('b',np.max(abs(aviol -viol)),np.max(abs(aviol_2 - viol_2)))
if stopThr_val <= stopThr:
break
@@ -905,11 +909,6 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4,
else:
alpha, beta = warmstart
- def get_K(alpha, beta):
- """log space computation"""
- return np.exp(-(M - alpha.reshape((dim_a, 1))
- - beta.reshape((1, dim_b))) / reg)
-
# print(np.min(K))
def get_reg(n): # exponential decreasing
return (epsilon0 - reg) * np.exp(-n) + reg
@@ -937,7 +936,7 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4,
# the 10th iterations
transp = G
err = np.linalg.norm(
- (np.sum(transp, axis=0) - b))**2 + np.linalg.norm((np.sum(transp, axis=1) - a))**2
+ (np.sum(transp, axis=0) - b)) ** 2 + np.linalg.norm((np.sum(transp, axis=1) - a)) ** 2
if log:
log['err'].append(err)
@@ -963,7 +962,7 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4,
def geometricBar(weights, alldistribT):
"""return the weighted geometric mean of distributions"""
- assert(len(weights) == alldistribT.shape[1])
+ assert (len(weights) == alldistribT.shape[1])
return np.exp(np.dot(np.log(alldistribT), weights.T))
@@ -1037,11 +1036,13 @@ def barycenter(A, M, reg, weights=None, method="sinkhorn", numItermax=10000,
"""
if method.lower() == 'sinkhorn':
- return barycenter_sinkhorn(A, M, reg, numItermax=numItermax,
+ return barycenter_sinkhorn(A, M, reg, weights=weights,
+ numItermax=numItermax,
stopThr=stopThr, verbose=verbose, log=log,
**kwargs)
elif method.lower() == 'sinkhorn_stabilized':
- return barycenter_stabilized(A, M, reg, numItermax=numItermax,
+ return barycenter_stabilized(A, M, reg, weights=weights,
+ numItermax=numItermax,
stopThr=stopThr, verbose=verbose,
log=log, **kwargs)
else:
@@ -1103,7 +1104,7 @@ def barycenter_sinkhorn(A, M, reg, weights=None, numItermax=1000,
if weights is None:
weights = np.ones(A.shape[1]) / A.shape[1]
else:
- assert(len(weights) == A.shape[1])
+ assert (len(weights) == A.shape[1])
if log:
log = {'err': []}
@@ -1201,7 +1202,7 @@ def barycenter_stabilized(A, M, reg, tau=1e10, weights=None, numItermax=1000,
if weights is None:
weights = np.ones(n_hists) / n_hists
else:
- assert(len(weights) == A.shape[1])
+ assert (len(weights) == A.shape[1])
if log:
log = {'err': []}
@@ -1329,7 +1330,7 @@ def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000,
if weights is None:
weights = np.ones(A.shape[0]) / A.shape[0]
else:
- assert(len(weights) == A.shape[0])
+ assert (len(weights) == A.shape[0])
if log:
log = {'err': []}
@@ -1342,12 +1343,17 @@ def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000,
err = 1
# build the convolution operator
+ # this is equivalent to blurring on horizontal then vertical directions
t = np.linspace(0, 1, A.shape[1])
[Y, X] = np.meshgrid(t, t)
- xi1 = np.exp(-(X - Y)**2 / reg)
+ xi1 = np.exp(-(X - Y) ** 2 / reg)
+
+ t = np.linspace(0, 1, A.shape[2])
+ [Y, X] = np.meshgrid(t, t)
+ xi2 = np.exp(-(X - Y) ** 2 / reg)
def K(x):
- return np.dot(np.dot(xi1, x), xi1)
+ return np.dot(np.dot(xi1, x), xi2)
while (err > stopThr and cpt < numItermax):
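The separable kernel built above lends itself to a quick standalone check. The sketch below (toy sizes and hypothetical variable names, not part of the patch) shows that the operator K amounts to a 1-D row blur followed by a 1-D column blur:

import numpy as np

reg = 1e-2
n1, n2 = 30, 40                  # grid sizes, i.e. A.shape[1] and A.shape[2]
t1 = np.linspace(0, 1, n1)
t2 = np.linspace(0, 1, n2)
xi1 = np.exp(-(t1[:, None] - t1[None, :]) ** 2 / reg)   # row blur
xi2 = np.exp(-(t2[:, None] - t2[None, :]) ** 2 / reg)   # column blur

img = np.random.rand(n1, n2)
blurred = xi1.dot(img).dot(xi2)  # same result as K(img) in the patch above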
@@ -1492,6 +1498,164 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000,
return np.sum(K0, axis=1)
+def jcpot_barycenter(Xs, Ys, Xt, reg, metric='sqeuclidean', numItermax=100,
+ stopThr=1e-6, verbose=False, log=False, **kwargs):
+ r'''Joint OT and proportion estimation for multi-source target shift as proposed in [27]
+
+ The function solves the following optimization problem:
+
+ .. math::
+
+        \mathbf{h} = \arg\min_{\mathbf{h}} \quad \sum_{k=1}^{K} \lambda_k
+                    W_{reg}((\mathbf{D}_2^{(k)} \mathbf{h})^T, \mathbf{a})
+
+        s.t. \ \forall k, \mathbf{D}_1^{(k)} \gamma_k \mathbf{1}_n = \mathbf{h}
+
+ where :
+
+ - :math:`\lambda_k` is the weight of k-th source domain
+ - :math:`W_{reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance (see ot.bregman.sinkhorn)
+ - :math:`\mathbf{D}_2^{(k)}` is a matrix of weights related to k-th source domain defined as in [p. 5, 27], its expected shape is `(n_k, C)` where `n_k` is the number of elements in the k-th source domain and `C` is the number of classes
+ - :math:`\mathbf{h}` is a vector of estimated proportions in the target domain of size C
+ - :math:`\mathbf{a}` is a uniform vector of weights in the target domain of size `n`
+ - :math:`\mathbf{D}_1^{(k)}` is a matrix of class assignments defined as in [p. 5, 27], its expected shape is `(n_k, C)`
+
+    The problem consists of solving a Wasserstein barycenter problem to estimate the proportions :math:`\mathbf{h}` in the target domain.
+
+ The algorithm used for solving the problem is the Iterative Bregman projections algorithm
+ with two sets of marginal constraints related to the unknown vector :math:`\mathbf{h}` and uniform target distribution.
+
+ Parameters
+ ----------
+ Xs : list of K np.ndarray(nsk,d)
+ features of all source domains' samples
+ Ys : list of K np.ndarray(nsk,)
+ labels of all source domains' samples
+ Xt : np.ndarray (nt,d)
+ samples in the target domain
+ reg : float
+ Regularization term > 0
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+ Stop threshold on relative change in the barycenter (>0)
+ log : bool, optional
+ record log if True
+ verbose : bool, optional (default=False)
+ Controls the verbosity of the optimization algorithm
+
+ Returns
+ -------
+ h : (C,) ndarray
+ proportion estimation in the target domain
+ log : dict
+        Log dictionary returned only if log==True in parameters
+
+
+ References
+ ----------
+
+ .. [27] Ievgen Redko, Nicolas Courty, Rémi Flamary, Devis Tuia
+ "Optimal transport for multi-source domain adaptation under target shift",
+ International Conference on Artificial Intelligence and Statistics (AISTATS), 2019.
+
+ '''
+ nbclasses = len(np.unique(Ys[0]))
+ nbdomains = len(Xs)
+
+ # log dictionary
+ if log:
+ log = {'niter': 0, 'err': [], 'M': [], 'D1': [], 'D2': [], 'gamma': []}
+
+ K = []
+ M = []
+ D1 = []
+ D2 = []
+
+ # For each source domain, build cost matrices M, Gibbs kernels K and corresponding matrices D_1 and D_2
+ for d in range(nbdomains):
+ dom = {}
+ nsk = Xs[d].shape[0] # get number of elements for this domain
+ dom['nbelem'] = nsk
+ classes = np.unique(Ys[d]) # get number of classes for this domain
+
+ # format classes to start from 0 for convenience
+ if np.min(classes) != 0:
+ Ys[d] = Ys[d] - np.min(classes)
+ classes = np.unique(Ys[d])
+
+ # build the corresponding D_1 and D_2 matrices
+ Dtmp1 = np.zeros((nbclasses, nsk))
+ Dtmp2 = np.zeros((nbclasses, nsk))
+
+ for c in classes:
+ nbelemperclass = np.sum(Ys[d] == c)
+ if nbelemperclass != 0:
+ Dtmp1[int(c), Ys[d] == c] = 1.
+ Dtmp2[int(c), Ys[d] == c] = 1. / (nbelemperclass)
+ D1.append(Dtmp1)
+ D2.append(Dtmp2)
+
+ # build the cost matrix and the Gibbs kernel
+ Mtmp = dist(Xs[d], Xt, metric=metric)
+ M.append(Mtmp)
+
+ Ktmp = np.empty(Mtmp.shape, dtype=Mtmp.dtype)
+ np.divide(Mtmp, -reg, out=Ktmp)
+ np.exp(Ktmp, out=Ktmp)
+ K.append(Ktmp)
+
+ # uniform target distribution
+ a = unif(np.shape(Xt)[0])
+
+ cpt = 0 # iterations count
+ err = 1
+ old_bary = np.ones((nbclasses))
+
+ while (err > stopThr and cpt < numItermax):
+
+ bary = np.zeros((nbclasses))
+
+ # update coupling matrices for marginal constraints w.r.t. uniform target distribution
+ for d in range(nbdomains):
+ K[d] = projC(K[d], a)
+ other = np.sum(K[d], axis=1)
+ bary = bary + np.log(np.dot(D1[d], other)) / nbdomains
+
+ bary = np.exp(bary)
+
+ # update coupling matrices for marginal constraints w.r.t. unknown proportions based on [Prop 4., 27]
+ for d in range(nbdomains):
+ new = np.dot(D2[d].T, bary)
+ K[d] = projR(K[d], new)
+
+ err = np.linalg.norm(bary - old_bary)
+ cpt = cpt + 1
+ old_bary = bary
+
+ if log:
+ log['err'].append(err)
+
+ if verbose:
+ if cpt % 200 == 0:
+ print('{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err))
+
+ bary = bary / np.sum(bary)
+
+ if log:
+ log['niter'] = cpt
+ log['M'] = M
+ log['D1'] = D1
+ log['D2'] = D2
+ log['gamma'] = K
+ return bary, log
+ else:
+ return bary
+
+
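A minimal usage sketch of the jcpot_barycenter function added above, on hypothetical toy data (two labeled source domains, one unlabeled target domain):

import numpy as np
import ot

rng = np.random.RandomState(42)
# two labeled source domains and one unlabeled target domain
Xs = [rng.randn(20, 2), rng.randn(30, 2) + 1]
Ys = [rng.randint(0, 2, 20), rng.randint(0, 2, 30)]
Xt = rng.randn(25, 2)

# estimated class proportions in the target domain (non-negative, sums to 1)
h = ot.bregman.jcpot_barycenter(Xs, Ys, Xt, reg=1.0)
print(h, h.sum())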
def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean',
numIterMax=10000, stopThr=1e-9, verbose=False,
log=False, **kwargs):
@@ -1583,7 +1747,8 @@ def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean',
return pi
-def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs):
+def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9,
+ verbose=False, log=False, **kwargs):
r'''
Solve the entropic regularization optimal transport problem from empirical
data and return the OT loss
@@ -1665,14 +1830,17 @@ def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', num
M = dist(X_s, X_t, metric=metric)
if log:
- sinkhorn_loss, log = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ sinkhorn_loss, log = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log,
+ **kwargs)
return sinkhorn_loss, log
else:
- sinkhorn_loss = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ sinkhorn_loss = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log,
+ **kwargs)
return sinkhorn_loss
-def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs):
+def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9,
+ verbose=False, log=False, **kwargs):
r'''
Compute the sinkhorn divergence loss from empirical data
@@ -1758,11 +1926,14 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli
.. [23] Aude Genevay, Gabriel Peyré, Marco Cuturi, Learning Generative Models with Sinkhorn Divergences, Proceedings of the Twenty-First International Conference on Artficial Intelligence and Statistics, (AISTATS) 21, 2018
'''
if log:
- sinkhorn_loss_ab, log_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+ sinkhorn_loss_ab, log_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax,
+ stopThr=1e-9, verbose=verbose, log=log, **kwargs)
- sinkhorn_loss_a, log_a = empirical_sinkhorn2(X_s, X_s, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+ sinkhorn_loss_a, log_a = empirical_sinkhorn2(X_s, X_s, reg, a, b, metric=metric, numIterMax=numIterMax,
+ stopThr=1e-9, verbose=verbose, log=log, **kwargs)
- sinkhorn_loss_b, log_b = empirical_sinkhorn2(X_t, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+ sinkhorn_loss_b, log_b = empirical_sinkhorn2(X_t, X_t, reg, a, b, metric=metric, numIterMax=numIterMax,
+ stopThr=1e-9, verbose=verbose, log=log, **kwargs)
sinkhorn_div = sinkhorn_loss_ab - 1 / 2 * (sinkhorn_loss_a + sinkhorn_loss_b)
@@ -1777,11 +1948,354 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli
return max(0, sinkhorn_div), log
else:
- sinkhorn_loss_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+ sinkhorn_loss_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9,
+ verbose=verbose, log=log, **kwargs)
- sinkhorn_loss_a = empirical_sinkhorn2(X_s, X_s, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+ sinkhorn_loss_a = empirical_sinkhorn2(X_s, X_s, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9,
+ verbose=verbose, log=log, **kwargs)
- sinkhorn_loss_b = empirical_sinkhorn2(X_t, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+ sinkhorn_loss_b = empirical_sinkhorn2(X_t, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9,
+ verbose=verbose, log=log, **kwargs)
sinkhorn_div = sinkhorn_loss_ab - 1 / 2 * (sinkhorn_loss_a + sinkhorn_loss_b)
return max(0, sinkhorn_div)
+
+
+def screenkhorn(a, b, M, reg, ns_budget=None, nt_budget=None, uniform=False, restricted=True,
+ maxiter=10000, maxfun=10000, pgtol=1e-09, verbose=False, log=False):
+    r"""
+    Screening Sinkhorn Algorithm for Regularized Optimal Transport
+
+    The function solves an approximate dual of Sinkhorn divergence [2] which is written as the following optimization problem:
+
+    .. math::
+
+        (u, v) = \arg\min_{u, v} \mathbf{1}_{ns}^T B(u, v) \mathbf{1}_{nt} - \langle \kappa u, a \rangle - \langle v / \kappa, b \rangle
+
+    where :math:`B(u, v) = \mathrm{diag}(e^u) K \mathrm{diag}(e^v)`, with :math:`K = e^{-M / reg}`, subject to:
+
+    .. math::
+
+        e^{u_i} \geq \epsilon / \kappa, \quad \forall i \in \{1, \ldots, ns\}
+
+        e^{v_j} \geq \epsilon \kappa, \quad \forall j \in \{1, \ldots, nt\}
+
+    The parameters :math:`\kappa` and :math:`\epsilon` are determined w.r.t. the chosen budget of points (ns_budget, nt_budget), see Equation (5) in [26]
+
+
+ Parameters
+ ----------
+ a : `numpy.ndarray`, shape=(ns,)
+ samples weights in the source domain
+
+ b : `numpy.ndarray`, shape=(nt,)
+ samples weights in the target domain
+
+ M : `numpy.ndarray`, shape=(ns, nt)
+ Cost matrix
+
+ reg : `float`
+ Level of the entropy regularisation
+
+    ns_budget : `int`, default=None
+        Number budget of points to be kept in the source domain.
+        If None, 50% of the source sample points will be kept
+
+    nt_budget : `int`, default=None
+        Number budget of points to be kept in the target domain.
+        If None, 50% of the target sample points will be kept
+
+ uniform : `bool`, default=False
+        If `True`, the source and target distributions are assumed to be uniform, i.e., a_i = 1 / ns and b_j = 1 / nt
+
+ restricted : `bool`, default=True
+        If `True`, a warm-start initialization for the L-BFGS-B solver is
+        computed using a restricted Sinkhorn algorithm with at most 5 iterations
+
+ maxiter : `int`, default=10000
+ Maximum number of iterations in LBFGS solver
+
+ maxfun : `int`, default=10000
+ Maximum number of function evaluations in LBFGS solver
+
+ pgtol : `float`, default=1e-09
+ Final objective function accuracy in LBFGS solver
+
+ verbose : `bool`, default=False
+        If `True`, display information about the cardinality of the active
+        sets and the parameters kappa and epsilon
+
+ Dependency
+ ----------
+    To gain more efficiency, screenkhorn relies on the "Bottleneck" package (https://pypi.org/project/Bottleneck/)
+    in the screening pre-processing step. If Bottleneck isn't installed, a warning is raised and the slower
+    numpy implementation of the screening step is used instead.
+
+
+ Returns
+ -------
+ gamma : `numpy.ndarray`, shape=(ns, nt)
+ Screened optimal transportation matrix for the given parameters
+
+ log : `dict`, default=False
+        Log dictionary returned only if log==True in parameters
+
+
+ References
+ -----------
+ .. [26] Alaya M. Z., Bérar M., Gasso G., Rakotomamonjy A. (2019). Screening Sinkhorn Algorithm for Regularized Optimal Transport (NIPS) 33, 2019
+
+ """
+ # check if bottleneck module exists
+ try:
+ import bottleneck
+ except ImportError:
+ warnings.warn(
+ "Bottleneck module is not installed. Install it from https://pypi.org/project/Bottleneck/ for better performance.")
+ bottleneck = np
+
+ a = np.asarray(a, dtype=np.float64)
+ b = np.asarray(b, dtype=np.float64)
+ M = np.asarray(M, dtype=np.float64)
+ ns, nt = M.shape
+
+ # by default, we keep only 50% of the sample data points
+ if ns_budget is None:
+ ns_budget = int(np.floor(0.5 * ns))
+ if nt_budget is None:
+ nt_budget = int(np.floor(0.5 * nt))
+
+ # calculate the Gibbs kernel
+ K = np.empty_like(M)
+ np.divide(M, -reg, out=K)
+ np.exp(K, out=K)
+
+ def projection(u, epsilon):
+ u[u <= epsilon] = epsilon
+ return u
+
+ # ----------------------------------------------------------------------------------------------------------------#
+ # Step 1: Screening pre-processing #
+ # ----------------------------------------------------------------------------------------------------------------#
+
+ if ns_budget == ns and nt_budget == nt:
+ # full number of budget points (ns, nt) = (ns_budget, nt_budget)
+ Isel = np.ones(ns, dtype=bool)
+ Jsel = np.ones(nt, dtype=bool)
+ epsilon = 0.0
+ kappa = 1.0
+
+ cst_u = 0.
+ cst_v = 0.
+
+ bounds_u = [(0.0, np.inf)] * ns
+ bounds_v = [(0.0, np.inf)] * nt
+
+ a_I = a
+ b_J = b
+ K_IJ = K
+ K_IJc = []
+ K_IcJ = []
+
+ vec_eps_IJc = np.zeros(nt)
+ vec_eps_IcJ = np.zeros(ns)
+
+ else:
+ # sum of rows and columns of K
+ K_sum_cols = K.sum(axis=1)
+ K_sum_rows = K.sum(axis=0)
+
+ if uniform:
+ if ns / ns_budget < 4:
+ aK_sort = np.sort(K_sum_cols)
+ epsilon_u_square = a[0] / aK_sort[ns_budget - 1]
+ else:
+ aK_sort = bottleneck.partition(K_sum_cols, ns_budget - 1)[ns_budget - 1]
+ epsilon_u_square = a[0] / aK_sort
+
+ if nt / nt_budget < 4:
+ bK_sort = np.sort(K_sum_rows)
+ epsilon_v_square = b[0] / bK_sort[nt_budget - 1]
+ else:
+ bK_sort = bottleneck.partition(K_sum_rows, nt_budget - 1)[nt_budget - 1]
+ epsilon_v_square = b[0] / bK_sort
+ else:
+ aK = a / K_sum_cols
+ bK = b / K_sum_rows
+
+ aK_sort = np.sort(aK)[::-1]
+ epsilon_u_square = aK_sort[ns_budget - 1]
+
+ bK_sort = np.sort(bK)[::-1]
+ epsilon_v_square = bK_sort[nt_budget - 1]
+
+ # active sets I and J (see Lemma 1 in [26])
+ Isel = a >= epsilon_u_square * K_sum_cols
+ Jsel = b >= epsilon_v_square * K_sum_rows
+
+ if sum(Isel) != ns_budget:
+ if uniform:
+ aK = a / K_sum_cols
+ aK_sort = np.sort(aK)[::-1]
+ epsilon_u_square = aK_sort[ns_budget - 1:ns_budget + 1].mean()
+ Isel = a >= epsilon_u_square * K_sum_cols
+ ns_budget = sum(Isel)
+
+ if sum(Jsel) != nt_budget:
+ if uniform:
+ bK = b / K_sum_rows
+ bK_sort = np.sort(bK)[::-1]
+ epsilon_v_square = bK_sort[nt_budget - 1:nt_budget + 1].mean()
+ Jsel = b >= epsilon_v_square * K_sum_rows
+ nt_budget = sum(Jsel)
+
+ epsilon = (epsilon_u_square * epsilon_v_square) ** (1 / 4)
+ kappa = (epsilon_v_square / epsilon_u_square) ** (1 / 2)
+
+ if verbose:
+ print("epsilon = %s\n" % epsilon)
+ print("kappa = %s\n" % kappa)
+ print('Cardinality of selected points: |Isel| = %s \t |Jsel| = %s \n' % (sum(Isel), sum(Jsel)))
+
+ # Ic, Jc: complementary of the active sets I and J
+ Ic = ~Isel
+ Jc = ~Jsel
+
+ K_IJ = K[np.ix_(Isel, Jsel)]
+ K_IcJ = K[np.ix_(Ic, Jsel)]
+ K_IJc = K[np.ix_(Isel, Jc)]
+
+ K_min = K_IJ.min()
+ if K_min == 0:
+ K_min = np.finfo(float).tiny
+
+ # a_I, b_J, a_Ic, b_Jc
+ a_I = a[Isel]
+ b_J = b[Jsel]
+ if not uniform:
+ a_I_min = a_I.min()
+ a_I_max = a_I.max()
+ b_J_max = b_J.max()
+ b_J_min = b_J.min()
+ else:
+ a_I_min = a_I[0]
+ a_I_max = a_I[0]
+ b_J_max = b_J[0]
+ b_J_min = b_J[0]
+
+ # box constraints in L-BFGS-B (see Proposition 1 in [26])
+ bounds_u = [(max(a_I_min / ((nt - nt_budget) * epsilon + nt_budget * (b_J_max / (
+ ns * epsilon * kappa * K_min))), epsilon / kappa), a_I_max / (nt * epsilon * K_min))] * ns_budget
+
+ bounds_v = [(
+ max(b_J_min / ((ns - ns_budget) * epsilon + ns_budget * (kappa * a_I_max / (nt * epsilon * K_min))),
+ epsilon * kappa), b_J_max / (ns * epsilon * K_min))] * nt_budget
+
+ # pre-calculated constants for the objective
+ vec_eps_IJc = epsilon * kappa * (K_IJc * np.ones(nt - nt_budget).reshape((1, -1))).sum(axis=1)
+ vec_eps_IcJ = (epsilon / kappa) * (np.ones(ns - ns_budget).reshape((-1, 1)) * K_IcJ).sum(axis=0)
+
+ # initialisation
+ u0 = np.full(ns_budget, (1. / ns_budget) + epsilon / kappa)
+ v0 = np.full(nt_budget, (1. / nt_budget) + epsilon * kappa)
+
+ # pre-calculed constants for Restricted Sinkhorn (see Algorithm 1 in supplementary of [26])
+ if restricted:
+ if ns_budget != ns or nt_budget != nt:
+ cst_u = kappa * epsilon * K_IJc.sum(axis=1)
+ cst_v = epsilon * K_IcJ.sum(axis=0) / kappa
+
+ cpt = 1
+ while cpt < 5: # 5 iterations
+ K_IJ_v = np.dot(K_IJ.T, u0) + cst_v
+ v0 = b_J / (kappa * K_IJ_v)
+ KIJ_u = np.dot(K_IJ, v0) + cst_u
+ u0 = (kappa * a_I) / KIJ_u
+ cpt += 1
+
+ u0 = projection(u0, epsilon / kappa)
+ v0 = projection(v0, epsilon * kappa)
+
+
+ def restricted_sinkhorn(usc, vsc, max_iter=5):
+ """
+ Restricted Sinkhorn Algorithm as a warm-start initialized point for L-BFGS-B (see Algorithm 1 in supplementary of [26])
+ """
+ cpt = 1
+ while cpt < max_iter:
+ K_IJ_v = np.dot(K_IJ.T, usc) + cst_v
+ vsc = b_J / (kappa * K_IJ_v)
+ KIJ_u = np.dot(K_IJ, vsc) + cst_u
+ usc = (kappa * a_I) / KIJ_u
+ cpt += 1
+
+ usc = projection(usc, epsilon / kappa)
+ vsc = projection(vsc, epsilon * kappa)
+
+ return usc, vsc
+
+ def screened_obj(usc, vsc):
+        part_IJ = np.dot(np.dot(usc, K_IJ), vsc) \
+            - kappa * np.dot(a_I, np.log(usc)) - (1. / kappa) * np.dot(b_J, np.log(vsc))
+ part_IJc = np.dot(usc, vec_eps_IJc)
+ part_IcJ = np.dot(vec_eps_IcJ, vsc)
+ psi_epsilon = part_IJ + part_IJc + part_IcJ
+ return psi_epsilon
+
+ def screened_grad(usc, vsc):
+ # gradients of Psi_(kappa,epsilon) w.r.t u and v
+ grad_u = np.dot(K_IJ, vsc) + vec_eps_IJc - kappa * a_I / usc
+ grad_v = np.dot(K_IJ.T, usc) + vec_eps_IcJ - (1. / kappa) * b_J / vsc
+ return grad_u, grad_v
+
+ def bfgspost(theta):
+ u = theta[:ns_budget]
+ v = theta[ns_budget:]
+ # objective
+ f = screened_obj(u, v)
+ # gradient
+ g_u, g_v = screened_grad(u, v)
+ g = np.hstack([g_u, g_v])
+ return f, g
+
+ # ----------------------------------------------------------------------------------------------------------------#
+ # Step 2: L-BFGS-B solver #
+ # ----------------------------------------------------------------------------------------------------------------#
+
+ u0, v0 = restricted_sinkhorn(u0, v0)
+ theta0 = np.hstack([u0, v0])
+
+ bounds = bounds_u + bounds_v # constraint bounds
+
+    theta, _, _ = fmin_l_bfgs_b(func=bfgspost,
+                                x0=theta0,
+                                bounds=bounds,
+                                maxfun=maxfun,
+                                pgtol=pgtol,
+                                maxiter=maxiter)
+
+ usc = theta[:ns_budget]
+ vsc = theta[ns_budget:]
+
+ usc_full = np.full(ns, epsilon / kappa)
+ vsc_full = np.full(nt, epsilon * kappa)
+ usc_full[Isel] = usc
+ vsc_full[Jsel] = vsc
+
+ if log:
+ log = {}
+ log['u'] = usc_full
+ log['v'] = vsc_full
+ log['Isel'] = Isel
+ log['Jsel'] = Jsel
+
+ gamma = usc_full[:, None] * K * vsc_full[None, :]
+ gamma = gamma / gamma.sum()
+
+ if log:
+ return gamma, log
+ else:
+ return gamma
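A minimal usage sketch of screenkhorn on toy data (values illustrative; Bottleneck is optional, as noted in the docstring):

import numpy as np
import ot

rng = np.random.RandomState(0)
ns, nt = 100, 100
xs, xt = rng.randn(ns, 2), rng.randn(nt, 2) + 2
a, b = ot.unif(ns), ot.unif(nt)
M = ot.dist(xs, xt)

# keep a budget of 50 points on each side during the screening step
G_screened = ot.bregman.screenkhorn(a, b, M, reg=1e-1,
                                    ns_budget=50, nt_budget=50)
print(G_screened.shape, G_screened.sum())  # (100, 100), ~1.0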
diff --git a/ot/da.py b/ot/da.py
index 108a38d..b881a8b 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -7,15 +7,16 @@ Domain adaptation with optimal transport
# Nicolas Courty <ncourty@irisa.fr>
# Michael Perrot <michael.perrot@univ-st-etienne.fr>
# Nathalie Gayraud <nat.gayraud@gmail.com>
+# Ievgen Redko <ievgen.redko@univ-st-etienne.fr>
#
# License: MIT License
import numpy as np
import scipy.linalg as linalg
-from .bregman import sinkhorn
+from .bregman import sinkhorn, jcpot_barycenter
from .lp import emd
-from .utils import unif, dist, kernel, cost_normalization
+from .utils import unif, dist, kernel, cost_normalization, label_normalization, laplacian, dots
from .utils import check_params, BaseEstimator
from .unbalanced import sinkhorn_unbalanced
from .optim import cg
@@ -127,7 +128,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
W = np.ones(M.shape)
for (i, c) in enumerate(classes):
majs = np.sum(transp[indices_labels[i]], axis=0)
- majs = p * ((majs + epsilon)**(p - 1))
+ majs = p * ((majs + epsilon) ** (p - 1))
W[indices_labels[i]] = majs
return transp
@@ -359,8 +360,8 @@ def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False,
def loss(L, G):
"""Compute full loss"""
- return np.sum((xs1.dot(L) - ns * G.dot(xt))**2) + mu * \
- np.sum(G * M) + eta * np.sum(sel(L - I0)**2)
+ return np.sum((xs1.dot(L) - ns * G.dot(xt)) ** 2) + mu * \
+ np.sum(G * M) + eta * np.sum(sel(L - I0) ** 2)
def solve_L(G):
""" solve L problem with fixed G (least square)"""
@@ -372,10 +373,11 @@ def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False,
xsi = xs1.dot(L)
def f(G):
- return np.sum((xsi - ns * G.dot(xt))**2)
+ return np.sum((xsi - ns * G.dot(xt)) ** 2)
def df(G):
return -2 * ns * (xsi - ns * G.dot(xt)).dot(xt.T)
+
G = cg(a, b, M, 1.0 / mu, f, df, G0=G0,
numItermax=numInnerItermax, stopThr=stopInnerThr)
return G
@@ -562,7 +564,7 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
def loss(L, G):
"""Compute full loss"""
- return np.sum((K1.dot(L) - ns * G.dot(xt))**2) + mu * \
+ return np.sum((K1.dot(L) - ns * G.dot(xt)) ** 2) + mu * \
np.sum(G * M) + eta * np.trace(L.T.dot(Kreg).dot(L))
def solve_L_nobias(G):
@@ -580,10 +582,11 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
xsi = K1.dot(L)
def f(G):
- return np.sum((xsi - ns * G.dot(xt))**2)
+ return np.sum((xsi - ns * G.dot(xt)) ** 2)
def df(G):
return -2 * ns * (xsi - ns * G.dot(xt)).dot(xt.T)
+
G = cg(a, b, M, 1.0 / mu, f, df, G0=G0,
numItermax=numInnerItermax, stopThr=stopInnerThr)
return G
@@ -745,6 +748,139 @@ def OT_mapping_linear(xs, xt, reg=1e-6, ws=None,
return A, b
+def emd_laplace(a, b, xs, xt, M, sim='knn', sim_param=None, reg='pos', eta=1, alpha=.5,
+ numItermax=100, stopThr=1e-9, numInnerItermax=100000,
+ stopInnerThr=1e-9, log=False, verbose=False):
+ r"""Solve the optimal transport problem (OT) with Laplacian regularization
+
+    .. math::
+        \gamma = \arg\min_\gamma \langle \gamma, M \rangle_F + \eta \Omega_\alpha(\gamma)
+
+        s.t.\ \gamma \mathbf{1} = a
+
+             \gamma^T \mathbf{1} = b
+
+             \gamma \geq 0
+
+ where:
+
+ - a and b are source and target weights (sum to 1)
+ - xs and xt are source and target samples
+ - M is the (ns,nt) metric cost matrix
+    - :math:`\Omega_\alpha` is the Laplacian regularization term
+      :math:`\Omega_\alpha = \frac{1 - \alpha}{n_s^2} \sum_{i,j} S^s_{i,j} \|T(\mathbf{x}^s_i) - T(\mathbf{x}^s_j)\|^2 + \frac{\alpha}{n_t^2} \sum_{i,j} S^t_{i,j} \|T(\mathbf{x}^t_i) - T(\mathbf{x}^t_j)\|^2`
+      with :math:`S^s_{i,j}, S^t_{i,j}` denoting the source and target similarity matrices and :math:`T(\cdot)` being a barycentric mapping
+
+ The algorithm used for solving the problem is the conditional gradient algorithm as proposed in [5].
+
+ Parameters
+ ----------
+ a : np.ndarray (ns,)
+ samples weights in the source domain
+ b : np.ndarray (nt,)
+ samples weights in the target domain
+ xs : np.ndarray (ns,d)
+ samples in the source domain
+ xt : np.ndarray (nt,d)
+ samples in the target domain
+ M : np.ndarray (ns,nt)
+ loss matrix
+ sim : string, optional
+ Type of similarity ('knn' or 'gauss') used to construct the Laplacian.
+ sim_param : int or float, optional
+ Parameter (number of the nearest neighbors for sim='knn'
+ or bandwidth for sim='gauss') used to compute the Laplacian.
+    reg : string
+        Type of Laplacian regularization: 'pos' or 'disp'
+ eta : float
+ Regularization term for Laplacian regularization
+ alpha : float
+ Regularization term for source domain's importance in regularization
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+ Stop threshold on error (inner emd solver) (>0)
+ numInnerItermax : int, optional
+ Max number of iterations (inner CG solver)
+ stopInnerThr : float, optional
+ Stop threshold on error (inner CG solver) (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+ Returns
+ -------
+ gamma : (ns x nt) ndarray
+ Optimal transportation matrix for the given parameters
+ log : dict
+        Log dictionary returned only if log==True in parameters
+
+
+ References
+ ----------
+
+ .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+ "Optimal Transport for Domain Adaptation," in IEEE
+ Transactions on Pattern Analysis and Machine Intelligence ,
+ vol.PP, no.99, pp.1-1
+ .. [30] R. Flamary, N. Courty, D. Tuia, A. Rakotomamonjy,
+ "Optimal transport with Laplacian regularization: Applications to domain adaptation and shape matching,"
+ in NIPS Workshop on Optimal Transport and Machine Learning OTML, 2014.
+
+ See Also
+ --------
+ ot.lp.emd : Unregularized OT
+ ot.optim.cg : General regularized OT
+
+ """
+ if not isinstance(sim_param, (int, float, type(None))):
+ raise ValueError(
+ 'Similarity parameter should be an int or a float. Got {type} instead.'.format(type=type(sim_param).__name__))
+
+ if sim == 'gauss':
+ if sim_param is None:
+ sim_param = 1 / (2 * (np.mean(dist(xs, xs, 'sqeuclidean')) ** 2))
+ sS = kernel(xs, xs, method=sim, sigma=sim_param)
+ sT = kernel(xt, xt, method=sim, sigma=sim_param)
+
+ elif sim == 'knn':
+ if sim_param is None:
+ sim_param = 3
+
+ from sklearn.neighbors import kneighbors_graph
+
+ sS = kneighbors_graph(X=xs, n_neighbors=int(sim_param)).toarray()
+ sS = (sS + sS.T) / 2
+ sT = kneighbors_graph(xt, n_neighbors=int(sim_param)).toarray()
+ sT = (sT + sT.T) / 2
+ else:
+ raise ValueError('Unknown similarity type {sim}. Currently supported similarity types are "knn" and "gauss".'.format(sim=sim))
+
+ lS = laplacian(sS)
+ lT = laplacian(sT)
+
+ def f(G):
+ return alpha * np.trace(np.dot(xt.T, np.dot(G.T, np.dot(lS, np.dot(G, xt))))) \
+ + (1 - alpha) * np.trace(np.dot(xs.T, np.dot(G, np.dot(lT, np.dot(G.T, xs)))))
+
+ ls2 = lS + lS.T
+ lt2 = lT + lT.T
+ xt2 = np.dot(xt, xt.T)
+
+ if reg == 'disp':
+ Cs = -eta * alpha / xs.shape[0] * dots(ls2, xs, xt.T)
+ Ct = -eta * (1 - alpha) / xt.shape[0] * dots(xs, xt.T, lt2)
+ M = M + Cs + Ct
+
+ def df(G):
+ return alpha * np.dot(ls2, np.dot(G, xt2))\
+ + (1 - alpha) * np.dot(xs, np.dot(xs.T, np.dot(G, lt2)))
+
+ return cg(a, b, M, reg=eta, f=f, df=df, G0=None, numItermax=numItermax, numItermaxEmd=numInnerItermax,
+ stopThr=stopThr, stopThr2=stopInnerThr, verbose=verbose, log=log)
+
+
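A minimal sketch of calling emd_laplace directly on toy data (the 'knn' similarity requires scikit-learn):

import numpy as np
import ot
from ot.da import emd_laplace

rng = np.random.RandomState(1)
xs, xt = rng.randn(20, 2), rng.randn(20, 2) + 1
a, b = ot.unif(20), ot.unif(20)
M = ot.dist(xs, xt)

# Laplacian-regularized coupling with a 3-nearest-neighbor similarity graph
G = emd_laplace(a, b, xs, xt, M, sim='knn', sim_param=3, reg='pos', eta=1.)
print(G.shape)  # (20, 20)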
def distribution_estimation_uniform(X):
"""estimates a uniform distribution from an array of samples X
@@ -772,7 +908,8 @@ class BaseTransport(BaseEstimator):
at the class level in their ``__init__`` as explicit keyword
arguments (no ``*args`` or ``**kwargs``).
- fit method should:
+ the fit method should:
+
- estimate a cost matrix and store it in a `cost_` attribute
- estimate a coupling matrix and store it in a `coupling_`
attribute
@@ -783,6 +920,9 @@ class BaseTransport(BaseEstimator):
transform method should always get as input a Xs parameter
inverse_transform method should always get as input a Xt parameter
+
+ transform_labels method should always get as input a ys parameter
+ inverse_transform_labels method should always get as input a yt parameter
"""
def fit(self, Xs=None, ys=None, Xt=None, yt=None):
@@ -794,7 +934,7 @@ class BaseTransport(BaseEstimator):
Xs : array-like, shape (n_source_samples, n_features)
The training input samples.
ys : array-like, shape (n_source_samples,)
- The class labels
+ The training class labels
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
yt : array-like, shape (n_target_samples,)
@@ -855,7 +995,7 @@ class BaseTransport(BaseEstimator):
Xs : array-like, shape (n_source_samples, n_features)
The training input samples.
ys : array-like, shape (n_source_samples,)
- The class labels
+ The class labels for training samples
Xt : array-like, shape (n_target_samples, n_features)
The training input samples.
yt : array-like, shape (n_target_samples,)
@@ -879,13 +1019,13 @@ class BaseTransport(BaseEstimator):
Parameters
----------
Xs : array-like, shape (n_source_samples, n_features)
- The training input samples.
+ The source input samples.
ys : array-like, shape (n_source_samples,)
- The class labels
+ The class labels for source samples
Xt : array-like, shape (n_target_samples, n_features)
- The training input samples.
+ The target input samples.
yt : array-like, shape (n_target_samples,)
- The class labels. If some target samples are unlabeled, fill the
+ The class labels for target. If some target samples are unlabeled, fill the
yt's elements with -1.
Warning: Note that, due to this convention -1 cannot be used as a
@@ -921,7 +1061,6 @@ class BaseTransport(BaseEstimator):
transp_Xs = []
for bi in batch_ind:
-
# get the nearest neighbor in the source domain
D0 = dist(Xs[bi], self.xs_)
idx = np.argmin(D0, axis=1)
@@ -941,20 +1080,64 @@ class BaseTransport(BaseEstimator):
return transp_Xs
+ def transform_labels(self, ys=None):
+ """Propagate source labels ys to obtain estimated target labels as in [27]
+
+ Parameters
+ ----------
+ ys : array-like, shape (n_source_samples,)
+ The source class labels
+
+ Returns
+ -------
+ transp_ys : array-like, shape (n_target_samples, nb_classes)
+ Estimated soft target labels.
+
+ References
+ ----------
+
+ .. [27] Ievgen Redko, Nicolas Courty, Rémi Flamary, Devis Tuia
+ "Optimal transport for multi-source domain adaptation under target shift",
+ International Conference on Artificial Intelligence and Statistics (AISTATS), 2019.
+
+ """
+
+ # check the necessary inputs parameters are here
+ if check_params(ys=ys):
+
+ ysTemp = label_normalization(np.copy(ys))
+ classes = np.unique(ysTemp)
+ n = len(classes)
+ D1 = np.zeros((n, len(ysTemp)))
+
+ # perform label propagation
+ transp = self.coupling_ / np.sum(self.coupling_, 1)[:, None]
+
+ # set nans to 0
+ transp[~ np.isfinite(transp)] = 0
+
+ for c in classes:
+ D1[int(c), ysTemp == c] = 1
+
+ # compute propagated labels
+ transp_ys = np.dot(D1, transp)
+
+ return transp_ys.T
+
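A tiny worked example of the propagation step above, with hypothetical values: row-normalize the coupling, then aggregate it class-wise with the one-hot matrix D1:

import numpy as np

# 3 source samples (classes 0, 0, 1) coupled with 2 target samples
coupling = np.array([[0.30, 0.20],
                     [0.10, 0.10],
                     [0.05, 0.25]])
transp = coupling / coupling.sum(axis=1, keepdims=True)

ys = np.array([0, 0, 1])
D1 = np.zeros((2, 3))
D1[ys, np.arange(3)] = 1          # one row per class, one-hot over samples

soft_yt = np.dot(D1, transp).T    # shape (n_target_samples, nb_classes)
print(soft_yt)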
def inverse_transform(self, Xs=None, ys=None, Xt=None, yt=None,
batch_size=128):
- """Transports target samples Xt onto target samples Xs
+ """Transports target samples Xt onto source samples Xs
Parameters
----------
Xs : array-like, shape (n_source_samples, n_features)
- The training input samples.
+ The source input samples.
ys : array-like, shape (n_source_samples,)
- The class labels
+ The source class labels
Xt : array-like, shape (n_target_samples, n_features)
- The training input samples.
+ The target input samples.
yt : array-like, shape (n_target_samples,)
- The class labels. If some target samples are unlabeled, fill the
+ The target class labels. If some target samples are unlabeled, fill the
yt's elements with -1.
Warning: Note that, due to this convention -1 cannot be used as a
@@ -990,7 +1173,6 @@ class BaseTransport(BaseEstimator):
transp_Xt = []
for bi in batch_ind:
-
D0 = dist(Xt[bi], self.xt_)
idx = np.argmin(D0, axis=1)
@@ -1009,6 +1191,41 @@ class BaseTransport(BaseEstimator):
return transp_Xt
+ def inverse_transform_labels(self, yt=None):
+ """Propagate target labels yt to obtain estimated source labels ys
+
+ Parameters
+ ----------
+ yt : array-like, shape (n_target_samples,)
+
+ Returns
+ -------
+ transp_ys : array-like, shape (n_source_samples, nb_classes)
+ Estimated soft source labels.
+ """
+
+ # check the necessary inputs parameters are here
+ if check_params(yt=yt):
+
+ ytTemp = label_normalization(np.copy(yt))
+ classes = np.unique(ytTemp)
+ n = len(classes)
+ D1 = np.zeros((n, len(ytTemp)))
+
+ # perform label propagation
+ transp = self.coupling_ / np.sum(self.coupling_, 1)[:, None]
+
+ # set nans to 0
+ transp[~ np.isfinite(transp)] = 0
+
+ for c in classes:
+ D1[int(c), ytTemp == c] = 1
+
+ # compute propagated samples
+ transp_ys = np.dot(D1, transp.T)
+
+ return transp_ys.T
+
class LinearTransport(BaseTransport):
""" OT linear operator between empirical distributions
@@ -1055,7 +1272,6 @@ class LinearTransport(BaseTransport):
def __init__(self, reg=1e-8, bias=True, log=False,
distribution_estimation=distribution_estimation_uniform):
-
self.bias = bias
self.log = log
self.reg = reg
@@ -1136,7 +1352,6 @@ class LinearTransport(BaseTransport):
# check the necessary inputs parameters are here
if check_params(Xs=Xs):
-
transp_Xs = Xs.dot(self.A_) + self.B_
return transp_Xs
@@ -1170,7 +1385,6 @@ class LinearTransport(BaseTransport):
# check the necessary inputs parameters are here
if check_params(Xt=Xt):
-
transp_Xt = Xt.dot(self.A1_) + self.B1_
return transp_Xt
@@ -1224,6 +1438,9 @@ class SinkhornTransport(BaseTransport):
.. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal
Transport, Advances in Neural Information Processing Systems (NIPS)
26, 2013
+ .. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014).
+ Regularized discrete optimal transport. SIAM Journal on Imaging
+ Sciences, 7(3), 1853-1882.
"""
def __init__(self, reg_e=1., max_iter=1000,
@@ -1231,7 +1448,6 @@ class SinkhornTransport(BaseTransport):
metric="sqeuclidean", norm=None,
distribution_estimation=distribution_estimation_uniform,
out_of_sample_map='ferradans', limit_max=np.infty):
-
self.reg_e = reg_e
self.max_iter = max_iter
self.tol = tol
@@ -1323,13 +1539,15 @@ class EMDTransport(BaseTransport):
.. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
"Optimal Transport for Domain Adaptation," in IEEE Transactions
on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
+ .. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014).
+ Regularized discrete optimal transport. SIAM Journal on Imaging
+ Sciences, 7(3), 1853-1882.
"""
def __init__(self, metric="sqeuclidean", norm=None, log=False,
distribution_estimation=distribution_estimation_uniform,
out_of_sample_map='ferradans', limit_max=10,
max_iter=100000):
-
self.metric = metric
self.norm = norm
self.log = log
@@ -1431,7 +1649,9 @@ class SinkhornLpl1Transport(BaseTransport):
.. [2] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
Generalized conditional gradient: analysis of convergence
and applications. arXiv preprint arXiv:1510.06567.
-
+ .. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014).
+ Regularized discrete optimal transport. SIAM Journal on Imaging
+ Sciences, 7(3), 1853-1882.
"""
def __init__(self, reg_e=1., reg_cl=0.1,
@@ -1440,7 +1660,6 @@ class SinkhornLpl1Transport(BaseTransport):
metric="sqeuclidean", norm=None,
distribution_estimation=distribution_estimation_uniform,
out_of_sample_map='ferradans', limit_max=np.infty):
-
self.reg_e = reg_e
self.reg_cl = reg_cl
self.max_iter = max_iter
@@ -1481,7 +1700,6 @@ class SinkhornLpl1Transport(BaseTransport):
# check the necessary inputs parameters are here
if check_params(Xs=Xs, Xt=Xt, ys=ys):
-
super(SinkhornLpl1Transport, self).fit(Xs, ys, Xt, yt)
returned_ = sinkhorn_lpl1_mm(
@@ -1499,6 +1717,127 @@ class SinkhornLpl1Transport(BaseTransport):
return self
+class EMDLaplaceTransport(BaseTransport):
+
+    """Domain Adaptation OT method based on Earth Mover's Distance with Laplacian regularization
+
+ Parameters
+ ----------
+ reg_type : string optional (default='pos')
+ Type of the regularization term: 'pos' and 'disp' for
+ regularization term defined in [2] and [6], respectively.
+ reg_lap : float, optional (default=1)
+ Laplacian regularization parameter
+ reg_src : float, optional (default=0.5)
+ Source relative importance in regularization
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ norm : string, optional (default=None)
+ If given, normalize the ground metric to avoid numerical errors that
+ can occur with large metric values.
+ similarity : string, optional (default="knn")
+ The similarity to use either knn or gaussian
+ similarity_param : int or float, optional (default=None)
+ Parameter for the similarity: number of nearest neighbors or bandwidth
+ if similarity="knn" or "gaussian", respectively. If None is provided,
+ it is set to 3 or the average pairwise squared Euclidean distance, respectively.
+ max_iter : int, optional (default=100)
+ Max number of BCD iterations
+ tol : float, optional (default=1e-5)
+ Stop threshold on relative loss decrease (>0)
+ max_inner_iter : int, optional (default=10)
+ Max number of iterations (inner CG solver)
+ inner_tol : float, optional (default=1e-6)
+ Stop threshold on error (inner CG solver) (>0)
+ log : int, optional (default=False)
+ Controls the logs of the optimization algorithm
+ distribution_estimation : callable, optional (defaults to the uniform)
+ The kind of distribution estimation to employ
+ out_of_sample_map : string, optional (default="ferradans")
+ The kind of out of sample mapping to apply to transport samples
+ from a domain into another one. Currently the only possible option is
+ "ferradans" which uses the method proposed in [6].
+
+ Attributes
+ ----------
+ coupling_ : array-like, shape (n_source_samples, n_target_samples)
+ The optimal coupling
+
+ References
+ ----------
+ .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+ "Optimal Transport for Domain Adaptation," in IEEE Transactions
+ on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
+ .. [2] R. Flamary, N. Courty, D. Tuia, A. Rakotomamonjy,
+ "Optimal transport with Laplacian regularization: Applications to domain adaptation and shape matching,"
+ in NIPS Workshop on Optimal Transport and Machine Learning OTML, 2014.
+ .. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014).
+ Regularized discrete optimal transport. SIAM Journal on Imaging
+ Sciences, 7(3), 1853-1882.
+ """
+
+ def __init__(self, reg_type='pos', reg_lap=1., reg_src=1., metric="sqeuclidean",
+ norm=None, similarity="knn", similarity_param=None, max_iter=100, tol=1e-9,
+ max_inner_iter=100000, inner_tol=1e-9, log=False, verbose=False,
+ distribution_estimation=distribution_estimation_uniform,
+ out_of_sample_map='ferradans'):
+ self.reg = reg_type
+ self.reg_lap = reg_lap
+ self.reg_src = reg_src
+ self.metric = metric
+ self.norm = norm
+ self.similarity = similarity
+ self.sim_param = similarity_param
+ self.max_iter = max_iter
+ self.tol = tol
+ self.max_inner_iter = max_inner_iter
+ self.inner_tol = inner_tol
+ self.log = log
+ self.verbose = verbose
+ self.distribution_estimation = distribution_estimation
+ self.out_of_sample_map = out_of_sample_map
+
+ def fit(self, Xs, ys=None, Xt=None, yt=None):
+ """Build a coupling matrix from source and target sets of samples
+ (Xs, ys) and (Xt, yt)
+
+ Parameters
+ ----------
+ Xs : array-like, shape (n_source_samples, n_features)
+ The training input samples.
+ ys : array-like, shape (n_source_samples,)
+ The class labels
+ Xt : array-like, shape (n_target_samples, n_features)
+ The training input samples.
+ yt : array-like, shape (n_target_samples,)
+ The class labels. If some target samples are unlabeled, fill the
+ yt's elements with -1.
+
+ Warning: Note that, due to this convention -1 cannot be used as a
+ class label
+
+ Returns
+ -------
+ self : object
+ Returns self.
+ """
+
+ super(EMDLaplaceTransport, self).fit(Xs, ys, Xt, yt)
+
+ returned_ = emd_laplace(a=self.mu_s, b=self.mu_t, xs=self.xs_,
+ xt=self.xt_, M=self.cost_, sim=self.similarity, sim_param=self.sim_param, reg=self.reg, eta=self.reg_lap,
+ alpha=self.reg_src, numItermax=self.max_iter, stopThr=self.tol, numInnerItermax=self.max_inner_iter,
+ stopInnerThr=self.inner_tol, log=self.log, verbose=self.verbose)
+
+ # coupling estimation
+ if self.log:
+ self.coupling_, self.log_ = returned_
+ else:
+ self.coupling_ = returned_
+ self.log_ = dict()
+ return self
+
+
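A hypothetical end-to-end sketch of the EMDLaplaceTransport estimator defined above (toy data, default 'pos' regularization):

import numpy as np
import ot

rng = np.random.RandomState(0)
Xs, Xt = rng.randn(20, 2), rng.randn(20, 2) + 1
ys = rng.randint(0, 2, 20)

otda = ot.da.EMDLaplaceTransport(reg_lap=1., similarity="knn")
otda.fit(Xs=Xs, ys=ys, Xt=Xt)
Xs_mapped = otda.transform(Xs=Xs)   # barycentric mapping onto the target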
class SinkhornL1l2Transport(BaseTransport):
"""Domain Adapatation OT method based on sinkhorn algorithm +
@@ -1554,7 +1893,9 @@ class SinkhornL1l2Transport(BaseTransport):
.. [2] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
Generalized conditional gradient: analysis of convergence
and applications. arXiv preprint arXiv:1510.06567.
-
+ .. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014).
+ Regularized discrete optimal transport. SIAM Journal on Imaging
+ Sciences, 7(3), 1853-1882.
"""
def __init__(self, reg_e=1., reg_cl=0.1,
@@ -1563,7 +1904,6 @@ class SinkhornL1l2Transport(BaseTransport):
metric="sqeuclidean", norm=None,
distribution_estimation=distribution_estimation_uniform,
out_of_sample_map='ferradans', limit_max=10):
-
self.reg_e = reg_e
self.reg_cl = reg_cl
self.max_iter = max_iter
@@ -1685,7 +2025,6 @@ class MappingTransport(BaseEstimator):
norm=None, kernel="linear", sigma=1, max_iter=100, tol=1e-5,
max_inner_iter=10, inner_tol=1e-6, log=False, verbose=False,
verbose2=False):
-
self.metric = metric
self.norm = norm
self.mu = mu
@@ -1848,7 +2187,9 @@ class UnbalancedSinkhornTransport(BaseTransport):
.. [1] Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X. (2016).
Scaling algorithms for unbalanced transport problems. arXiv preprint
arXiv:1607.05816.
-
+ .. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014).
+ Regularized discrete optimal transport. SIAM Journal on Imaging
+ Sciences, 7(3), 1853-1882.
"""
def __init__(self, reg_e=1., reg_m=0.1, method='sinkhorn',
@@ -1856,7 +2197,6 @@ class UnbalancedSinkhornTransport(BaseTransport):
metric="sqeuclidean", norm=None,
distribution_estimation=distribution_estimation_uniform,
out_of_sample_map='ferradans', limit_max=10):
-
self.reg_e = reg_e
self.reg_m = reg_m
self.method = method
@@ -1914,3 +2254,267 @@ class UnbalancedSinkhornTransport(BaseTransport):
self.log_ = dict()
return self
+
+
+class JCPOTTransport(BaseTransport):
+
+    """Domain Adaptation OT method for multi-source target shift based on Wasserstein barycenter algorithm.
+
+ Parameters
+ ----------
+ reg_e : float, optional (default=1)
+ Entropic regularization parameter
+    max_iter : int, float, optional (default=10)
+        The maximum number of iterations before stopping the optimization
+        algorithm if it has not converged
+ tol : float, optional (default=10e-9)
+ Stop threshold on error (inner sinkhorn solver) (>0)
+ verbose : bool, optional (default=False)
+ Controls the verbosity of the optimization algorithm
+ log : bool, optional (default=False)
+ Controls the logs of the optimization algorithm
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ norm : string, optional (default=None)
+ If given, normalize the ground metric to avoid numerical errors that
+ can occur with large metric values.
+ distribution_estimation : callable, optional (defaults to the uniform)
+ The kind of distribution estimation to employ
+ out_of_sample_map : string, optional (default="ferradans")
+ The kind of out of sample mapping to apply to transport samples
+ from a domain into another one. Currently the only possible option is
+ "ferradans" which uses the method proposed in [6].
+
+ Attributes
+ ----------
+ coupling_ : list of array-like objects, shape K x (n_source_samples, n_target_samples)
+ A set of optimal couplings between each source domain and the target domain
+ proportions_ : array-like, shape (n_classes,)
+ Estimated class proportions in the target domain
+ log_ : dictionary
+ The dictionary of log, empty dic if parameter log is not True
+
+ References
+ ----------
+
+ .. [1] Ievgen Redko, Nicolas Courty, Rémi Flamary, Devis Tuia
+ "Optimal transport for multi-source domain adaptation under target shift",
+ International Conference on Artificial Intelligence and Statistics (AISTATS),
+ vol. 89, p.849-858, 2019.
+
+ .. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014).
+ Regularized discrete optimal transport. SIAM Journal on Imaging
+ Sciences, 7(3), 1853-1882.
+
+
+ """
+
+ def __init__(self, reg_e=.1, max_iter=10,
+ tol=10e-9, verbose=False, log=False,
+ metric="sqeuclidean",
+ out_of_sample_map='ferradans'):
+ self.reg_e = reg_e
+ self.max_iter = max_iter
+ self.tol = tol
+ self.verbose = verbose
+ self.log = log
+ self.metric = metric
+ self.out_of_sample_map = out_of_sample_map
+
+ def fit(self, Xs, ys=None, Xt=None, yt=None):
+ """Building coupling matrices from a list of source and target sets of samples
+ (Xs, ys) and (Xt, yt)
+
+ Parameters
+ ----------
+ Xs : list of K array-like objects, shape K x (nk_source_samples, n_features)
+ A list of the training input samples.
+ ys : list of K array-like objects, shape K x (nk_source_samples,)
+ A list of the class labels
+ Xt : array-like, shape (n_target_samples, n_features)
+ The training input samples.
+ yt : array-like, shape (n_target_samples,)
+ The class labels. If some target samples are unlabeled, fill the
+ yt's elements with -1.
+
+ Warning: Note that, due to this convention -1 cannot be used as a
+ class label
+
+ Returns
+ -------
+ self : object
+ Returns self.
+ """
+
+ # check the necessary inputs parameters are here
+ if check_params(Xs=Xs, Xt=Xt, ys=ys):
+
+ self.xs_ = Xs
+ self.xt_ = Xt
+
+ returned_ = jcpot_barycenter(Xs=Xs, Ys=ys, Xt=Xt, reg=self.reg_e,
+                                         metric=self.metric, numItermax=self.max_iter, stopThr=self.tol,
+ verbose=self.verbose, log=True)
+
+ self.coupling_ = returned_[1]['gamma']
+
+ # deal with the value of log
+ if self.log:
+ self.proportions_, self.log_ = returned_
+ else:
+ self.proportions_ = returned_
+ self.log_ = dict()
+
+ return self
+
+ def transform(self, Xs=None, ys=None, Xt=None, yt=None, batch_size=128):
+ """Transports source samples Xs onto target ones Xt
+
+ Parameters
+ ----------
+ Xs : list of K array-like objects, shape K x (nk_source_samples, n_features)
+ A list of the training input samples.
+ ys : list of K array-like objects, shape K x (nk_source_samples,)
+ A list of the class labels
+ Xt : array-like, shape (n_target_samples, n_features)
+ The training input samples.
+ yt : array-like, shape (n_target_samples,)
+ The class labels. If some target samples are unlabeled, fill the
+ yt's elements with -1.
+
+ Warning: Note that, due to this convention -1 cannot be used as a
+ class label
+ batch_size : int, optional (default=128)
+ The batch size for out of sample inverse transform
+ """
+
+ transp_Xs = []
+
+ # check the necessary inputs parameters are here
+ if check_params(Xs=Xs):
+
+ if all([np.allclose(x, y) for x, y in zip(self.xs_, Xs)]):
+
+ # perform standard barycentric mapping for each source domain
+
+ for coupling in self.coupling_:
+ transp = coupling / np.sum(coupling, 1)[:, None]
+
+ # set nans to 0
+ transp[~ np.isfinite(transp)] = 0
+
+ # compute transported samples
+ transp_Xs.append(np.dot(transp, self.xt_))
+ else:
+
+ # perform out of sample mapping
+ indices = np.arange(Xs.shape[0])
+ batch_ind = [
+ indices[i:i + batch_size]
+ for i in range(0, len(indices), batch_size)]
+
+ transp_Xs = []
+
+ for bi in batch_ind:
+ transp_Xs_ = []
+
+ # get the nearest neighbor in the sources domains
+ xs = np.concatenate(self.xs_, axis=0)
+ idx = np.argmin(dist(Xs[bi], xs), axis=1)
+
+ # transport the source samples
+ for coupling in self.coupling_:
+ transp = coupling / np.sum(
+ coupling, 1)[:, None]
+ transp[~ np.isfinite(transp)] = 0
+ transp_Xs_.append(np.dot(transp, self.xt_))
+
+ transp_Xs_ = np.concatenate(transp_Xs_, axis=0)
+
+ # define the transported points
+ transp_Xs_ = transp_Xs_[idx, :] + Xs[bi] - xs[idx, :]
+ transp_Xs.append(transp_Xs_)
+
+ transp_Xs = np.concatenate(transp_Xs, axis=0)
+
+ return transp_Xs
+
+ def transform_labels(self, ys=None):
+ """Propagate source labels ys to obtain target labels as in [27]
+
+ Parameters
+ ----------
+ ys : list of K array-like objects, shape K x (nk_source_samples,)
+ A list of the class labels
+
+ Returns
+ -------
+ yt : array-like, shape (n_target_samples, nb_classes)
+ Estimated soft target labels.
+ """
+
+ # check the necessary inputs parameters are here
+ if check_params(ys=ys):
+ yt = np.zeros((len(np.unique(np.concatenate(ys))), self.xt_.shape[0]))
+ for i in range(len(ys)):
+ ysTemp = label_normalization(np.copy(ys[i]))
+ classes = np.unique(ysTemp)
+ n = len(classes)
+ ns = len(ysTemp)
+
+ # perform label propagation
+ transp = self.coupling_[i] / np.sum(self.coupling_[i], 1)[:, None]
+
+ # set nans to 0
+ transp[~ np.isfinite(transp)] = 0
+
+ if self.log:
+ D1 = self.log_['D1'][i]
+ else:
+ D1 = np.zeros((n, ns))
+
+ for c in classes:
+ D1[int(c), ysTemp == c] = 1
+
+ # compute propagated labels
+ yt = yt + np.dot(D1, transp) / len(ys)
+
+ return yt.T
+
+ def inverse_transform_labels(self, yt=None):
+        """Propagate target labels yt to obtain estimated source labels ys
+
+ Parameters
+ ----------
+        yt : array-like, shape (n_target_samples,)
+ The target class labels
+
+ Returns
+ -------
+ transp_ys : list of K array-like objects, shape K x (nk_source_samples, nb_classes)
+ A list of estimated soft source labels
+ """
+
+ # check the necessary inputs parameters are here
+ if check_params(yt=yt):
+ transp_ys = []
+ ytTemp = label_normalization(np.copy(yt))
+ classes = np.unique(ytTemp)
+ n = len(classes)
+ D1 = np.zeros((n, len(ytTemp)))
+
+ for c in classes:
+ D1[int(c), ytTemp == c] = 1
+
+ for i in range(len(self.xs_)):
+
+ # perform label propagation
+ transp = self.coupling_[i] / np.sum(self.coupling_[i], 1)[:, None]
+
+ # set nans to 0
+ transp[~ np.isfinite(transp)] = 0
+
+ # compute propagated labels
+ transp_ys.append(np.dot(D1, transp.T).T)
+
+ return transp_ys
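A hypothetical end-to-end sketch of the JCPOTTransport estimator (unlike the other transports, it consumes lists of source domains):

import numpy as np
import ot

rng = np.random.RandomState(0)
Xs = [rng.randn(20, 2), rng.randn(30, 2) + 1]   # two source domains
ys = [rng.randint(0, 2, 20), rng.randint(0, 2, 30)]
Xt = rng.randn(25, 2)

otda = ot.da.JCPOTTransport(reg_e=1., max_iter=100)
otda.fit(Xs=Xs, ys=ys, Xt=Xt)
print(otda.proportions_)             # estimated target class proportions
Xs_mapped = otda.transform(Xs=Xs)    # one mapped array per source domain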
diff --git a/ot/datasets.py b/ot/datasets.py
index ba0cfd9..b86ef3b 100644
--- a/ot/datasets.py
+++ b/ot/datasets.py
@@ -1,5 +1,5 @@
"""
-Simple example datasets for OT
+Simple example datasets
"""
# Author: Remi Flamary <remi.flamary@unice.fr>
@@ -30,7 +30,7 @@ def make_1D_gauss(n, m, s):
1D histogram for a gaussian distribution
"""
x = np.arange(n, dtype=np.float64)
- h = np.exp(-(x - m)**2 / (2 * s**2))
+ h = np.exp(-(x - m) ** 2 / (2 * s ** 2))
return h / h.sum()
@@ -80,7 +80,7 @@ def get_2D_samples_gauss(n, m, sigma, random_state=None):
return make_2D_samples_gauss(n, m, sigma, random_state=None)
-def make_data_classif(dataset, n, nz=.5, theta=0, random_state=None, **kwargs):
+def make_data_classif(dataset, n, nz=.5, theta=0, p=.5, random_state=None, **kwargs):
"""Dataset generation for classification problems
Parameters
@@ -91,6 +91,8 @@ def make_data_classif(dataset, n, nz=.5, theta=0, random_state=None, **kwargs):
number of training samples
nz : float
noise level (>0)
+ p : float
+ proportion of one class in the binary setting
random_state : int, RandomState instance or None, optional (default=None)
If int, random_state is the seed used by the random number generator;
If RandomState instance, random_state is the random number generator;
@@ -145,11 +147,22 @@ def make_data_classif(dataset, n, nz=.5, theta=0, random_state=None, **kwargs):
n2 = np.sum(y == 2)
x = np.zeros((n, 2))
- x[y == 1, :] = get_2D_samples_gauss(n1, m1, nz, random_state=generator)
- x[y == 2, :] = get_2D_samples_gauss(n2, m2, nz, random_state=generator)
+ x[y == 1, :] = make_2D_samples_gauss(n1, m1, nz, random_state=generator)
+ x[y == 2, :] = make_2D_samples_gauss(n2, m2, nz, random_state=generator)
x = x.dot(rot)
+ elif dataset.lower() == '2gauss_prop':
+
+ y = np.concatenate((np.ones(int(p * n)), np.zeros(int((1 - p) * n))))
+ x = np.hstack((0 * y[:, None] - 0, 1 - 2 * y[:, None])) + nz * np.random.randn(len(y), 2)
+
+ if ('bias' not in kwargs) and ('b' not in kwargs):
+ kwargs['bias'] = np.array([0, 2])
+
+ x[:, 0] += kwargs['bias'][0]
+ x[:, 1] += kwargs['bias'][1]
+
else:
x = np.array(0)
y = np.array(0)
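A hedged usage sketch for the new '2gauss_prop' dataset (parameter names as defined above; the exact samples depend on the global random state)::

    import ot

    x, y = ot.datasets.make_data_classif('2gauss_prop', n=100, nz=.3, p=.25)
    print(x.shape, y.mean())  # (100, 2) and roughly 0.25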
diff --git a/ot/dr.py b/ot/dr.py
index 680dabf..11d2e10 100644
--- a/ot/dr.py
+++ b/ot/dr.py
@@ -1,10 +1,10 @@
# -*- coding: utf-8 -*-
"""
-Dimension reduction with optimal transport
+Dimension reduction with OT
.. warning::
- Note that by default the module is not import in :mod:`ot`. In order to
+ Note that by default the module is not imported in :mod:`ot`. In order to
use it you need to explicitly import :mod:`ot.dr`
"""
diff --git a/ot/gpu/__init__.py b/ot/gpu/__init__.py
index 1ab95bb..7478fb9 100644
--- a/ot/gpu/__init__.py
+++ b/ot/gpu/__init__.py
@@ -1,8 +1,9 @@
# -*- coding: utf-8 -*-
"""
+GPU implementation for several OT solvers and utility
+functions.
-This module provides GPU implementation for several OT solvers and utility
-functions. The GPU backend in handled by `cupy
+The GPU backend is handled by `cupy
<https://cupy.chainer.org/>`_.
.. warning::
diff --git a/ot/gromov.py b/ot/gromov.py
index 699ae4c..4427a96 100644
--- a/ot/gromov.py
+++ b/ot/gromov.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
-Gromov-Wasserstein transport method
+Gromov-Wasserstein and Fused-Gromov-Wasserstein solvers
"""
# Author: Erwan Vautier <erwan.vautier@gmail.com>
@@ -276,7 +276,6 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs
- p : distribution in the source space
- q : distribution in the target space
- L : loss function to account for the misfit between the similarity matrices
- - H : entropy
Parameters
----------
@@ -343,6 +342,83 @@ def gromov_wasserstein(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs
return cg(p, q, 0, 1, f, df, G0, armijo=armijo, C1=C1, C2=C2, constC=constC, **kwargs)
+def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs):
+ """
+ Returns the gromov-wasserstein discrepancy between (C1,p) and (C2,q)
+
+ The function solves the following optimization problem:
+
+ .. math::
+ GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
+
+ Where :
+ - C1 : Metric cost matrix in the source space
+ - C2 : Metric cost matrix in the target space
+ - p : distribution in the source space
+ - q : distribution in the target space
+ - L : loss function to account for the misfit between the similarity matrices
+
+ Parameters
+ ----------
+ C1 : ndarray, shape (ns, ns)
+ Metric cost matrix in the source space
+ C2 : ndarray, shape (nt, nt)
+ Metric cost matrix in the target space
+ p : ndarray, shape (ns,)
+ Distribution in the source space.
+ q : ndarray, shape (nt,)
+ Distribution in the target space.
+ loss_fun : str
+ loss function used for the solver either 'square_loss' or 'kl_loss'
+ max_iter : int, optional
+ Max number of iterations
+ tol : float, optional
+ Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+ armijo : bool, optional
+ If True, the step of the line search is found via an Armijo line search. Otherwise the closed form is used.
+ If there are convergence issues, use False.
+
+ Returns
+ -------
+ gw_dist : float
+ Gromov-Wasserstein distance
+ log : dict
+ convergence information and coupling matrix
+
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+
+ .. [13] Mémoli, Facundo. Gromov–Wasserstein distances and the
+ metric approach to object matching. Foundations of computational
+ mathematics 11.4 (2011): 417-487.
+
+ """
+
+ constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun)
+
+ G0 = p[:, None] * q[None, :]
+
+ def f(G):
+ return gwloss(constC, hC1, hC2, G)
+
+ def df(G):
+ return gwggrad(constC, hC1, hC2, G)
+ res, log_gw = cg(p, q, 0, 1, f, df, G0, log=True, armijo=armijo, C1=C1, C2=C2, constC=constC, **kwargs)
+ log_gw['gw_dist'] = gwloss(constC, hC1, hC2, res)
+ log_gw['T'] = res
+ if log:
+ return log_gw['gw_dist'], log_gw
+ else:
+ return log_gw['gw_dist']
+
+
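A minimal usage sketch for the relocated gromov_wasserstein2 (random toy data, so the returned value is only illustrative)::

    import numpy as np
    import ot

    rng = np.random.RandomState(0)
    xs, xt = rng.randn(10, 2), rng.randn(12, 2)
    C1, C2 = ot.dist(xs, xs), ot.dist(xt, xt)
    p, q = ot.unif(10), ot.unif(12)
    gw_dist, log = ot.gromov.gromov_wasserstein2(C1, C2, p, q, 'square_loss', log=True)
    print(gw_dist, log['T'].shape)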
def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, armijo=False, log=False, **kwargs):
"""
Computes the FGW transport between two graphs see [24]
@@ -357,8 +433,7 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5,
where :
- M is the (ns,nt) metric cost matrix
- - :math:`f` is the regularization term ( and df is its gradient)
- - a and b are source and target weights (sum to 1)
+ - p and q are source and target weights (sum to 1)
- L is a loss function to account for the misfit between the similarity matrices
The algorithm used for solving the problem is conditional gradient as discussed in [24]_
@@ -377,17 +452,13 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5,
Distribution in the target space
loss_fun : str, optional
Loss function used for the solver
- max_iter : int, optional
- Max number of iterations
- tol : float, optional
- Stop threshold on error (>0)
- verbose : bool, optional
- Print information along iterations
- log : bool, optional
- record log if True
+ alpha : float, optional
+ Trade-off parameter (0 < alpha < 1)
armijo : bool, optional
If True, the step of the line search is found via an Armijo line search. Otherwise the closed form is used.
If there are convergence issues, use False.
+ log : bool, optional
+ record log if True
**kwargs : dict
parameters can be directly passed to the ot.optim.cg solver
@@ -417,11 +488,11 @@ def fused_gromov_wasserstein(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5,
return gwggrad(constC, hC1, hC2, G)
if log:
- res, log = cg(p, q, M, alpha, f, df, G0, armijo=armijo, C1=C1, C2=C2, constC=constC, log=True, **kwargs)
+ res, log = cg(p, q, (1 - alpha) * M, alpha, f, df, G0, armijo=armijo, C1=C1, C2=C2, constC=constC, log=True, **kwargs)
log['fgw_dist'] = log['loss'][::-1][0]
return res, log
else:
- return cg(p, q, M, alpha, f, df, G0, armijo=armijo, C1=C1, C2=C2, constC=constC, **kwargs)
+ return cg(p, q, (1 - alpha) * M, alpha, f, df, G0, armijo=armijo, C1=C1, C2=C2, constC=constC, **kwargs)
def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5, armijo=False, log=False, **kwargs):
@@ -439,8 +510,7 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5
where :
- M is the (ns,nt) metric cost matrix
- - :math:`f` is the regularization term ( and df is its gradient)
- - a and b are source and target weights (sum to 1)
+ - p and q are source and target weights (sum to 1)
- L is a loss function to account for the misfit between the similarity matrices
The algorithm used for solving the problem is conditional gradient as discussed in [1]_
@@ -458,17 +528,13 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5
Distribution in the target space.
loss_fun : str, optional
Loss function used for the solver.
- max_iter : int, optional
- Max number of iterations
- tol : float, optional
- Stop threshold on error (>0)
- verbose : bool, optional
- Print information along iterations
- log : bool, optional
- Record log if True.
+ alpha : float, optional
+ Trade-off parameter (0 < alpha < 1)
armijo : bool, optional
If True, the step of the line search is found via an Armijo line search.
Otherwise the closed form is used. If there are convergence issues, use False.
+ log : bool, optional
+ Record log if True.
**kwargs : dict
Parameters can be directly pased to the ot.optim.cg solver.
@@ -497,7 +563,7 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5
def df(G):
return gwggrad(constC, hC1, hC2, G)
- res, log = cg(p, q, M, alpha, f, df, G0, armijo=armijo, C1=C1, C2=C2, constC=constC, log=True, **kwargs)
+ res, log = cg(p, q, (1 - alpha) * M, alpha, f, df, G0, armijo=armijo, C1=C1, C2=C2, constC=constC, log=True, **kwargs)
if log:
log['fgw_dist'] = log['loss'][::-1][0]
log['T'] = res
@@ -506,84 +572,6 @@ def fused_gromov_wasserstein2(M, C1, C2, p, q, loss_fun='square_loss', alpha=0.5
return log['fgw_dist']
-def gromov_wasserstein2(C1, C2, p, q, loss_fun, log=False, armijo=False, **kwargs):
- """
- Returns the gromov-wasserstein discrepancy between (C1,p) and (C2,q)
-
- The function solves the following optimization problem:
-
- .. math::
- GW = \min_T \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})*T_{i,j}*T_{k,l}
-
- Where :
- - C1 : Metric cost matrix in the source space
- - C2 : Metric cost matrix in the target space
- - p : distribution in the source space
- - q : distribution in the target space
- - L : loss function to account for the misfit between the similarity matrices
- - H : entropy
-
- Parameters
- ----------
- C1 : ndarray, shape (ns, ns)
- Metric cost matrix in the source space
- C2 : ndarray, shape (nt, nt)
- Metric cost matrix in the target space
- p : ndarray, shape (ns,)
- Distribution in the source space.
- q : ndarray, shape (nt,)
- Distribution in the target space.
- loss_fun : str
- loss function used for the solver either 'square_loss' or 'kl_loss'
- max_iter : int, optional
- Max number of iterations
- tol : float, optional
- Stop threshold on error (>0)
- verbose : bool, optional
- Print information along iterations
- log : bool, optional
- record log if True
- armijo : bool, optional
- If True the steps of the line-search is found via an armijo research. Else closed form is used.
- If there is convergence issues use False.
-
- Returns
- -------
- gw_dist : float
- Gromov-Wasserstein distance
- log : dict
- convergence information and Coupling marix
-
- References
- ----------
- .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
- "Gromov-Wasserstein averaging of kernel and distance matrices."
- International Conference on Machine Learning (ICML). 2016.
-
- .. [13] Mémoli, Facundo. Gromov–Wasserstein distances and the
- metric approach to object matching. Foundations of computational
- mathematics 11.4 (2011): 417-487.
-
- """
-
- constC, hC1, hC2 = init_matrix(C1, C2, p, q, loss_fun)
-
- G0 = p[:, None] * q[None, :]
-
- def f(G):
- return gwloss(constC, hC1, hC2, G)
-
- def df(G):
- return gwggrad(constC, hC1, hC2, G)
- res, log = cg(p, q, 0, 1, f, df, G0, log=True, armijo=armijo, C1=C1, C2=C2, constC=constC, **kwargs)
- log['gw_dist'] = gwloss(constC, hC1, hC2, res)
- log['T'] = res
- if log:
- return log['gw_dist'], log
- else:
- return log['gw_dist']
-
-
def entropic_gromov_wasserstein(C1, C2, p, q, loss_fun, epsilon,
max_iter=1000, tol=1e-9, verbose=False, log=False):
"""
@@ -996,6 +984,16 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_
Whether to fix the structure of the barycenter during the updates
fixed_features : bool
Whether to fix the feature of the barycenter during the updates
+ loss_fun : str
+ Loss function used for the solver either 'square_loss' or 'kl_loss'
+ max_iter : int, optional
+ Max number of iterations
+ tol : float, optional
+ Stop threshold on error (>0).
+ verbose : bool, optional
+ Print information along iterations.
+ log : bool, optional
+ Record log if True.
init_C : ndarray, shape (N,N), optional
Initialization for the barycenters' structure matrix. If not set
a random init is used.
@@ -1084,7 +1082,7 @@ def fgw_barycenters(N, Ys, Cs, ps, lambdas, alpha, fixed_structure=False, fixed_
T_temp = [t.T for t in T]
C = update_sructure_matrix(p, lambdas, T_temp, Cs)
- T = [fused_gromov_wasserstein((1 - alpha) * Ms[s], C, Cs[s], p, ps[s], loss_fun, alpha,
+ T = [fused_gromov_wasserstein(Ms[s], C, Cs[s], p, ps[s], loss_fun, alpha,
numItermax=max_iter, stopThr=1e-5, verbose=verbose) for s in range(S)]
# T is N,ns
diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h
index f42e222..c0fe7a3 100644
--- a/ot/lp/EMD.h
+++ b/ot/lp/EMD.h
@@ -32,4 +32,6 @@ enum ProblemType {
int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter);
+
+
#endif
diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp
index fc7ca63..bc873ed 100644
--- a/ot/lp/EMD_wrapper.cpp
+++ b/ot/lp/EMD_wrapper.cpp
@@ -17,13 +17,13 @@
int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G,
double* alpha, double* beta, double *cost, int maxIter) {
-// beware M and C anre strored in row major C style!!!
- int n, m, i, cur;
+ // beware M and C are stored in row major C style!!!
+ int n, m, i, cur;
typedef FullBipartiteDigraph Digraph;
- DIGRAPH_TYPEDEFS(FullBipartiteDigraph);
+ DIGRAPH_TYPEDEFS(FullBipartiteDigraph);
- // Get the number of non zero coordinates for r and c
+ // Get the number of non zero coordinates for r and c
n=0;
for (int i=0; i<n1; i++) {
double val=*(X+i);
@@ -105,3 +105,4 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G,
return ret;
}
+
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 0c92810..514a607 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -2,8 +2,6 @@
"""
Solvers for the original linear program OT problem
-
-
"""
# Author: Remi Flamary <remi.flamary@unice.fr>
@@ -12,22 +10,169 @@ Solvers for the original linear program OT problem
import multiprocessing
import sys
+
import numpy as np
from scipy.sparse import coo_matrix
-from .import cvx
-
+from . import cvx
+from .cvx import barycenter
# import compiled emd
from .emd_wrap import emd_c, check_result, emd_1d_sorted
-from ..utils import parmap
-from .cvx import barycenter
from ..utils import dist
+from ..utils import parmap
+
+__all__ = ['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx',
+ 'emd_1d', 'emd2_1d', 'wasserstein_1d']
+
+
+def center_ot_dual(alpha0, beta0, a=None, b=None):
+ r"""Center dual OT potentials w.r.t. theirs weights
+
+ The main idea of this function is to find unique dual potentials
+ that ensure some kind of centering/fairness. It computes dual potentials
+ that lead to the same final objective value for both sources and targets
+ (see below for more details), which helps stability when the OT solver is
+ called multiple times with small changes in the inputs.
+
+ Basically we add another constraint on the potentials that does not
+ change the objective value but ensures uniqueness. The constraint
+ is the following:
+
+ .. math::
+ \alpha^T a= \beta^T b
+
+ in addition to the OT problem constraints.
+
+ Since :math:`\sum_i a_i=\sum_j b_j`, this can be solved by adding/removing
+ a constant from both :math:`\alpha_0` and :math:`\beta_0`.
+
+ .. math::
+ c = \frac{\beta_0^T b - \alpha_0^T a}{1^T a + 1^T b}
+
+ \alpha = \alpha_0 + c
+
+ \beta = \beta_0 - c
+
+ Parameters
+ ----------
+ alpha0 : (ns,) numpy.ndarray, float64
+ Source dual potential
+ beta0 : (nt,) numpy.ndarray, float64
+ Target dual potential
+ a : (ns,) numpy.ndarray, float64
+ Source histogram (uniform weight if empty list)
+ b : (nt,) numpy.ndarray, float64
+ Target histogram (uniform weight if empty list)
+
+ Returns
+ -------
+ alpha : (ns,) numpy.ndarray, float64
+ Source centered dual potential
+ beta : (nt,) numpy.ndarray, float64
+ Target centered dual potential
+
+ """
+ # if no weights are provided, use uniform
+ if a is None:
+ a = np.ones(alpha0.shape[0]) / alpha0.shape[0]
+ if b is None:
+ b = np.ones(beta0.shape[0]) / beta0.shape[0]
+
+ # compute constant that balances the weighted sums of the duals
+ c = (b.dot(beta0) - a.dot(alpha0)) / (a.sum() + b.sum())
+
+ # update duals
+ alpha = alpha0 + c
+ beta = beta0 - c
+
+ return alpha, beta
+
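A quick self-contained check of the centering identity :math:`\alpha^T a = \beta^T b` (random potentials, uniform weights)::

    import numpy as np

    rng = np.random.RandomState(0)
    a, b = np.ones(5) / 5, np.ones(4) / 4
    alpha0, beta0 = rng.randn(5), rng.randn(4)
    c = (b.dot(beta0) - a.dot(alpha0)) / (a.sum() + b.sum())
    alpha, beta = alpha0 + c, beta0 - c
    assert np.isclose(alpha.dot(a), beta.dot(b))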
+
+def estimate_dual_null_weights(alpha0, beta0, a, b, M):
+ r"""Estimate feasible values for 0-weighted dual potentials
+
+ The feasible values are computed efficiently but rather coarsely.
+
+ .. warning::
+ This function is necessary because the C++ solver in emd_c
+ discards all samples in the distributions with
+ zeros weights. This means that while the primal variable (transport
+ matrix) is exact, the solver only returns feasible dual potentials
+ on the samples with weights different from zero.
+
+ First we compute the constraint violations:
+
+ .. math::
+ V_{i,j} = \alpha_i + \beta_j - M_{i,j}
+
+ Next we compute the maximal violation per row (for alpha) and
+ per column (for beta)
-__all__=['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx',
- 'emd_1d', 'emd2_1d', 'wasserstein_1d']
+ .. math::
+ v^a_i=\max_j V_{i,j}
+
+ v^b_j=\max_i V_{i,j}
+
+ Finally we update the dual potentials of the 0-weight samples if a
+ constraint is violated
+
+ .. math::
+ \alpha_i = \alpha_i -v^a_i \quad \text{ if } a_i=0 \text{ and } v^a_i>0
+
+ \beta_j = \beta_j -v^b_j \quad \text{ if } b_j=0 \text{ and } v^b_j>0
+
+ In the end the dual potentials are centered using function
+ :ref:`center_ot_dual`.
+
+ Note that all those updates do not change the objective value of the
+ solution but provide dual potentials that do not violate the constraints.
+
+ Parameters
+ ----------
+ alpha0 : (ns,) numpy.ndarray, float64
+ Source dual potential
+ beta0 : (nt,) numpy.ndarray, float64
+ Target dual potential
+ a : (ns,) numpy.ndarray, float64
+ Source distribution (uniform weights if empty list)
+ b : (nt,) numpy.ndarray, float64
+ Target distribution (uniform weights if empty list)
+ M : (ns,nt) numpy.ndarray, float64
+ Loss matrix (c-order array with type float64)
+
+ Returns
+ -------
+ alpha : (ns,) numpy.ndarray, float64
+ Source corrected dual potential
+ beta : (nt,) numpy.ndarray, float64
+ Target corrected dual potential
+
+ """
+
+ # binary indexing of non-zeros weights
+ asel = a != 0
+ bsel = b != 0
+
+ # compute dual constraints violation
+ constraint_violation = alpha0[:, None] + beta0[None, :] - M
+
+ # compute the largest violation per row and per column
+ aviol = np.max(constraint_violation, 1)
+ bviol = np.max(constraint_violation, 0)
+ # updates correcting the violations on the 0-weight samples
+ alpha_up = -1 * ~asel * np.maximum(aviol, 0)
+ beta_up = -1 * ~bsel * np.maximum(bviol, 0)
-def emd(a, b, M, numItermax=100000, log=False):
+ alpha = alpha0 + alpha_up
+ beta = beta0 + beta_up
+
+ return center_ot_dual(alpha, beta, a, b)
+
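A hedged end-to-end check that the corrected duals are feasible even when a sample has zero weight (toy 1D points; relies only on ot.emd and ot.dist)::

    import numpy as np
    import ot

    a = np.array([0.5, 0.5, 0.0])   # last source sample has zero weight
    b = np.array([0.5, 0.5])
    M = ot.dist(np.arange(3.).reshape(-1, 1), np.arange(2.).reshape(-1, 1))
    G, log = ot.emd(a, b, M, log=True)
    u, v = log['u'], log['v']
    # u_i + v_j <= M_ij must hold everywhere, including the discarded sample
    assert np.all(u[:, None] + v[None, :] <= M + 1e-10)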
+
+def emd(a, b, M, numItermax=100000, log=False, center_dual=True):
r"""Solves the Earth Movers distance problem and returns the OT matrix
@@ -35,7 +180,9 @@ def emd(a, b, M, numItermax=100000, log=False):
\gamma = arg\min_\gamma <\gamma,M>_F
s.t. \gamma 1 = a
+
\gamma^T 1= b
+
\gamma\geq 0
where :
@@ -43,7 +190,7 @@ def emd(a, b, M, numItermax=100000, log=False):
- a and b are the sample weights
.. warning::
- Note that the M matrix needs to be a C-order numpy.array in float64
+ Note that the M matrix needs to be a C-order numpy.array in float64
format.
Uses the algorithm proposed in [1]_
@@ -62,6 +209,9 @@ def emd(a, b, M, numItermax=100000, log=False):
log: bool, optional (default=False)
If True, returns a dictionary containing the cost and dual
variables. Otherwise returns only the optimal transportation matrix.
+ center_dual: boolean, optional (default=True)
+ If True, centers the dual potential using function
+ :ref:`center_ot_dual`.
Returns
-------
@@ -109,7 +259,20 @@ def emd(a, b, M, numItermax=100000, log=False):
if len(b) == 0:
b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
+ assert (a.shape[0] == M.shape[0] and b.shape[0] == M.shape[1]), \
+ "Dimension mismatch, check dimensions of M with a and b"
+
+ asel = a != 0
+ bsel = b != 0
+
G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
+
+ if center_dual:
+ u, v = center_ot_dual(u, v, a, b)
+
+ if np.any(~asel) or np.any(~bsel):
+ u, v = estimate_dual_null_weights(u, v, a, b, M)
+
result_code_string = check_result(result_code)
if log:
log = {}
@@ -123,14 +286,17 @@ def emd(a, b, M, numItermax=100000, log=False):
def emd2(a, b, M, processes=multiprocessing.cpu_count(),
- numItermax=100000, log=False, return_matrix=False):
+ numItermax=100000, log=False, return_matrix=False,
+ center_dual=True):
r"""Solves the Earth Movers distance problem and returns the loss
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F
+ \min_\gamma <\gamma,M>_F
s.t. \gamma 1 = a
+
\gamma^T 1= b
+
\gamma\geq 0
where :
@@ -138,7 +304,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
- a and b are the sample weights
.. warning::
- Note that the M matrix needs to be a C-order numpy.array in float64
+ Note that the M matrix needs to be a C-order numpy.array in float64
format.
Uses the algorithm proposed in [1]_
@@ -161,6 +327,9 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
variables. Otherwise returns only the optimal transportation cost.
return_matrix: boolean, optional (default=False)
If True, returns the optimal transportation matrix in the log.
+ center_dual: boolean, optional (default=True)
+ If True, centers the dual potential using function
+ :ref:`center_ot_dual`.
Returns
-------
@@ -204,7 +373,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
# problem with pikling Forks
if sys.platform.endswith('win32'):
- processes=1
+ processes = 1
# if empty array given then use uniform distributions
if len(a) == 0:
@@ -212,21 +381,43 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
if len(b) == 0:
b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
+ assert (a.shape[0] == M.shape[0] and b.shape[0] == M.shape[1]), \
+ "Dimension mismatch, check dimensions of M with a and b"
+
+ asel = a != 0
+
if log or return_matrix:
def f(b):
- G, cost, u, v, resultCode = emd_c(a, b, M, numItermax)
- result_code_string = check_result(resultCode)
+ bsel = b != 0
+
+ G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
+
+ if center_dual:
+ u, v = center_ot_dual(u, v, a, b)
+
+ if np.any(~asel) or np.any(~bsel):
+ u, v = estimate_dual_null_weights(u, v, a, b, M)
+
+ result_code_string = check_result(result_code)
log = {}
if return_matrix:
log['G'] = G
log['u'] = u
log['v'] = v
log['warning'] = result_code_string
- log['result_code'] = resultCode
+ log['result_code'] = result_code
return [cost, log]
else:
def f(b):
+ bsel = b != 0
G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
+
+ if center_dual:
+ u, v = center_ot_dual(u, v, a, b)
+
+ if np.any(~asel) or np.any(~bsel):
+ u, v = estimate_dual_null_weights(u, v, a, b, M)
+
check_result(result_code)
return cost
@@ -234,7 +425,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
return f(b)
nb = b.shape[1]
- if processes>1:
+ if processes > 1:
res = parmap(f, [b[:, i] for i in range(nb)], processes)
else:
res = list(map(f, [b[:, i].copy() for i in range(nb)]))
@@ -242,8 +433,8 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
return res
-
-def free_support_barycenter(measures_locations, measures_weights, X_init, b=None, weights=None, numItermax=100, stopThr=1e-7, verbose=False, log=None):
+def free_support_barycenter(measures_locations, measures_weights, X_init, b=None, weights=None, numItermax=100,
+ stopThr=1e-7, verbose=False, log=None):
"""
Solves the free support (locations of the barycenters are optimized, not the weights) Wasserstein barycenter problem (i.e. the weighted Frechet mean for the 2-Wasserstein distance)
@@ -295,7 +486,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
k = X_init.shape[0]
d = X_init.shape[1]
if b is None:
- b = np.ones((k,))/k
+ b = np.ones((k,)) / k
if weights is None:
weights = np.ones((N,)) / N
@@ -306,17 +497,17 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
displacement_square_norm = stopThr + 1.
- while ( displacement_square_norm > stopThr and iter_count < numItermax ):
+ while (displacement_square_norm > stopThr and iter_count < numItermax):
T_sum = np.zeros((k, d))
- for (measure_locations_i, measure_weights_i, weight_i) in zip(measures_locations, measures_weights, weights.tolist()):
-
+ for (measure_locations_i, measure_weights_i, weight_i) in zip(measures_locations, measures_weights,
+ weights.tolist()):
M_i = dist(X, measure_locations_i)
T_i = emd(b, measure_weights_i, M_i)
T_sum = T_sum + weight_i * np.reshape(1. / b, (-1, 1)) * np.matmul(T_i, measure_locations_i)
- displacement_square_norm = np.sum(np.square(T_sum-X))
+ displacement_square_norm = np.sum(np.square(T_sum - X))
if log:
displacement_square_norms.append(displacement_square_norm)
@@ -436,12 +627,12 @@ def emd_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True,
if b.ndim == 0 or len(b) == 0:
b = np.ones((x_b.shape[0],), dtype=np.float64) / x_b.shape[0]
- x_a_1d = x_a.reshape((-1, ))
- x_b_1d = x_b.reshape((-1, ))
+ x_a_1d = x_a.reshape((-1,))
+ x_b_1d = x_b.reshape((-1,))
perm_a = np.argsort(x_a_1d)
perm_b = np.argsort(x_b_1d)
- G_sorted, indices, cost = emd_1d_sorted(a, b,
+ G_sorted, indices, cost = emd_1d_sorted(a[perm_a], b[perm_b],
x_a_1d[perm_a], x_b_1d[perm_b],
metric=metric, p=p)
G = coo_matrix((G_sorted, (perm_a[indices[:, 0]], perm_b[indices[:, 1]])),
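The fix above sorts the weights together with the positions; a hedged check that emd_1d now agrees with the generic solver on non-uniform weights (same optimal cost, random toy data)::

    import numpy as np
    import ot

    rng = np.random.RandomState(1)
    x_a, x_b = rng.randn(6, 1), rng.randn(7, 1)
    a = rng.rand(6)
    a /= a.sum()
    b = rng.rand(7)
    b /= b.sum()
    M = ot.dist(x_a, x_b)
    G_1d = ot.emd_1d(x_a, x_b, a, b, metric='sqeuclidean')
    G_nd = ot.emd(a, b, M)
    assert np.isclose(np.sum(G_1d * M), np.sum(G_nd * M))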
diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
index 2b6c495..c167964 100644
--- a/ot/lp/emd_wrap.pyx
+++ b/ot/lp/emd_wrap.pyx
@@ -19,7 +19,7 @@ import warnings
cdef extern from "EMD.h":
- int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter)
+ int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter) nogil
cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED
@@ -35,8 +35,7 @@ def check_result(result_code):
message = "numItermax reached before optimality. Try to increase numItermax."
warnings.warn(message)
return message
-
-
+
@cython.boundscheck(False)
@cython.wraparound(False)
def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int max_iter):
@@ -61,6 +60,12 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
.. warning::
Note that the M matrix needs to be a C-order :py.cls:`numpy.array`
+ .. warning::
+ The C++ solver discards all samples in the distributions with
+ zeros weights. This means that while the primal variable (transport
+ matrix) is exact, the solver only returns feasible dual potentials
+ on the samples with weights different from zero.
+
Parameters
----------
a : (ns,) numpy.ndarray, float64
@@ -73,7 +78,6 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
The maximum number of iterations before stopping the optimization
algorithm if it has not converged.
-
Returns
-------
gamma: (ns x nt) numpy.ndarray
@@ -82,12 +86,19 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
"""
cdef int n1= M.shape[0]
cdef int n2= M.shape[1]
+ cdef int nmax=n1+n2-1
+ cdef int result_code = 0
+ cdef int nG=0
cdef double cost=0
- cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([n1, n2])
cdef np.ndarray[double, ndim=1, mode="c"] alpha=np.zeros(n1)
cdef np.ndarray[double, ndim=1, mode="c"] beta=np.zeros(n2)
+ cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([0, 0])
+
+ cdef np.ndarray[double, ndim=1, mode="c"] Gv=np.zeros(0)
+ cdef np.ndarray[long, ndim=1, mode="c"] iG=np.zeros(0,dtype=np.int)
+ cdef np.ndarray[long, ndim=1, mode="c"] jG=np.zeros(0,dtype=np.int)
if not len(a):
a=np.ones((n1,))/n1
@@ -95,8 +106,12 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
if not len(b):
b=np.ones((n2,))/n2
+ # init OT matrix
+ G=np.zeros([n1, n2])
+
# calling the function
- cdef int result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
+ with nogil:
+ result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
return G, cost, alpha, beta, result_code
diff --git a/ot/lp/network_simplex_simple.h b/ot/lp/network_simplex_simple.h
index 7c6a4ce..5d93040 100644
--- a/ot/lp/network_simplex_simple.h
+++ b/ot/lp/network_simplex_simple.h
@@ -686,7 +686,7 @@ namespace lemon {
/// \see resetParams(), reset()
ProblemType run() {
#if DEBUG_LVL>0
- std::cout << "OPTIMAL = " << OPTIMAL << "\nINFEASIBLE = " << INFEASIBLE << "\nUNBOUNDED = " << UNBOUNDED << "\nMAX_ITER_REACHED" << MAX_ITER_REACHED\n";
+ std::cout << "OPTIMAL = " << OPTIMAL << "\nINFEASIBLE = " << INFEASIBLE << "\nUNBOUNDED = " << UNBOUNDED << "\nMAX_ITER_REACHED" << MAX_ITER_REACHED << "\n" ;
#endif
if (!init()) return INFEASIBLE;
@@ -875,7 +875,7 @@ namespace lemon {
c += Number(it->second) * Number(_cost[it->first]);
return c;*/
- for (int i=0; i<_flow.size(); i++)
+ for (unsigned long i=0; i<_flow.size(); i++)
c += _flow[i] * Number(_cost[i]);
return c;
@@ -1257,7 +1257,7 @@ namespace lemon {
u = w;
}
_pred[u_in] = in_arc;
- _forward[u_in] = (u_in == _source[in_arc]);
+ _forward[u_in] = ((unsigned int)u_in == _source[in_arc]);
_succ_num[u_in] = old_succ_num;
// Set limits for updating _last_succ form v_in and v_out
@@ -1418,7 +1418,6 @@ namespace lemon {
template <typename PivotRuleImpl>
ProblemType start() {
PivotRuleImpl pivot(*this);
- double prevCost=-1;
ProblemType retVal = OPTIMAL;
// Perform heuristic initial pivots
diff --git a/ot/optim.py b/ot/optim.py
index 0abd9e9..b9ca891 100644
--- a/ot/optim.py
+++ b/ot/optim.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
-Optimization algorithms for OT
+Generic solvers for regularized OT
"""
# Author: Remi Flamary <remi.flamary@unice.fr>
@@ -134,7 +134,7 @@ def solve_linesearch(cost, G, deltaG, Mi, f_val,
return alpha, fc, f_val
-def cg(a, b, M, reg, f, df, G0=None, numItermax=200,
+def cg(a, b, M, reg, f, df, G0=None, numItermax=200, numItermaxEmd=100000,
stopThr=1e-9, stopThr2=1e-9, verbose=False, log=False, **kwargs):
"""
Solve the general regularized OT problem with conditional gradient
@@ -172,6 +172,8 @@ def cg(a, b, M, reg, f, df, G0=None, numItermax=200,
initial guess (default is indep joint density)
numItermax : int, optional
Max number of iterations
+ numItermaxEmd : int, optional
+ Max number of iterations for emd
stopThr : float, optional
Stop threshold on the relative variation (>0)
stopThr2 : float, optional
@@ -238,7 +240,7 @@ def cg(a, b, M, reg, f, df, G0=None, numItermax=200,
Mi += Mi.min()
# solve linear program
- Gc = emd(a, b, Mi)
+ Gc = emd(a, b, Mi, numItermax=numItermaxEmd)
deltaG = Gc - G
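A hedged usage sketch of the new knob: raising numItermaxEmd when the inner LP hits its iteration cap (the squared l2 regularizer is chosen only for illustration)::

    import numpy as np
    import ot

    rng = np.random.RandomState(0)
    a, b = ot.unif(20), ot.unif(20)
    M = ot.dist(rng.randn(20, 2), rng.randn(20, 2))

    def f(G):
        return 0.5 * np.sum(G ** 2)

    def df(G):
        return G

    G = ot.optim.cg(a, b, M, 1e-1, f, df, numItermaxEmd=200000)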
diff --git a/ot/partial.py b/ot/partial.py
new file mode 100755
index 0000000..eb707d8
--- /dev/null
+++ b/ot/partial.py
@@ -0,0 +1,1062 @@
+# -*- coding: utf-8 -*-
+"""
+Partial OT solvers
+"""
+
+# Author: Laetitia Chapel <laetitia.chapel@irisa.fr>
+# License: MIT License
+
+import numpy as np
+
+from .lp import emd
+
+
+def partial_wasserstein_lagrange(a, b, M, reg_m=None, nb_dummies=1, log=False,
+ **kwargs):
+ r"""
+ Solves the partial optimal transport problem for the quadratic cost
+ and returns the OT plan
+
+ The function considers the following problem:
+
+ .. math::
+ \gamma = \arg\min_\gamma <\gamma,(M-\lambda)>_F
+
+ s.t.
+ \gamma\geq 0 \\
+ \gamma 1 \leq a\\
+ \gamma^T 1 \leq b\\
+ 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\}
+
+
+ or equivalently (see Chizat, L., Peyré, G., Schmitzer, B., & Vialard, F. X.
+ (2018). An interpolating distance between optimal transport and Fisher–Rao
+ metrics. Foundations of Computational Mathematics, 18(1), 1-44.)
+
+ .. math::
+ \gamma = \arg\min_\gamma <\gamma,M>_F + \sqrt{\lambda/2}
+ (\|\gamma 1 - a\|_1 + \|\gamma^T 1 - b\|_1)
+
+ s.t.
+ \gamma\geq 0 \\
+
+
+ where :
+
+ - M is the metric cost matrix
+ - a and b are source and target unbalanced distributions
+ - :math:`\lambda` is the Lagrangian cost. Tuning its value controls the
+ amount of mass m that is transported
+
+ The formulation of the problem has been proposed in [28]_
+
+
+ Parameters
+ ----------
+ a : np.ndarray (dim_a,)
+ Unnormalized histogram of dimension dim_a
+ b : np.ndarray (dim_b,)
+ Unnormalized histograms of dimension dim_b
+ M : np.ndarray (dim_a, dim_b)
+ cost matrix for the quadratic cost
+ reg_m : float, optional
+ Lagrangian cost
+ nb_dummies : int, optional, default:1
+ number of reservoir points to be added (to avoid numerical
+ instabilities, increase its value if an error is raised)
+ log : bool, optional
+ record log if True
+ **kwargs : dict
+ parameters can be directly passed to the emd solver
+
+ .. warning::
+ When dealing with a large number of points, the EMD solver may face
+ some instabilities, especially when the mass associated to the dummy
+ point is large. To avoid them, increase the number of dummy points
+ (this allows a smoother distribution of the mass over the points).
+
+ Returns
+ -------
+ gamma : (dim_a x dim_b) ndarray
+ Optimal transportation matrix for the given parameters
+ log : dict
+ log dictionary returned only if `log` is `True`
+
+
+ Examples
+ --------
+
+ >>> import ot
+ >>> import numpy as np
+ >>> a = [.1, .2]
+ >>> b = [.1, .1]
+ >>> M = [[0., 1.], [2., 3.]]
+ >>> np.round(partial_wasserstein_lagrange(a,b,M), 2)
+ array([[0.1, 0. ],
+ [0. , 0.1]])
+ >>> np.round(partial_wasserstein_lagrange(a,b,M,reg_m=2), 2)
+ array([[0.1, 0. ],
+ [0. , 0. ]])
+
+ References
+ ----------
+
+ .. [28] Caffarelli, L. A., & McCann, R. J. (2010) Free boundaries in
+ optimal transport and Monge-Ampere obstacle problems. Annals of
+ mathematics, 673-730.
+
+ See Also
+ --------
+ ot.partial.partial_wasserstein : Partial Wasserstein with fixed mass
+ """
+
+ if np.sum(a) > 1 or np.sum(b) > 1:
+ raise ValueError("Problem infeasible. Check that a and b are in the "
+ "simplex")
+
+ if reg_m is None:
+ reg_m = np.max(M) + 1
+ if reg_m < -np.max(M):
+ return np.zeros((len(a), len(b)))
+
+ eps = 1e-20
+ M = np.asarray(M, dtype=np.float64)
+ b = np.asarray(b, dtype=np.float64)
+ a = np.asarray(a, dtype=np.float64)
+
+ M_star = M - reg_m # modified cost matrix
+
+ # trick to speed up the computation: select only the subset of rows/columns
+ # that can have marginals greater than 0 (that is to say M_star < 0)
+ idx_x = np.where(np.min(M_star, axis=1) < eps)[0]
+ idx_y = np.where(np.min(M_star, axis=0) < eps)[0]
+
+ # extend a, b, M with "reservoir" or "dummy" points
+ M_extended = np.zeros((len(idx_x) + nb_dummies, len(idx_y) + nb_dummies))
+ M_extended[:len(idx_x), :len(idx_y)] = M_star[np.ix_(idx_x, idx_y)]
+
+ a_extended = np.append(a[idx_x], [(np.sum(a) - np.sum(a[idx_x]) +
+ np.sum(b)) / nb_dummies] * nb_dummies)
+ b_extended = np.append(b[idx_y], [(np.sum(b) - np.sum(b[idx_y]) +
+ np.sum(a)) / nb_dummies] * nb_dummies)
+
+ gamma_extended, log_emd = emd(a_extended, b_extended, M_extended, log=True,
+ **kwargs)
+ gamma = np.zeros((len(a), len(b)))
+ gamma[np.ix_(idx_x, idx_y)] = gamma_extended[:-nb_dummies, :-nb_dummies]
+
+ if log_emd['warning'] is not None:
+ raise ValueError("Error in the EMD resolution: try to increase the"
+ " number of dummy points")
+ log_emd['cost'] = np.sum(gamma * M)
+ if log:
+ return gamma, log_emd
+ else:
+ return gamma
+
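A hedged illustration of the role of reg_m: the total transported mass (the sum of the returned plan) grows with the Lagrangian cost::

    import numpy as np
    import ot

    a, b = np.array([.1, .2]), np.array([.1, .1])
    M = np.array([[0., 1.], [2., 3.]])
    for reg_m in [.5, 1.5, 3.5]:
        gamma = ot.partial.partial_wasserstein_lagrange(a, b, M, reg_m=reg_m)
        print(reg_m, gamma.sum())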
+
+def partial_wasserstein(a, b, M, m=None, nb_dummies=1, log=False, **kwargs):
+ r"""
+ Solves the partial optimal transport problem for the quadratic cost
+ and returns the OT plan
+
+ The function considers the following problem:
+
+ .. math::
+ \gamma = \arg\min_\gamma <\gamma,M>_F
+
+ s.t.
+ \gamma\geq 0 \\
+ \gamma 1 \leq a\\
+ \gamma^T 1 \leq b\\
+ 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\}
+
+
+ where :
+
+ - M is the metric cost matrix
+ - a and b are source and target unbalanced distributions
+ - m is the amount of mass to be transported
+
+ Parameters
+ ----------
+ a : np.ndarray (dim_a,)
+ Unnormalized histogram of dimension dim_a
+ b : np.ndarray (dim_b,)
+ Unnormalized histograms of dimension dim_b
+ M : np.ndarray (dim_a, dim_b)
+ cost matrix for the quadratic cost
+ m : float, optional
+ amount of mass to be transported
+ nb_dummies : int, optional, default:1
+ number of reservoir points to be added (to avoid numerical
+ instabilities, increase its value if an error is raised)
+ log : bool, optional
+ record log if True
+ **kwargs : dict
+ parameters can be directly passed to the emd solver
+
+
+ .. warning::
+ When dealing with a large number of points, the EMD solver may face
+ some instabilities, especially when the mass associated to the dummy
+ point is large. To avoid them, increase the number of dummy points
+ (this allows a smoother distribution of the mass over the points).
+
+
+ Returns
+ -------
+ gamma : (dim_a x dim_b) ndarray
+ Optimal transportation matrix for the given parameters
+ log : dict
+ log dictionary returned only if `log` is `True`
+
+
+ Examples
+ --------
+
+ >>> import ot
+ >>> import numpy as np
+ >>> a = [.1, .2]
+ >>> b = [.1, .1]
+ >>> M = [[0., 1.], [2., 3.]]
+ >>> np.round(partial_wasserstein(a,b,M), 2)
+ array([[0.1, 0. ],
+ [0. , 0.1]])
+ >>> np.round(partial_wasserstein(a,b,M,m=0.1), 2)
+ array([[0.1, 0. ],
+ [0. , 0. ]])
+
+ References
+ ----------
+ .. [28] Caffarelli, L. A., & McCann, R. J. (2010) Free boundaries in
+ optimal transport and Monge-Ampere obstacle problems. Annals of
+ mathematics, 673-730.
+ .. [29] Chapel, L., Alaya, M., Gasso, G. (2019). "Partial Gromov-
+ Wasserstein with Applications on Positive-Unlabeled Learning".
+ arXiv preprint arXiv:2002.08276.
+
+ See Also
+ --------
+ ot.partial.partial_wasserstein_lagrange: Partial Wasserstein with
+ regularization on the marginals
+ ot.partial.entropic_partial_wasserstein: Partial Wasserstein with a
+ entropic regularization parameter
+ """
+
+ if m is None:
+ return partial_wasserstein_lagrange(a, b, M, log=log, **kwargs)
+ elif m < 0:
+ raise ValueError("Problem infeasible. Parameter m should be greater"
+ " than 0.")
+ elif m > np.min((np.sum(a), np.sum(b))):
+ raise ValueError("Problem infeasible. Parameter m should be lower"
+ " than or equal to min(|a|_1, |b|_1).")
+
+ b_extended = np.append(b, [(np.sum(a) - m) / nb_dummies] * nb_dummies)
+ a_extended = np.append(a, [(np.sum(b) - m) / nb_dummies] * nb_dummies)
+ M_extended = np.zeros((len(a_extended), len(b_extended)))
+ M_extended[-nb_dummies:, -nb_dummies:] = np.max(M) * 1e5
+ M_extended[:len(a), :len(b)] = M
+
+ gamma, log_emd = emd(a_extended, b_extended, M_extended, log=True,
+ **kwargs)
+ if log_emd['warning'] is not None:
+ raise ValueError("Error in the EMD resolution: try to increase the"
+ " number of dummy points")
+ log_emd['partial_w_dist'] = np.sum(M * gamma[:len(a), :len(b)])
+
+ if log:
+ return gamma[:len(a), :len(b)], log_emd
+ else:
+ return gamma[:len(a), :len(b)]
+
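A hedged sanity check of the returned plan: the marginals stay below a and b and exactly m units of mass are moved::

    import numpy as np
    import ot

    a, b = np.array([.1, .2]), np.array([.1, .1])
    M = np.array([[0., 1.], [2., 3.]])
    gamma = ot.partial.partial_wasserstein(a, b, M, m=0.15)
    assert np.all(gamma.sum(1) <= a + 1e-12)
    assert np.all(gamma.sum(0) <= b + 1e-12)
    assert np.isclose(gamma.sum(), 0.15)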
+
+def partial_wasserstein2(a, b, M, m=None, nb_dummies=1, log=False, **kwargs):
+ r"""
+ Solves the partial optimal transport problem for the quadratic cost
+ and returns the partial Wasserstein discrepancy
+
+ The function considers the following problem:
+
+ .. math::
+ \gamma = \arg\min_\gamma <\gamma,M>_F
+
+ s.t.
+ \gamma\geq 0 \\
+ \gamma 1 \leq a\\
+ \gamma^T 1 \leq b\\
+ 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\}
+
+
+ where :
+
+ - M is the metric cost matrix
+ - a and b are source and target unbalanced distributions
+ - m is the amount of mass to be transported
+
+ Parameters
+ ----------
+ a : np.ndarray (dim_a,)
+ Unnormalized histogram of dimension dim_a
+ b : np.ndarray (dim_b,)
+ Unnormalized histograms of dimension dim_b
+ M : np.ndarray (dim_a, dim_b)
+ cost matrix for the quadratic cost
+ m : float, optional
+ amount of mass to be transported
+ nb_dummies : int, optional, default:1
+ number of reservoir points to be added (to avoid numerical
+ instabilities, increase its value if an error is raised)
+ log : bool, optional
+ record log if True
+ **kwargs : dict
+ parameters can be directly passed to the emd solver
+
+
+ .. warning::
+ When dealing with a large number of points, the EMD solver may face
+ some instabilities, especially when the mass associated to the dummy
+ point is large. To avoid them, increase the number of dummy points
+ (this allows a smoother distribution of the mass over the points).
+
+
+ Returns
+ -------
+ partial_w_dist : float
+ partial Wasserstein discrepancy for the given parameters
+ log : dict
+ log dictionary returned only if `log` is `True`
+
+
+ Examples
+ --------
+
+ >>> import ot
+ >>> import numpy as np
+ >>> a=[.1, .2]
+ >>> b=[.1, .1]
+ >>> M=[[0., 1.], [2., 3.]]
+ >>> np.round(partial_wasserstein2(a, b, M), 1)
+ 0.3
+ >>> np.round(partial_wasserstein2(a,b,M,m=0.1), 1)
+ 0.0
+
+ References
+ ----------
+ .. [28] Caffarelli, L. A., & McCann, R. J. (2010) Free boundaries in
+ optimal transport and Monge-Ampere obstacle problems. Annals of
+ mathematics, 673-730.
+ .. [29] Chapel, L., Alaya, M., Gasso, G. (2019). "Partial Gromov-
+ Wasserstein with Applications on Positive-Unlabeled Learning".
+ arXiv preprint arXiv:2002.08276.
+ """
+
+ partial_gw, log_w = partial_wasserstein(a, b, M, m, nb_dummies, log=True,
+ **kwargs)
+
+ log_w['T'] = partial_gw
+
+ if log:
+ return np.sum(partial_gw * M), log_w
+ else:
+ return np.sum(partial_gw * M)
+
+
+def gwgrad_partial(C1, C2, T):
+ """Compute the GW gradient. Note: we can not use the trick in [12]_ as
+ the marginals may not sum to 1.
+
+ Parameters
+ ----------
+ C1: array of shape (n_p,n_p)
+ intra-source (P) cost matrix
+
+ C2: array of shape (n_u,n_u)
+ intra-target (U) cost matrix
+
+ T : array of shape (n_p+nb_dummies, n_u)
+ Transport matrix
+
+ Returns
+ -------
+ numpy.array of shape (n_p+nb_dummies, n_u)
+ gradient
+
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+ """
+ cC1 = np.dot(C1 ** 2 / 2, np.dot(T, np.ones(C2.shape[0]).reshape(-1, 1)))
+ cC2 = np.dot(np.dot(np.ones(C1.shape[0]).reshape(1, -1), T), C2 ** 2 / 2)
+ constC = cC1 + cC2
+ A = -np.dot(C1, T).dot(C2.T)
+ tens = constC + A
+ return tens * 2
+
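The closed form above matches the naive evaluation of :math:`\sum_{k,l}(C1_{i,k}-C2_{j,l})^2 T_{k,l}` for the square loss; a hedged brute-force check on small symmetric cost matrices::

    import numpy as np
    from ot.partial import gwgrad_partial

    rng = np.random.RandomState(0)
    x, y = rng.rand(4, 2), rng.rand(5, 2)
    C1 = ((x[:, None] - x[None]) ** 2).sum(-1)  # symmetric cost matrices
    C2 = ((y[:, None] - y[None]) ** 2).sum(-1)
    T = rng.rand(4, 5)
    T /= T.sum()
    naive = np.array([[((C1[i][:, None] - C2[j][None, :]) ** 2 * T).sum()
                       for j in range(5)] for i in range(4)])
    assert np.allclose(gwgrad_partial(C1, C2, T), naive)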
+
+def gwloss_partial(C1, C2, T):
+ """Compute the GW loss.
+
+ Parameters
+ ----------
+ C1: array of shape (n_p,n_p)
+ intra-source (P) cost matrix
+
+ C2: array of shape (n_u,n_u)
+ intra-target (U) cost matrix
+
+ T : array of shape (n_p+nb_dummies, n_u)
+ Transport matrix
+
+ Returns
+ -------
+ loss : float
+ GW loss
+ """
+ g = gwgrad_partial(C1, C2, T) * 0.5
+ return np.sum(g * T)
+
+
+def partial_gromov_wasserstein(C1, C2, p, q, m=None, nb_dummies=1, G0=None,
+ thres=1, numItermax=1000, tol=1e-7,
+ log=False, verbose=False, **kwargs):
+ r"""
+ Solves the partial optimal transport problem
+ and returns the OT plan
+
+ The function considers the following problem:
+
+ .. math::
+ \gamma = \arg\min_\gamma \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})\cdot
+ \gamma_{i,j}\cdot\gamma_{k,l}
+
+ s.t. \gamma 1 \leq p \\
+ \gamma^T 1 \leq q \\
+ \gamma\geq 0 \\
+ 1^T \gamma^T 1 = m \leq \min\{\|p\|_1, \|q\|_1\} \\
+
+ where :
+
+ - C1, C2 are the metric cost matrices in the source and target spaces
+ - p and q are the sample weights
+ - L is the quadratic loss function
+ - m is the amount of mass to be transported
+
+ The formulation of the problem has been proposed in [29]_
+
+
+ Parameters
+ ----------
+ C1 : ndarray, shape (ns, ns)
+ Metric cost matrix in the source space
+ C2 : ndarray, shape (nt, nt)
+ Metric cost matrix in the target space
+ p : ndarray, shape (ns,)
+ Distribution in the source space
+ q : ndarray, shape (nt,)
+ Distribution in the target space
+ m : float, optional
+ Amount of mass to be transported (default: min (|p|_1, |q|_1))
+ nb_dummies : int, optional
+ Number of dummy points to add (avoid instabilities in the EMD solver)
+ G0 : ndarray, shape (ns, nt), optional
+ Initialisation of the transportation matrix
+ thres : float, optional
+ quantile of the gradient matrix used to fill its entries that are
+ close to 0 (default: 1, i.e. the maximum value)
+ numItermax : int, optional
+ Max number of iterations
+ tol : float, optional
+ tolerance for stopping iterations
+ log : bool, optional
+ return log if True
+ verbose : bool, optional
+ Print information along iterations
+ **kwargs : dict
+ parameters can be directly passed to the emd solver
+
+
+ Returns
+ -------
+ gamma : (dim_a x dim_b) ndarray
+ Optimal transportation matrix for the given parameters
+ log : dict
+ log dictionary returned only if `log` is `True`
+
+
+ Examples
+ --------
+ >>> import ot
+ >>> import numpy as np
+ >>> from scipy.spatial import distance
+ >>> a = np.array([0.25] * 4)
+ >>> b = np.array([0.25] * 4)
+ >>> x = np.array([1,2,100,200]).reshape((-1,1))
+ >>> y = np.array([3,2,98,199]).reshape((-1,1))
+ >>> C1 = distance.cdist(x, x)
+ >>> C2 = distance.cdist(y, y)
+ >>> np.round(partial_gromov_wasserstein(C1, C2, a, b),2)
+ array([[0. , 0.25, 0. , 0. ],
+ [0.25, 0. , 0. , 0. ],
+ [0. , 0. , 0.25, 0. ],
+ [0. , 0. , 0. , 0.25]])
+ >>> np.round(partial_gromov_wasserstein(C1, C2, a, b, m=0.25),2)
+ array([[0. , 0. , 0. , 0. ],
+ [0. , 0. , 0. , 0. ],
+ [0. , 0. , 0. , 0. ],
+ [0. , 0. , 0. , 0.25]])
+
+ References
+ ----------
+ .. [29] Chapel, L., Alaya, M., Gasso, G. (2019). "Partial Gromov-
+ Wasserstein with Applications on Positive-Unlabeled Learning".
+ arXiv preprint arXiv:2002.08276.
+
+ """
+
+ if m is None:
+ m = np.min((np.sum(p), np.sum(q)))
+ elif m < 0:
+ raise ValueError("Problem infeasible. Parameter m should be greater"
+ " than 0.")
+ elif m > np.min((np.sum(p), np.sum(q))):
+ raise ValueError("Problem infeasible. Parameter m should be lower"
+ " than or equal to min(|p|_1, |q|_1).")
+
+ if G0 is None:
+ G0 = np.outer(p, q)
+
+ dim_G_extended = (len(p) + nb_dummies, len(q) + nb_dummies)
+ q_extended = np.append(q, [(np.sum(p) - m) / nb_dummies] * nb_dummies)
+ p_extended = np.append(p, [(np.sum(q) - m) / nb_dummies] * nb_dummies)
+
+ cpt = 0
+ err = 1
+ eps = 1e-20
+ if log:
+ log = {'err': []}
+
+ while (err > tol and cpt < numItermax):
+
+ Gprev = G0
+
+ M = gwgrad_partial(C1, C2, G0)
+ M[M < eps] = np.quantile(M, thres)
+
+ M_emd = np.zeros(dim_G_extended)
+ M_emd[:len(p), :len(q)] = M
+ M_emd[-nb_dummies:, -nb_dummies:] = np.max(M) * 1e5
+ M_emd = np.asarray(M_emd, dtype=np.float64)
+
+ Gc, logemd = emd(p_extended, q_extended, M_emd, log=True, **kwargs)
+
+ if logemd['warning'] is not None:
+ raise ValueError("Error in the EMD resolution: try to increase the"
+ " number of dummy points")
+
+ G0 = Gc[:len(p), :len(q)]
+
+ if cpt % 10 == 0: # to speed up the computations
+ err = np.linalg.norm(G0 - Gprev)
+ if log:
+ log['err'].append(err)
+ if verbose:
+ if cpt % 200 == 0:
+ print('{:5s}|{:12s}|{:12s}'.format(
+ 'It.', 'Err', 'Loss') + '\n' + '-' * 31)
+ print('{:5d}|{:8e}|{:8e}'.format(cpt, err,
+ gwloss_partial(C1, C2, G0)))
+
+ cpt += 1
+
+ if log:
+ log['partial_gw_dist'] = gwloss_partial(C1, C2, G0)
+ return G0[:len(p), :len(q)], log
+ else:
+ return G0[:len(p), :len(q)]
+
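A hedged check on toy data that the returned plan indeed carries the requested mass m::

    import numpy as np
    import ot

    rng = np.random.RandomState(42)
    x, y = rng.randn(8, 2), rng.randn(9, 2)
    C1, C2 = ot.dist(x, x), ot.dist(y, y)
    p, q = ot.unif(8), ot.unif(9)
    G = ot.partial.partial_gromov_wasserstein(C1, C2, p, q, m=0.5)
    print(G.sum())  # close to the requested mass m = 0.5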
+
+def partial_gromov_wasserstein2(C1, C2, p, q, m=None, nb_dummies=1, G0=None,
+ thres=1, numItermax=1000, tol=1e-7,
+ log=False, verbose=False, **kwargs):
+ r"""
+ Solves the partial optimal transport problem
+ and returns the partial Gromov-Wasserstein discrepancy
+
+ The function considers the following problem:
+
+ .. math::
+ \gamma = \arg\min_\gamma \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})\cdot
+ \gamma_{i,j}\cdot\gamma_{k,l}
+
+ s.t. \gamma 1 \leq p \\
+ \gamma^T 1 \leq q \\
+ \gamma\geq 0 \\
+ 1^T \gamma^T 1 = m \leq \min\{\|p\|_1, \|q\|_1\} \\
+
+ where :
+
+ - C1, C2 are the metric cost matrices in the source and target spaces
+ - p and q are the sample weights
+ - L is the quadratic loss function
+ - m is the amount of mass to be transported
+
+ The formulation of the problem has been proposed in [29]_
+
+
+ Parameters
+ ----------
+ C1 : ndarray, shape (ns, ns)
+ Metric cost matrix in the source space
+ C2 : ndarray, shape (nt, nt)
+ Metric cost matrix in the target space
+ p : ndarray, shape (ns,)
+ Distribution in the source space
+ q : ndarray, shape (nt,)
+ Distribution in the target space
+ m : float, optional
+ Amount of mass to be transported (default: min (|p|_1, |q|_1))
+ nb_dummies : int, optional
+ Number of dummy points to add (avoid instabilities in the EMD solver)
+ G0 : ndarray, shape (ns, nt), optional
+ Initialisation of the transportation matrix
+ thres : float, optional
+ quantile of the gradient matrix used to fill its entries that are
+ close to 0 (default: 1, i.e. the maximum value)
+ numItermax : int, optional
+ Max number of iterations
+ tol : float, optional
+ tolerance for stopping iterations
+ log : bool, optional
+ return log if True
+ verbose : bool, optional
+ Print information along iterations
+ **kwargs : dict
+ parameters can be directly passed to the emd solver
+
+
+ .. warning::
+ When dealing with a large number of points, the EMD solver may face
+ some instabilities, especially when the mass associated to the dummy
+ point is large. To avoid them, increase the number of dummy points
+ (this allows a smoother distribution of the mass over the points).
+
+
+ Returns
+ -------
+ partial_gw_dist : float
+ partial GW discrepancy
+ log : dict
+ log dictionary returned only if `log` is `True`
+
+
+ Examples
+ --------
+ >>> import ot
+ >>> import numpy as np
+ >>> from scipy.spatial import distance
+ >>> a = np.array([0.25] * 4)
+ >>> b = np.array([0.25] * 4)
+ >>> x = np.array([1,2,100,200]).reshape((-1,1))
+ >>> y = np.array([3,2,98,199]).reshape((-1,1))
+ >>> C1 = distance.cdist(x, x)
+ >>> C2 = distance.cdist(y, y)
+ >>> np.round(partial_gromov_wasserstein2(C1, C2, a, b),2)
+ 1.69
+ >>> np.round(partial_gromov_wasserstein2(C1, C2, a, b, m=0.25),2)
+ 0.0
+
+ References
+ ----------
+ .. [29] Chapel, L., Alaya, M., Gasso, G. (2019). "Partial Gromov-
+ Wasserstein with Applications on Positive-Unlabeled Learning".
+ arXiv preprint arXiv:2002.08276.
+
+ """
+
+ partial_gw, log_gw = partial_gromov_wasserstein(C1, C2, p, q, m,
+ nb_dummies, G0, thres,
+ numItermax, tol, True,
+ verbose, **kwargs)
+
+ log_gw['T'] = partial_gw
+
+ if log:
+ return log_gw['partial_gw_dist'], log_gw
+ else:
+ return log_gw['partial_gw_dist']
+
+
+def entropic_partial_wasserstein(a, b, M, reg, m=None, numItermax=1000,
+ stopThr=1e-100, verbose=False, log=False):
+ r"""
+ Solves the partial optimal transport problem
+ and returns the OT plan
+
+ The function considers the following problem:
+
+ .. math::
+ \gamma = \arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega(\gamma)
+
+ s.t. \gamma 1 \leq a \\
+ \gamma^T 1 \leq b \\
+ \gamma\geq 0 \\
+ 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\} \\
+
+ where :
+
+ - M is the metric cost matrix
+ - :math:`\Omega` is the entropic regularization term
+ :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - a and b are the sample weights
+ - m is the amount of mass to be transported
+
+ The formulation of the problem has been proposed in [3]_ (prop. 5)
+
+
+ Parameters
+ ----------
+ a : np.ndarray (dim_a,)
+ Unnormalized histogram of dimension dim_a
+ b : np.ndarray (dim_b,)
+ Unnormalized histograms of dimension dim_b
+ M : np.ndarray (dim_a, dim_b)
+ cost matrix
+ reg : float
+ Regularization term > 0
+ m : float, optional
+ Amount of mass to be transported
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+ Stop threshold on error (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ gamma : (dim_a x dim_b) ndarray
+ Optimal transportation matrix for the given parameters
+ log : dict
+ log dictionary returned only if `log` is `True`
+
+
+ Examples
+ --------
+ >>> import ot
+ >>> import numpy as np
+ >>> a = [.1, .2]
+ >>> b = [.1, .1]
+ >>> M = [[0., 1.], [2., 3.]]
+ >>> np.round(entropic_partial_wasserstein(a, b, M, 1, 0.1), 2)
+ array([[0.06, 0.02],
+ [0.01, 0. ]])
+
+ References
+ ----------
+ .. [3] Benamou, J. D., Carlier, G., Cuturi, M., Nenna, L., & Peyré, G.
+ (2015). Iterative Bregman projections for regularized transportation
+ problems. SIAM Journal on Scientific Computing, 37(2), A1111-A1138.
+
+ See Also
+ --------
+ ot.partial.partial_wasserstein: exact Partial Wasserstein
+ """
+
+ a = np.asarray(a, dtype=np.float64)
+ b = np.asarray(b, dtype=np.float64)
+ M = np.asarray(M, dtype=np.float64)
+
+ dim_a, dim_b = M.shape
+ dx = np.ones(dim_a, dtype=np.float64)
+ dy = np.ones(dim_b, dtype=np.float64)
+
+ if len(a) == 0:
+ a = np.ones(dim_a, dtype=np.float64) / dim_a
+ if len(b) == 0:
+ b = np.ones(dim_b, dtype=np.float64) / dim_b
+
+ if m is None:
+ m = np.min((np.sum(a), np.sum(b))) * 1.0
+ if m < 0:
+ raise ValueError("Problem infeasible. Parameter m should be greater"
+ " than 0.")
+ if m > np.min((np.sum(a), np.sum(b))):
+ raise ValueError("Problem infeasible. Parameter m should be lower"
+ " than or equal to min(|a|_1, |b|_1).")
+
+ log_e = {'err': []}
+
+ # Next 3 lines equivalent to K=np.exp(-M/reg), but faster to compute
+ K = np.empty(M.shape, dtype=M.dtype)
+ np.divide(M, -reg, out=K)
+ np.exp(K, out=K)
+ np.multiply(K, m / np.sum(K), out=K)
+
+ err, cpt = 1, 0
+
+ while (err > stopThr and cpt < numItermax):
+ Kprev = K
+ K1 = np.dot(np.diag(np.minimum(a / np.sum(K, axis=1), dx)), K)
+ K2 = np.dot(K1, np.diag(np.minimum(b / np.sum(K1, axis=0), dy)))
+ K = K2 * (m / np.sum(K2))
+
+ if np.any(np.isnan(K)) or np.any(np.isinf(K)):
+ print('Warning: numerical errors at iteration', cpt)
+ break
+ if cpt % 10 == 0:
+ err = np.linalg.norm(Kprev - K)
+ if log:
+ log_e['err'].append(err)
+ if verbose:
+ if cpt % 200 == 0:
+ print(
+ '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 11)
+ print('{:5d}|{:8e}|'.format(cpt, err))
+
+ cpt = cpt + 1
+ log_e['partial_w_dist'] = np.sum(M * K)
+ if log:
+ return K, log_e
+ else:
+ return K
+
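A hedged check of the scaling iterations: the plan carries exactly m units of mass and its marginals stay (numerically) below a and b::

    import numpy as np
    import ot

    a, b = np.array([.1, .2]), np.array([.1, .1])
    M = np.array([[0., 1.], [2., 3.]])
    K = ot.partial.entropic_partial_wasserstein(a, b, M, reg=1., m=0.1)
    print(np.isclose(K.sum(), 0.1))
    print(np.all(K.sum(1) <= a + 1e-8), np.all(K.sum(0) <= b + 1e-8))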
+
+def entropic_partial_gromov_wasserstein(C1, C2, p, q, reg, m=None, G0=None,
+ numItermax=1000, tol=1e-7, log=False,
+ verbose=False):
+ r"""
+ Returns the partial Gromov-Wasserstein transport between (C1,p) and (C2,q)
+
+ The function solves the following optimization problem:
+
+ .. math::
+ GW = \arg\min_{\gamma} \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})\cdot
+ \gamma_{i,j}\cdot\gamma_{k,l} + reg\cdot\Omega(\gamma)
+
+ s.t.
+ \gamma\geq 0 \\
+ \gamma 1 \leq a\\
+ \gamma^T 1 \leq b\\
+ 1^T \gamma^T 1 = m \leq \min\{\|a\|_1, \|b\|_1\}
+
+ where :
+
+ - C1 is the metric cost matrix in the source space
+ - C2 is the metric cost matrix in the target space
+ - p and q are the sample weights
+ - L : quadratic loss function
+ - :math:`\Omega` is the entropic regularization term
+ :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - m is the amount of mass to be transported
+
+ The formulation of the GW problem has been proposed in [12]_ and the
+ partial GW in [29]_.
+
+ Parameters
+ ----------
+ C1 : ndarray, shape (ns, ns)
+ Metric cost matrix in the source space
+ C2 : ndarray, shape (nt, nt)
+ Metric cost matrix in the target space
+ p : ndarray, shape (ns,)
+ Distribution in the source space
+ q : ndarray, shape (nt,)
+ Distribution in the target space
+ reg : float
+ entropic regularization parameter
+ m : float, optional
+ Amount of mass to be transported (default: min (|p|_1, |q|_1))
+ G0 : ndarray, shape (ns, nt), optional
+ Initialisation of the transportation matrix
+ numItermax : int, optional
+ Max number of iterations
+ tol : float, optional
+ Stop threshold on error (>0)
+ log : bool, optional
+ return log if True
+ verbose : bool, optional
+ Print information along iterations
+
+ Examples
+ --------
+ >>> import ot
+ >>> import numpy as np
+ >>> from scipy.spatial import distance
+ >>> a = np.array([0.25] * 4)
+ >>> b = np.array([0.25] * 4)
+ >>> x = np.array([1,2,100,200]).reshape((-1,1))
+ >>> y = np.array([3,2,98,199]).reshape((-1,1))
+ >>> C1 = sp.spatial.distance.cdist(x, x)
+ >>> C2 = sp.spatial.distance.cdist(y, y)
+ >>> np.round(entropic_partial_gromov_wasserstein(C1, C2, a, b,50), 2)
+ array([[0.12, 0.13, 0. , 0. ],
+ [0.13, 0.12, 0. , 0. ],
+ [0. , 0. , 0.25, 0. ],
+ [0. , 0. , 0. , 0.25]])
+ >>> np.round(entropic_partial_gromov_wasserstein(C1, C2, a, b, 50, m=0.25), 2)
+ array([[0.02, 0.03, 0. , 0.03],
+ [0.03, 0.03, 0. , 0.03],
+ [0. , 0. , 0.03, 0. ],
+ [0.02, 0.02, 0. , 0.03]])
+
+ Returns
+ -------
+    :math:`gamma` : ndarray, shape (ns, nt)
+ Optimal transportation matrix for the given parameters
+ log : dict
+ log dictionary returned only if `log` is `True`
+
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+ .. [29] Chapel, L., Alaya, M., Gasso, G. (2019). "Partial Gromov-
+ Wasserstein with Applications on Positive-Unlabeled Learning".
+ arXiv preprint arXiv:2002.08276.
+
+ See Also
+ --------
+ ot.partial.partial_gromov_wasserstein: exact Partial Gromov-Wasserstein
+ """
+
+ if G0 is None:
+ G0 = np.outer(p, q)
+
+ if m is None:
+ m = np.min((np.sum(p), np.sum(q)))
+ elif m < 0:
+ raise ValueError("Problem infeasible. Parameter m should be greater"
+ " than 0.")
+ elif m > np.min((np.sum(p), np.sum(q))):
+ raise ValueError("Problem infeasible. Parameter m should lower or"
+ " equal than min(|a|_1, |b|_1).")
+
+ cpt = 0
+ err = 1
+
+ loge = {'err': []}
+
+ while (err > tol and cpt < numItermax):
+ Gprev = G0
+ M_entr = gwgrad_partial(C1, C2, G0)
+ G0 = entropic_partial_wasserstein(p, q, M_entr, reg, m)
+ if cpt % 10 == 0: # to speed up the computations
+ err = np.linalg.norm(G0 - Gprev)
+ if log:
+ loge['err'].append(err)
+ if verbose:
+ if cpt % 200 == 0:
+ print('{:5s}|{:12s}|{:12s}'.format(
+ 'It.', 'Err', 'Loss') + '\n' + '-' * 31)
+ print('{:5d}|{:8e}|{:8e}'.format(cpt, err,
+ gwloss_partial(C1, C2, G0)))
+
+ cpt += 1
+
+ if log:
+ loge['partial_gw_dist'] = gwloss_partial(C1, C2, G0)
+ return G0, loge
+ else:
+ return G0
+
+
+def entropic_partial_gromov_wasserstein2(C1, C2, p, q, reg, m=None, G0=None,
+ numItermax=1000, tol=1e-7, log=False,
+ verbose=False):
+ r"""
+ Returns the partial Gromov-Wasserstein discrepancy between (C1,p) and
+ (C2,q)
+
+ The function solves the following optimization problem:
+
+ .. math::
+ GW = \arg\min_{\gamma} \sum_{i,j,k,l} L(C1_{i,k},C2_{j,l})\cdot
+ \gamma_{i,j}\cdot\gamma_{k,l} + reg\cdot\Omega(\gamma)
+
+ s.t.
+ \gamma\geq 0 \\
+        \gamma 1 \leq p\\
+        \gamma^T 1 \leq q\\
+        1^T \gamma^T 1 = m \leq \min\{\|p\|_1, \|q\|_1\}
+
+ where :
+
+ - C1 is the metric cost matrix in the source space
+ - C2 is the metric cost matrix in the target space
+ - p and q are the sample weights
+ - L : quadratic loss function
+ - :math:`\Omega` is the entropic regularization term
+ :math:`\Omega=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - m is the amount of mass to be transported
+
+ The formulation of the GW problem has been proposed in [12]_ and the
+ partial GW in [29]_.
+
+
+ Parameters
+ ----------
+ C1 : ndarray, shape (ns, ns)
+ Metric cost matrix in the source space
+ C2 : ndarray, shape (nt, nt)
+        Metric cost matrix in the target space
+ p : ndarray, shape (ns,)
+ Distribution in the source space
+ q : ndarray, shape (nt,)
+ Distribution in the target space
+ reg: float
+ entropic regularization parameter
+ m : float, optional
+        Amount of mass to be transported (default: min(|p|_1, |q|_1))
+ G0 : ndarray, shape (ns, nt), optional
+ Initialisation of the transportation matrix
+ numItermax : int, optional
+ Max number of iterations
+ tol : float, optional
+ Stop threshold on error (>0)
+ log : bool, optional
+ return log if True
+ verbose : bool, optional
+ Print information along iterations
+
+
+ Returns
+ -------
+    partial_gw_dist : float
+        Partial Gromov-Wasserstein discrepancy
+ log : dict
+ log dictionary returned only if `log` is `True`
+
+ Examples
+ --------
+    >>> import numpy as np
+    >>> import ot
+    >>> from scipy.spatial.distance import cdist
+    >>> a = np.array([0.25] * 4)
+    >>> b = np.array([0.25] * 4)
+    >>> x = np.array([1, 2, 100, 200]).reshape((-1, 1))
+    >>> y = np.array([3, 2, 98, 199]).reshape((-1, 1))
+    >>> C1 = cdist(x, x)
+    >>> C2 = cdist(y, y)
+    >>> np.round(entropic_partial_gromov_wasserstein2(C1, C2, a, b, 50), 2)
+ 1.87
+
+ References
+ ----------
+ .. [12] Peyré, Gabriel, Marco Cuturi, and Justin Solomon,
+ "Gromov-Wasserstein averaging of kernel and distance matrices."
+ International Conference on Machine Learning (ICML). 2016.
+ .. [29] Chapel, L., Alaya, M., Gasso, G. (2019). "Partial Gromov-
+ Wasserstein with Applications on Positive-Unlabeled Learning".
+ arXiv preprint arXiv:2002.08276.
+ """
+
+ partial_gw, log_gw = entropic_partial_gromov_wasserstein(C1, C2, p, q, reg,
+ m, G0, numItermax,
+ tol, True,
+ verbose)
+
+ log_gw['T'] = partial_gw
+
+ if log:
+ return log_gw['partial_gw_dist'], log_gw
+ else:
+ return log_gw['partial_gw_dist']
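Taken together, the two entry points above relate as follows: the first returns the transport plan, the second the scalar discrepancy, with the plan recoverable from the log. A short sketch on the doctest data (assuming `ot.partial` is importable):

    import numpy as np
    from scipy.spatial.distance import cdist
    import ot

    a = b = np.array([0.25] * 4)
    x = np.array([1, 2, 100, 200]).reshape((-1, 1))
    y = np.array([3, 2, 98, 199]).reshape((-1, 1))
    C1, C2 = cdist(x, x), cdist(y, y)

    # plan returned directly
    G = ot.partial.entropic_partial_gromov_wasserstein(C1, C2, a, b, 50)
    # scalar discrepancy; the plan is stored in the log under 'T'
    d, log = ot.partial.entropic_partial_gromov_wasserstein2(
        C1, C2, a, b, 50, log=True)
    assert np.allclose(G, log['T'])
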
diff --git a/ot/plot.py b/ot/plot.py
index f403e98..ad436b4 100644
--- a/ot/plot.py
+++ b/ot/plot.py
@@ -78,9 +78,10 @@ def plot2D_samples_mat(xs, xt, G, thr=1e-8, **kwargs):
thr : float, optional
threshold above which the line is drawn
**kwargs : dict
- paameters given to the plot functions (default color is black if
+ parameters given to the plot functions (default color is black if
nothing given)
"""
+
if ('color' not in kwargs) and ('c' not in kwargs):
kwargs['color'] = 'k'
mx = G.max()
diff --git a/ot/smooth.py b/ot/smooth.py
index 5a8e4b5..81f6a3e 100644
--- a/ot/smooth.py
+++ b/ot/smooth.py
@@ -26,7 +26,9 @@
# Remi Flamary <remi.flamary@unice.fr>
"""
-Implementation of
+Smooth and Sparse Optimal Transport solvers (KL and L2 reg.)
+
+Implementation of :
Smooth and Sparse Optimal Transport.
Mathieu Blondel, Vivien Seguy, Antoine Rolet.
In Proc. of AISTATS 2018.
diff --git a/ot/unbalanced.py b/ot/unbalanced.py
index d516dfc..e37f10c 100644
--- a/ot/unbalanced.py
+++ b/ot/unbalanced.py
@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
"""
-Regularized Unbalanced OT
+Regularized Unbalanced OT solvers
"""
# Author: Hicham Janati <hicham.janati@inria.fr>
@@ -384,10 +384,9 @@ def sinkhorn_knopp_unbalanced(a, b, M, reg, reg_m, numItermax=1000,
fi = reg_m / (reg_m + reg)
- cpt = 0
err = 1.
- while (err > stopThr and cpt < numItermax):
+ for i in range(numItermax):
uprev = u
vprev = v
@@ -401,28 +400,27 @@ def sinkhorn_knopp_unbalanced(a, b, M, reg, reg_m, numItermax=1000,
or np.any(np.isinf(u)) or np.any(np.isinf(v))):
# we have reached the machine precision
# come back to previous solution and quit loop
- warnings.warn('Numerical errors at iteration %s' % cpt)
+ warnings.warn('Numerical errors at iteration %s' % i)
u = uprev
v = vprev
break
- if cpt % 10 == 0:
- # we can speed up the process by checking for the error only all
- # the 10th iterations
- err_u = abs(u - uprev).max() / max(abs(u).max(), abs(uprev).max(), 1.)
- err_v = abs(v - vprev).max() / max(abs(v).max(), abs(vprev).max(), 1.)
- err = 0.5 * (err_u + err_v)
- if log:
- log['err'].append(err)
+
+ err_u = abs(u - uprev).max() / max(abs(u).max(), abs(uprev).max(), 1.)
+ err_v = abs(v - vprev).max() / max(abs(v).max(), abs(vprev).max(), 1.)
+ err = 0.5 * (err_u + err_v)
+ if log:
+ log['err'].append(err)
if verbose:
- if cpt % 200 == 0:
+ if i % 50 == 0:
print(
'{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
- print('{:5d}|{:8e}|'.format(cpt, err))
- cpt += 1
+ print('{:5d}|{:8e}|'.format(i, err))
+ if err < stopThr:
+ break
if log:
- log['logu'] = np.log(u + 1e-16)
- log['logv'] = np.log(v + 1e-16)
+ log['logu'] = np.log(u + 1e-300)
+ log['logv'] = np.log(v + 1e-300)
if n_hists: # return only loss
res = np.einsum('ik,ij,jk,ij->k', u, K, v, M)
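With the loop rewritten above, the error is evaluated at every iteration and the solver exits as soon as it drops below `stopThr`. For reference, a minimal call (a sketch; `ot.unif` and `ot.dist` are existing POT helpers):

    import numpy as np
    import ot

    a = ot.unif(3)
    b = ot.unif(4)
    M = ot.dist(np.arange(3.).reshape(-1, 1), np.arange(4.).reshape(-1, 1))
    # reg: entropic smoothing; reg_m: penalty on marginal violations
    G = ot.unbalanced.sinkhorn_knopp_unbalanced(a, b, M, reg=0.1, reg_m=1.0)
    print(G.sum(axis=1))  # relaxed marginals need not match a exactly
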
@@ -747,8 +745,8 @@ def barycenter_unbalanced_stabilized(A, M, reg, reg_m, weights=None, tau=1e3,
alpha = np.zeros(dim)
beta = np.zeros(dim)
q = np.ones(dim) / dim
- while (err > stopThr and cpt < numItermax):
- qprev = q
+ for i in range(numItermax):
+ qprev = q.copy()
Kv = K.dot(v)
f_alpha = np.exp(- alpha / (reg + reg_m))
f_beta = np.exp(- beta / (reg + reg_m))
@@ -777,7 +775,7 @@ def barycenter_unbalanced_stabilized(A, M, reg, reg_m, weights=None, tau=1e3,
-            warnings.warn('Numerical errors at iteration %s' % cpt)
+            warnings.warn('Numerical errors at iteration %s' % i)
q = qprev
break
- if (cpt % 10 == 0 and not absorbing) or cpt == 0:
+ if (i % 10 == 0 and not absorbing) or i == 0:
# we can speed up the process by checking for the error only all
# the 10th iterations
err = abs(q - qprev).max() / max(abs(q).max(),
@@ -785,20 +783,21 @@ def barycenter_unbalanced_stabilized(A, M, reg, reg_m, weights=None, tau=1e3,
if log:
log['err'].append(err)
if verbose:
- if cpt % 50 == 0:
+ if i % 50 == 0:
print(
'{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
- print('{:5d}|{:8e}|'.format(cpt, err))
+ print('{:5d}|{:8e}|'.format(i, err))
+ if err < stopThr:
+ break
- cpt += 1
if err > stopThr:
warnings.warn("Stabilized Unbalanced Sinkhorn did not converge." +
"Try a larger entropy `reg` or a lower mass `reg_m`." +
"Or a larger absorption threshold `tau`.")
if log:
- log['niter'] = cpt
- log['logu'] = np.log(u + 1e-16)
- log['logv'] = np.log(v + 1e-16)
+ log['niter'] = i
+ log['logu'] = np.log(u + 1e-300)
+ log['logv'] = np.log(v + 1e-300)
return q, log
else:
return q
@@ -882,15 +881,15 @@ def barycenter_unbalanced_sinkhorn(A, M, reg, reg_m, weights=None,
fi = reg_m / (reg_m + reg)
- v = np.ones((dim, n_hists)) / dim
- u = np.ones((dim, 1)) / dim
-
- cpt = 0
+ v = np.ones((dim, n_hists))
+ u = np.ones((dim, 1))
+ q = np.ones(dim)
err = 1.
- while (err > stopThr and cpt < numItermax):
- uprev = u
- vprev = v
+ for i in range(numItermax):
+ uprev = u.copy()
+ vprev = v.copy()
+ qprev = q.copy()
Kv = K.dot(v)
u = (A / Kv) ** fi
@@ -905,31 +904,30 @@ def barycenter_unbalanced_sinkhorn(A, M, reg, reg_m, weights=None,
or np.any(np.isinf(u)) or np.any(np.isinf(v))):
# we have reached the machine precision
# come back to previous solution and quit loop
- warnings.warn('Numerical errors at iteration %s' % cpt)
+ warnings.warn('Numerical errors at iteration %s' % i)
u = uprev
v = vprev
+ q = qprev
break
- if cpt % 10 == 0:
- # we can speed up the process by checking for the error only all
- # the 10th iterations
- err_u = abs(u - uprev).max()
- err_u /= max(abs(u).max(), abs(uprev).max(), 1.)
- err_v = abs(v - vprev).max()
- err_v /= max(abs(v).max(), abs(vprev).max(), 1.)
- err = 0.5 * (err_u + err_v)
- if log:
- log['err'].append(err)
- if verbose:
- if cpt % 50 == 0:
- print(
- '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
- print('{:5d}|{:8e}|'.format(cpt, err))
+ # compute change in barycenter
+ err = abs(q - qprev).max()
+ err /= max(abs(q).max(), abs(qprev).max(), 1.)
+ if log:
+ log['err'].append(err)
+ # if barycenter did not change + at least 10 iterations - stop
+ if err < stopThr and i > 10:
+ break
+
+ if verbose:
+ if i % 10 == 0:
+ print(
+ '{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(i, err))
- cpt += 1
if log:
- log['niter'] = cpt
- log['logu'] = np.log(u + 1e-16)
- log['logv'] = np.log(v + 1e-16)
+ log['niter'] = i
+ log['logu'] = np.log(u + 1e-300)
+ log['logv'] = np.log(v + 1e-300)
return q, log
else:
return q
@@ -1002,12 +1000,14 @@ def barycenter_unbalanced(A, M, reg, reg_m, method="sinkhorn", weights=None,
if method.lower() == 'sinkhorn':
return barycenter_unbalanced_sinkhorn(A, M, reg, reg_m,
+ weights=weights,
numItermax=numItermax,
stopThr=stopThr, verbose=verbose,
log=log, **kwargs)
elif method.lower() == 'sinkhorn_stabilized':
return barycenter_unbalanced_stabilized(A, M, reg, reg_m,
+ weights=weights,
numItermax=numItermax,
stopThr=stopThr,
verbose=verbose,
@@ -1015,6 +1015,7 @@ def barycenter_unbalanced(A, M, reg, reg_m, method="sinkhorn", weights=None,
elif method.lower() in ['sinkhorn_reg_scaling']:
warnings.warn('Method not implemented yet. Using classic Sinkhorn Knopp')
return barycenter_unbalanced(A, M, reg, reg_m,
+ weights=weights,
numItermax=numItermax,
stopThr=stopThr, verbose=verbose,
log=log, **kwargs)
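All three dispatch branches now forward `weights` to the underlying solver, so non-uniform barycenter weights are actually honored. A weighted call might look like this (a sketch; `make_1D_gauss` and `dist0` are existing POT utilities):

    import numpy as np
    import ot

    # two 1D histograms over 50 bins, stacked as columns of A
    A = np.vstack([ot.datasets.make_1D_gauss(50, m=15, s=5),
                   ot.datasets.make_1D_gauss(50, m=35, s=5)]).T
    M = ot.utils.dist0(50)  # pairwise cost between the 50 bins
    M /= M.max()
    q = ot.unbalanced.barycenter_unbalanced(A, M, reg=1e-2, reg_m=1.0,
                                            weights=np.array([0.3, 0.7]))
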
diff --git a/ot/utils.py b/ot/utils.py
index b71458b..f9911a1 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -49,6 +49,12 @@ def kernel(x1, x2, method='gaussian', sigma=1, **kwargs):
return K
+def laplacian(x):
+ """Compute Laplacian matrix"""
+ L = np.diag(np.sum(x, axis=0)) - x
+ return L
+
+
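In words: `laplacian` turns a similarity matrix into the unnormalized graph Laplacian, whose rows sum to zero for symmetric input. A two-line check in plain numpy, using the same formula as the helper above:

    import numpy as np
    S = np.array([[0., 1., 0.],
                  [1., 0., 1.],
                  [0., 1., 0.]])
    L = np.diag(S.sum(axis=0)) - S  # identical to laplacian(S)
    assert np.allclose(L.sum(axis=1), 0.)
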
def unif(n):
""" return a uniform histogram of length n (simplex)
@@ -200,6 +206,28 @@ def dots(*args):
return reduce(np.dot, args)
+def label_normalization(y, start=0):
+ """ Transform labels to start at a given value
+
+ Parameters
+ ----------
+ y : array-like, shape (n, )
+ The vector of labels to be normalized.
+ start : int
+ Desired value for the smallest label in y (default=0)
+
+ Returns
+ -------
+    y : array-like, shape (n, )
+ The input vector of labels normalized according to given start value.
+ """
+
+ diff = np.min(np.unique(y)) - start
+ if diff != 0:
+ y -= diff
+ return y
+
+
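Behaviour sketch for `label_normalization` (note that the in-place `y -= diff` also shifts the caller's integer array):

    import numpy as np
    from ot.utils import label_normalization

    y = np.array([2, 3, 3, 5])
    y_norm = label_normalization(y)  # smallest label mapped to start=0
    # y_norm is array([0, 1, 1, 3]); y itself has been shifted as well
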
def fun(f, q_in, q_out):
""" Utility function for parmap with no serializing problems """
while True: