-rw-r--r--  README.md                          3
-rw-r--r--  examples/plot_otda_jcpot.py      171
-rw-r--r--  examples/plot_otda_laplacian.py  127
-rw-r--r--  ot/bregman.py                    228
-rw-r--r--  ot/da.py                         425
-rw-r--r--  ot/datasets.py                    17
-rw-r--r--  ot/lp/__init__.py                  2
-rw-r--r--  ot/plot.py                         3
-rw-r--r--  ot/utils.py                        6
-rw-r--r--  test/test_da.py                  112
10 files changed, 1038 insertions, 56 deletions
diff --git a/README.md b/README.md
index c115776..f439405 100644
--- a/README.md
+++ b/README.md
@@ -29,6 +29,7 @@ It provides the following solvers:
* Non regularized free support Wasserstein barycenters [20].
* Unbalanced OT with KL relaxation distance and barycenter [10, 25].
* Screening Sinkhorn Algorithm for OT [26].
+* JCPOT algorithm for multi-source target shift [27].
Some demonstrations (both in Python and Jupyter Notebook format) are available in the examples folder.
@@ -257,3 +258,5 @@ You can also post bug reports and feature requests in Github issues. Make sure t
[25] Frogner C., Zhang C., Mobahi H., Araya-Polo M., Poggio T. (2015). [Learning with a Wasserstein Loss](http://cbcl.mit.edu/wasserstein/) Advances in Neural Information Processing Systems (NIPS).
[26] Alaya M. Z., Bérar M., Gasso G., Rakotomamonjy A. (2019). [Screening Sinkhorn Algorithm for Regularized Optimal Transport](https://papers.nips.cc/paper/9386-screening-sinkhorn-algorithm-for-regularized-optimal-transport), Advances in Neural Information Processing Systems 33 (NeurIPS).
+
+[27] Redko I., Courty N., Flamary R., Tuia D. (2019). [Optimal Transport for Multi-source Domain Adaptation under Target Shift](http://proceedings.mlr.press/v89/redko19a.html), Proceedings of the Twenty-Second International Conference on Artificial Intelligence and Statistics (AISTATS) 22, 2019. \ No newline at end of file
diff --git a/examples/plot_otda_jcpot.py b/examples/plot_otda_jcpot.py
new file mode 100644
index 0000000..ce6b88f
--- /dev/null
+++ b/examples/plot_otda_jcpot.py
@@ -0,0 +1,171 @@
+# -*- coding: utf-8 -*-
+"""
+================================
+OT for multi-source target shift
+================================
+
+This example introduces a target shift problem with two 2D source domains and one target domain.
+
+"""
+
+# Authors: Remi Flamary <remi.flamary@unice.fr>
+# Ievgen Redko <ievgen.redko@univ-st-etienne.fr>
+#
+# License: MIT License
+
+import pylab as pl
+import numpy as np
+import ot
+from ot.datasets import make_data_classif
+
+##############################################################################
+# Generate data
+# -------------
+n = 50
+sigma = 0.3
+np.random.seed(1985)
+
+p1 = .2
+dec1 = [0, 2]
+
+p2 = .9
+dec2 = [0, -2]
+
+pt = .4
+dect = [4, 0]
+
+xs1, ys1 = make_data_classif('2gauss_prop', n, nz=sigma, p=p1, bias=dec1)
+xs2, ys2 = make_data_classif('2gauss_prop', n + 1, nz=sigma, p=p2, bias=dec2)
+xt, yt = make_data_classif('2gauss_prop', n, nz=sigma, p=pt, bias=dect)
+
+all_Xr = [xs1, xs2]
+all_Yr = [ys1, ys2]
+# %%
+
+da = 1.5
+
+
+def plot_ax(dec, name):
+ pl.plot([dec[0], dec[0]], [dec[1] - da, dec[1] + da], 'k', alpha=0.5)
+ pl.plot([dec[0] - da, dec[0] + da], [dec[1], dec[1]], 'k', alpha=0.5)
+ pl.text(dec[0] - .5, dec[1] + 2, name)
+
+
+##############################################################################
+# Fig 1 : plots source and target samples
+# ---------------------------------------
+
+pl.figure(1)
+pl.clf()
+plot_ax(dec1, 'Source 1')
+plot_ax(dec2, 'Source 2')
+plot_ax(dect, 'Target')
+pl.scatter(xs1[:, 0], xs1[:, 1], c=ys1, s=35, marker='x', cmap='Set1', vmax=9,
+ label='Source 1 ({:1.2f}, {:1.2f})'.format(1 - p1, p1))
+pl.scatter(xs2[:, 0], xs2[:, 1], c=ys2, s=35, marker='+', cmap='Set1', vmax=9,
+ label='Source 2 ({:1.2f}, {:1.2f})'.format(1 - p2, p2))
+pl.scatter(xt[:, 0], xt[:, 1], c=yt, s=35, marker='o', cmap='Set1', vmax=9,
+ label='Target ({:1.2f}, {:1.2f})'.format(1 - pt, pt))
+pl.title('Data')
+
+pl.legend()
+pl.axis('equal')
+pl.axis('off')
+
+##############################################################################
+# Instantiate the Sinkhorn transport algorithm and fit it on each source domain
+# ----------------------------------------------------------------------------
+ot_sinkhorn = ot.da.SinkhornTransport(reg_e=1e-1, metric='sqeuclidean')
+
+
+def print_G(G, xs, ys, xt):
+ for i in range(G.shape[0]):
+ for j in range(G.shape[1]):
+ if G[i, j] > 5e-4:
+ if ys[i]:
+ c = 'b'
+ else:
+ c = 'r'
+ pl.plot([xs[i, 0], xt[j, 0]], [xs[i, 1], xt[j, 1]], c, alpha=.2)
+
+
+##############################################################################
+# Fig 2 : plot optimal couplings and transported samples
+# ------------------------------------------------------
+pl.figure(2)
+pl.clf()
+plot_ax(dec1, 'Source 1')
+plot_ax(dec2, 'Source 2')
+plot_ax(dect, 'Target')
+print_G(ot_sinkhorn.fit(Xs=xs1, Xt=xt).coupling_, xs1, ys1, xt)
+print_G(ot_sinkhorn.fit(Xs=xs2, Xt=xt).coupling_, xs2, ys2, xt)
+pl.scatter(xs1[:, 0], xs1[:, 1], c=ys1, s=35, marker='x', cmap='Set1', vmax=9)
+pl.scatter(xs2[:, 0], xs2[:, 1], c=ys2, s=35, marker='+', cmap='Set1', vmax=9)
+pl.scatter(xt[:, 0], xt[:, 1], c=yt, s=35, marker='o', cmap='Set1', vmax=9)
+
+pl.plot([], [], 'r', alpha=.2, label='Mass from Class 1')
+pl.plot([], [], 'b', alpha=.2, label='Mass from Class 2')
+
+pl.title('Independent OT')
+
+pl.legend()
+pl.axis('equal')
+pl.axis('off')
+
+##############################################################################
+# Instantiate JCPOT adaptation algorithm and fit it
+# ----------------------------------------------------------------------------
+otda = ot.da.JCPOTTransport(reg_e=1e-2, max_iter=1000, metric='sqeuclidean', tol=1e-9, verbose=True, log=True)
+otda.fit(all_Xr, all_Yr, xt)
+
+ws1 = otda.proportions_.dot(otda.log_['all_domains'][0]['D2'])
+ws2 = otda.proportions_.dot(otda.log_['all_domains'][1]['D2'])
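+
+# Optional sanity check (illustrative, not part of the original example):
+# otda.proportions_ lies on the simplex and every row of D2 sums to one,
+# so each re-weighted source marginal ws-k is itself a valid histogram
+assert np.isclose(ws1.sum(), 1.) and np.isclose(ws2.sum(), 1.)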
+
+pl.figure(3)
+pl.clf()
+plot_ax(dec1, 'Source 1')
+plot_ax(dec2, 'Source 2')
+plot_ax(dect, 'Target')
+print_G(ot.bregman.sinkhorn(ws1, [], otda.log_['all_domains'][0]['M'], reg=1e-2), xs1, ys1, xt)
+print_G(ot.bregman.sinkhorn(ws2, [], otda.log_['all_domains'][1]['M'], reg=1e-2), xs2, ys2, xt)
+pl.scatter(xs1[:, 0], xs1[:, 1], c=ys1, s=35, marker='x', cmap='Set1', vmax=9)
+pl.scatter(xs2[:, 0], xs2[:, 1], c=ys2, s=35, marker='+', cmap='Set1', vmax=9)
+pl.scatter(xt[:, 0], xt[:, 1], c=yt, s=35, marker='o', cmap='Set1', vmax=9)
+
+pl.plot([], [], 'r', alpha=.2, label='Mass from Class 1')
+pl.plot([], [], 'b', alpha=.2, label='Mass from Class 2')
+
+pl.title('OT with prop estimation ({:1.3f},{:1.3f})'.format(otda.proportions_[0], otda.proportions_[1]))
+
+pl.legend()
+pl.axis('equal')
+pl.axis('off')
+
+##############################################################################
+# Run oracle transport algorithm with known proportions
+# ----------------------------------------------------------------------------
+h_res = np.array([1 - pt, pt])
+
+ws1 = h_res.dot(otda.log_['all_domains'][0]['D2'])
+ws2 = h_res.dot(otda.log_['all_domains'][1]['D2'])
+
+pl.figure(4)
+pl.clf()
+plot_ax(dec1, 'Source 1')
+plot_ax(dec2, 'Source 2')
+plot_ax(dect, 'Target')
+print_G(ot.bregman.sinkhorn(ws1, [], otda.log_['all_domains'][0]['M'], reg=1e-2), xs1, ys1, xt)
+print_G(ot.bregman.sinkhorn(ws2, [], otda.log_['all_domains'][1]['M'], reg=1e-2), xs2, ys2, xt)
+pl.scatter(xs1[:, 0], xs1[:, 1], c=ys1, s=35, marker='x', cmap='Set1', vmax=9)
+pl.scatter(xs2[:, 0], xs2[:, 1], c=ys2, s=35, marker='+', cmap='Set1', vmax=9)
+pl.scatter(xt[:, 0], xt[:, 1], c=yt, s=35, marker='o', cmap='Set1', vmax=9)
+
+pl.plot([], [], 'r', alpha=.2, label='Mass from Class 1')
+pl.plot([], [], 'b', alpha=.2, label='Mass from Class 2')
+
+pl.title('OT with known proportion ({:1.1f},{:1.1f})'.format(h_res[0], h_res[1]))
+
+pl.legend()
+pl.axis('equal')
+pl.axis('off')
+pl.show()
diff --git a/examples/plot_otda_laplacian.py b/examples/plot_otda_laplacian.py
new file mode 100644
index 0000000..965380c
--- /dev/null
+++ b/examples/plot_otda_laplacian.py
@@ -0,0 +1,127 @@
+# -*- coding: utf-8 -*-
+"""
+========================
+OT for domain adaptation
+========================
+
+This example introduces a domain adaptation problem in a 2D setting and an OTDA
+approach with Laplacian regularization.
+
+"""
+
+# Authors: Ievgen Redko <ievgen.redko@univ-st-etienne.fr>
+
+# License: MIT License
+
+import matplotlib.pylab as pl
+import ot
+
+##############################################################################
+# Generate data
+# -------------
+
+n_source_samples = 150
+n_target_samples = 150
+
+Xs, ys = ot.datasets.make_data_classif('3gauss', n_source_samples)
+Xt, yt = ot.datasets.make_data_classif('3gauss2', n_target_samples)
+
+
+##############################################################################
+# Instantiate the different transport algorithms and fit them
+# -----------------------------------------------------------
+
+# EMD Transport
+ot_emd = ot.da.EMDTransport()
+ot_emd.fit(Xs=Xs, Xt=Xt)
+
+# Sinkhorn Transport
+ot_sinkhorn = ot.da.SinkhornTransport(reg_e=.01)
+ot_sinkhorn.fit(Xs=Xs, Xt=Xt)
+
+# EMD Transport with Laplacian regularization
+ot_emd_laplace = ot.da.EMDLaplaceTransport(reg_lap=100, reg_src=1)
+ot_emd_laplace.fit(Xs=Xs, Xt=Xt)
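+
+# Optional sanity check (illustrative): every Frank-Wolfe iterate of the
+# conditional-gradient solver is a convex combination of transport plans,
+# so the returned coupling keeps the prescribed uniform marginals
+import numpy as np  # used only for this check
+assert np.allclose(ot_emd_laplace.coupling_.sum(axis=1), 1. / n_source_samples, atol=1e-6)
+assert np.allclose(ot_emd_laplace.coupling_.sum(axis=0), 1. / n_target_samples, atol=1e-6)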
+
+# transport source samples onto target samples
+transp_Xs_emd = ot_emd.transform(Xs=Xs)
+transp_Xs_sinkhorn = ot_sinkhorn.transform(Xs=Xs)
+transp_Xs_emd_laplace = ot_emd_laplace.transform(Xs=Xs)
+
+##############################################################################
+# Fig 1 : plots source and target samples
+# ---------------------------------------
+
+pl.figure(1, figsize=(10, 5))
+pl.subplot(1, 2, 1)
+pl.scatter(Xs[:, 0], Xs[:, 1], c=ys, marker='+', label='Source samples')
+pl.xticks([])
+pl.yticks([])
+pl.legend(loc=0)
+pl.title('Source samples')
+
+pl.subplot(1, 2, 2)
+pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o', label='Target samples')
+pl.xticks([])
+pl.yticks([])
+pl.legend(loc=0)
+pl.title('Target samples')
+pl.tight_layout()
+
+
+##############################################################################
+# Fig 2 : plot optimal couplings and transported samples
+# ------------------------------------------------------
+
+param_img = {'interpolation': 'nearest'}
+
+pl.figure(2, figsize=(15, 8))
+pl.subplot(2, 3, 1)
+pl.imshow(ot_emd.coupling_, **param_img)
+pl.xticks([])
+pl.yticks([])
+pl.title('Optimal coupling\nEMDTransport')
+
+pl.figure(2, figsize=(15, 8))
+pl.subplot(2, 3, 2)
+pl.imshow(ot_sinkhorn.coupling_, **param_img)
+pl.xticks([])
+pl.yticks([])
+pl.title('Optimal coupling\nSinkhornTransport')
+
+pl.subplot(2, 3, 3)
+pl.imshow(ot_emd_laplace.coupling_, **param_img)
+pl.xticks([])
+pl.yticks([])
+pl.title('Optimal coupling\nEMDLaplaceTransport')
+
+pl.subplot(2, 3, 4)
+pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o',
+ label='Target samples', alpha=0.3)
+pl.scatter(transp_Xs_emd[:, 0], transp_Xs_emd[:, 1], c=ys,
+ marker='+', label='Transp samples', s=30)
+pl.xticks([])
+pl.yticks([])
+pl.title('Transported samples\nEMDTransport')
+pl.legend(loc="lower left")
+
+pl.subplot(2, 3, 5)
+pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o',
+ label='Target samples', alpha=0.3)
+pl.scatter(transp_Xs_sinkhorn[:, 0], transp_Xs_sinkhorn[:, 1], c=ys,
+ marker='+', label='Transp samples', s=30)
+pl.xticks([])
+pl.yticks([])
+pl.title('Transported samples\nSinkhornTransport')
+
+pl.subplot(2, 3, 6)
+pl.scatter(Xt[:, 0], Xt[:, 1], c=yt, marker='o',
+ label='Target samples', alpha=0.3)
+pl.scatter(transp_Xs_emd_laplace[:, 0], transp_Xs_emd_laplace[:, 1], c=ys,
+ marker='+', label='Transp samples', s=30)
+pl.xticks([])
+pl.yticks([])
+pl.title('Transported samples\nEMDLaplaceTransport')
+pl.tight_layout()
+
+pl.show()
diff --git a/ot/bregman.py b/ot/bregman.py
index d5e3563..951d3ce 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -10,6 +10,7 @@ Bregman projections for regularized OT
# Hicham Janati <hicham.janati@inria.fr>
# Mokhtar Z. Alaya <mokhtarzahdi.alaya@gmail.com>
# Alexander Tong <alexander.tong@yale.edu>
+# Ievgen Redko <ievgen.redko@univ-st-etienne.fr>
#
# License: MIT License
@@ -539,12 +540,12 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False,
old_v = v[i_2]
v[i_2] = b[i_2] / (K[:, i_2].T.dot(u))
G[:, i_2] = u * K[:, i_2] * v[i_2]
- #aviol = (G@one_m - a)
- #aviol_2 = (G.T@one_n - b)
+ # aviol = (G@one_m - a)
+ # aviol_2 = (G.T@one_n - b)
viol += (-old_v + v[i_2]) * K[:, i_2] * u
viol_2[i_2] = v[i_2] * K[:, i_2].dot(u) - b[i_2]
- #print('b',np.max(abs(aviol -viol)),np.max(abs(aviol_2 - viol_2)))
+ # print('b',np.max(abs(aviol -viol)),np.max(abs(aviol_2 - viol_2)))
if stopThr_val <= stopThr:
break
@@ -715,7 +716,7 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
if np.abs(u).max() > tau or np.abs(v).max() > tau:
if n_hists:
alpha, beta = alpha + reg * \
- np.max(np.log(u), 1), beta + reg * np.max(np.log(v))
+ np.max(np.log(u), 1), beta + reg * np.max(np.log(v))
else:
alpha, beta = alpha + reg * np.log(u), beta + reg * np.log(v)
if n_hists:
@@ -940,7 +941,7 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4,
# the 10th iterations
transp = G
err = np.linalg.norm(
- (np.sum(transp, axis=0) - b))**2 + np.linalg.norm((np.sum(transp, axis=1) - a))**2
+ (np.sum(transp, axis=0) - b)) ** 2 + np.linalg.norm((np.sum(transp, axis=1) - a)) ** 2
if log:
log['err'].append(err)
@@ -966,7 +967,7 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4,
def geometricBar(weights, alldistribT):
"""return the weighted geometric mean of distributions"""
- assert(len(weights) == alldistribT.shape[1])
+ assert (len(weights) == alldistribT.shape[1])
return np.exp(np.dot(np.log(alldistribT), weights.T))
@@ -1108,7 +1109,7 @@ def barycenter_sinkhorn(A, M, reg, weights=None, numItermax=1000,
if weights is None:
weights = np.ones(A.shape[1]) / A.shape[1]
else:
- assert(len(weights) == A.shape[1])
+ assert (len(weights) == A.shape[1])
if log:
log = {'err': []}
@@ -1206,7 +1207,7 @@ def barycenter_stabilized(A, M, reg, tau=1e10, weights=None, numItermax=1000,
if weights is None:
weights = np.ones(n_hists) / n_hists
else:
- assert(len(weights) == A.shape[1])
+ assert (len(weights) == A.shape[1])
if log:
log = {'err': []}
@@ -1334,7 +1335,7 @@ def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000,
if weights is None:
weights = np.ones(A.shape[0]) / A.shape[0]
else:
- assert(len(weights) == A.shape[0])
+ assert (len(weights) == A.shape[0])
if log:
log = {'err': []}
@@ -1350,11 +1351,11 @@ def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000,
# this is equivalent to blurring on horizontal then vertical directions
t = np.linspace(0, 1, A.shape[1])
[Y, X] = np.meshgrid(t, t)
- xi1 = np.exp(-(X - Y)**2 / reg)
+ xi1 = np.exp(-(X - Y) ** 2 / reg)
t = np.linspace(0, 1, A.shape[2])
[Y, X] = np.meshgrid(t, t)
- xi2 = np.exp(-(X - Y)**2 / reg)
+ xi2 = np.exp(-(X - Y) ** 2 / reg)
def K(x):
return np.dot(np.dot(xi1, x), xi2)
@@ -1502,6 +1503,164 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000,
return np.sum(K0, axis=1)
+def jcpot_barycenter(Xs, Ys, Xt, reg, metric='sqeuclidean', numItermax=100,
+ stopThr=1e-6, verbose=False, log=False, **kwargs):
+ r'''Joint OT and proportion estimation for multi-source target shift as proposed in [27]
+
+ The function solves the following optimization problem:
+
+ .. math::
+
+ \mathbf{h} = arg\min_{\mathbf{h}}\quad \sum_{k=1}^{K} \lambda_k
+ W_{reg}((\mathbf{D}_2^{(k)} \mathbf{h})^T, \mathbf{a})
+
+ s.t. \ \forall k, \mathbf{D}_1^{(k)} \gamma_k \mathbf{1}_n = \mathbf{h}
+
+ where :
+
+ - :math:`\lambda_k` is the weight of k-th source domain
+ - :math:`W_{reg}(\cdot,\cdot)` is the entropic regularized Wasserstein distance (see ot.bregman.sinkhorn)
+ - :math:`\mathbf{D}_2^{(k)}` is a matrix of weights related to k-th source domain defined as in [p. 5, 27], its expected shape is `(C, n_k)` where `n_k` is the number of elements in the k-th source domain and `C` is the number of classes
+ - :math:`\mathbf{h}` is a vector of estimated proportions in the target domain of size C
+ - :math:`\mathbf{a}` is a uniform vector of weights in the target domain of size `n`
+ - :math:`\mathbf{D}_1^{(k)}` is a matrix of class assignments defined as in [p. 5, 27], its expected shape is `(C, n_k)`
+
+ The problem consists in solving a Wasserstein barycenter problem to estimate the proportions :math:`\mathbf{h}` in the target domain.
+
+ The algorithm used for solving the problem is the Iterative Bregman projections algorithm
+ with two sets of marginal constraints related to the unknown vector :math:`\mathbf{h}` and the uniform target distribution.
+
+ Parameters
+ ----------
+ Xs : list of K np.ndarray(nsk,d)
+ features of all source domains' samples
+ Ys : list of K np.ndarray(nsk,)
+ labels of all source domains' samples
+ Xt : np.ndarray (nt,d)
+ samples in the target domain
+ reg : float
+ Regularization term > 0
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+ Stop threshold on relative change in the barycenter (>0)
+ log : bool, optional
+ record log if True
+ verbose : bool, optional (default=False)
+ Controls the verbosity of the optimization algorithm
+
+ Returns
+ -------
+ gamma : List of K (nsk x nt) ndarrays
+ Optimal transportation matrices for the given parameters for each pair of source and target domains
+ h : (C,) ndarray
+ proportion estimation in the target domain
+ log : dict
+ log dictionary return only if log==True in parameters
+
+
+ References
+ ----------
+
+ .. [27] Ievgen Redko, Nicolas Courty, Rémi Flamary, Devis Tuia
+ "Optimal transport for multi-source domain adaptation under target shift",
+ International Conference on Artificial Intelligence and Statistics (AISTATS), 2019.
+
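+ Examples
+ --------
+ A minimal sketch on toy data (shapes and values below are illustrative
+ assumptions, not taken from the referenced paper):
+
+ >>> import numpy as np
+ >>> Xs = [np.random.randn(10, 2), np.random.randn(12, 2)]
+ >>> Ys = [np.concatenate((np.zeros(5), np.ones(5))),
+ ... np.concatenate((np.zeros(6), np.ones(6)))]
+ >>> Xt = np.random.randn(15, 2)
+ >>> couplings, h = jcpot_barycenter(Xs, Ys, Xt, reg=1.)
+ >>> len(couplings), h.shape
+ (2, (2,))
+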
+ '''
+ nbclasses = len(np.unique(Ys[0]))
+ nbdomains = len(Xs)
+
+ # For each source domain, build cost matrices M, Gibbs kernels K and corresponding matrices D_1 and D_2
+ all_domains = []
+
+ # log dictionary
+ if log:
+ log = {'niter': 0, 'err': [], 'all_domains': []}
+
+ for d in range(nbdomains):
+ dom = {}
+ nsk = Xs[d].shape[0] # get number of elements for this domain
+ dom['nbelem'] = nsk
+ classes = np.unique(Ys[d]) # get number of classes for this domain
+
+ # format classes to start from 0 for convenience
+ if np.min(classes) != 0:
+ Ys[d] = Ys[d] - np.min(classes)
+ classes = np.unique(Ys[d])
+
+ # build the corresponding D_1 and D_2 matrices
+ D1 = np.zeros((nbclasses, nsk))
+ D2 = np.zeros((nbclasses, nsk))
+
+ for c in classes:
+ nbelemperclass = np.sum(Ys[d] == c)
+ if nbelemperclass != 0:
+ D1[int(c), Ys[d] == c] = 1.
+ D2[int(c), Ys[d] == c] = 1. / (nbelemperclass)
+ dom['D1'] = D1
+ dom['D2'] = D2
+
+ # build the cost matrix and the Gibbs kernel
+ M = dist(Xs[d], Xt, metric=metric)
+ M = M / np.median(M)
+ dom['M'] = M
+
+ K = np.empty(M.shape, dtype=M.dtype)
+ np.divide(M, -reg, out=K)
+ np.exp(K, out=K)
+ dom['K'] = K
+
+ all_domains.append(dom)
+
+ # uniform target distribution
+ a = unif(np.shape(Xt)[0])
+
+ cpt = 0 # iterations count
+ err = 1
+ old_bary = np.ones((nbclasses))
+
+ while (err > stopThr and cpt < numItermax):
+
+ bary = np.zeros((nbclasses))
+
+ # update coupling matrices for marginal constraints w.r.t. uniform target distribution
+ for d in range(nbdomains):
+ all_domains[d]['K'] = projC(all_domains[d]['K'], a)
+ other = np.sum(all_domains[d]['K'], axis=1)
+ bary = bary + np.log(np.dot(all_domains[d]['D1'], other)) / nbdomains
+
+ bary = np.exp(bary)
+
+ # update coupling matrices for marginal constraints w.r.t. unknown proportions based on [Prop 4., 27]
+ for d in range(nbdomains):
+ new = np.dot(all_domains[d]['D2'].T, bary)
+ all_domains[d]['K'] = projR(all_domains[d]['K'], new)
+
+ err = np.linalg.norm(bary - old_bary)
+ cpt = cpt + 1
+ old_bary = bary
+
+ if log:
+ log['err'].append(err)
+
+ if verbose:
+ if cpt % 200 == 0:
+ print('{:5s}|{:12s}'.format('It.', 'Err') + '\n' + '-' * 19)
+ print('{:5d}|{:8e}|'.format(cpt, err))
+
+ bary = bary / np.sum(bary)
+ couplings = [all_domains[d]['K'] for d in range(nbdomains)]
+
+ if log:
+ log['niter'] = cpt
+ log['all_domains'] = all_domains
+ return couplings, bary, log
+ else:
+ return couplings, bary
+
+
def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean',
numIterMax=10000, stopThr=1e-9, verbose=False,
log=False, **kwargs):
@@ -1593,7 +1752,8 @@ def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean',
return pi
-def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs):
+def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9,
+ verbose=False, log=False, **kwargs):
r'''
Solve the entropic regularization optimal transport problem from empirical
data and return the OT loss
@@ -1675,14 +1835,17 @@ def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', num
M = dist(X_s, X_t, metric=metric)
if log:
- sinkhorn_loss, log = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ sinkhorn_loss, log = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log,
+ **kwargs)
return sinkhorn_loss, log
else:
- sinkhorn_loss = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+ sinkhorn_loss = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log,
+ **kwargs)
return sinkhorn_loss
-def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs):
+def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9,
+ verbose=False, log=False, **kwargs):
r'''
Compute the sinkhorn divergence loss from empirical data
@@ -1768,11 +1931,14 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli
.. [23] Aude Genevay, Gabriel Peyré, Marco Cuturi, Learning Generative Models with Sinkhorn Divergences, Proceedings of the Twenty-First International Conference on Artficial Intelligence and Statistics, (AISTATS) 21, 2018
'''
if log:
- sinkhorn_loss_ab, log_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+ sinkhorn_loss_ab, log_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax,
+ stopThr=1e-9, verbose=verbose, log=log, **kwargs)
- sinkhorn_loss_a, log_a = empirical_sinkhorn2(X_s, X_s, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+ sinkhorn_loss_a, log_a = empirical_sinkhorn2(X_s, X_s, reg, a, b, metric=metric, numIterMax=numIterMax,
+ stopThr=1e-9, verbose=verbose, log=log, **kwargs)
- sinkhorn_loss_b, log_b = empirical_sinkhorn2(X_t, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+ sinkhorn_loss_b, log_b = empirical_sinkhorn2(X_t, X_t, reg, a, b, metric=metric, numIterMax=numIterMax,
+ stopThr=1e-9, verbose=verbose, log=log, **kwargs)
sinkhorn_div = sinkhorn_loss_ab - 1 / 2 * (sinkhorn_loss_a + sinkhorn_loss_b)
@@ -1787,11 +1953,14 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli
return max(0, sinkhorn_div), log
else:
- sinkhorn_loss_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+ sinkhorn_loss_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9,
+ verbose=verbose, log=log, **kwargs)
- sinkhorn_loss_a = empirical_sinkhorn2(X_s, X_s, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+ sinkhorn_loss_a = empirical_sinkhorn2(X_s, X_s, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9,
+ verbose=verbose, log=log, **kwargs)
- sinkhorn_loss_b = empirical_sinkhorn2(X_t, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+ sinkhorn_loss_b = empirical_sinkhorn2(X_t, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9,
+ verbose=verbose, log=log, **kwargs)
sinkhorn_div = sinkhorn_loss_ab - 1 / 2 * (sinkhorn_loss_a + sinkhorn_loss_b)
return max(0, sinkhorn_div)
@@ -1883,7 +2052,8 @@ def screenkhorn(a, b, M, reg, ns_budget=None, nt_budget=None, uniform=False, res
try:
import bottleneck
except ImportError:
- warnings.warn("Bottleneck module is not installed. Install it from https://pypi.org/project/Bottleneck/ for better performance.")
+ warnings.warn(
+ "Bottleneck module is not installed. Install it from https://pypi.org/project/Bottleneck/ for better performance.")
bottleneck = np
a = np.asarray(a, dtype=np.float64)
@@ -2017,10 +2187,11 @@ def screenkhorn(a, b, M, reg, ns_budget=None, nt_budget=None, uniform=False, res
# box constraints in L-BFGS-B (see Proposition 1 in [26])
bounds_u = [(max(a_I_min / ((nt - nt_budget) * epsilon + nt_budget * (b_J_max / (
- ns * epsilon * kappa * K_min))), epsilon / kappa), a_I_max / (nt * epsilon * K_min))] * ns_budget
+ ns * epsilon * kappa * K_min))), epsilon / kappa), a_I_max / (nt * epsilon * K_min))] * ns_budget
- bounds_v = [(max(b_J_min / ((ns - ns_budget) * epsilon + ns_budget * (kappa * a_I_max / (nt * epsilon * K_min))),
- epsilon * kappa), b_J_max / (ns * epsilon * K_min))] * nt_budget
+ bounds_v = [(
+ max(b_J_min / ((ns - ns_budget) * epsilon + ns_budget * (kappa * a_I_max / (nt * epsilon * K_min))),
+ epsilon * kappa), b_J_max / (ns * epsilon * K_min))] * nt_budget
# pre-calculated constants for the objective
vec_eps_IJc = epsilon * kappa * (K_IJc * np.ones(nt - nt_budget).reshape((1, -1))).sum(axis=1)
@@ -2069,7 +2240,8 @@ def screenkhorn(a, b, M, reg, ns_budget=None, nt_budget=None, uniform=False, res
return usc, vsc
def screened_obj(usc, vsc):
- part_IJ = np.dot(np.dot(usc, K_IJ), vsc) - kappa * np.dot(a_I, np.log(usc)) - (1. / kappa) * np.dot(b_J, np.log(vsc))
+ part_IJ = np.dot(np.dot(usc, K_IJ), vsc) - kappa * np.dot(a_I, np.log(usc)) - (1. / kappa) * np.dot(b_J,
+ np.log(vsc))
part_IJc = np.dot(usc, vec_eps_IJc)
part_IcJ = np.dot(vec_eps_IcJ, vsc)
psi_epsilon = part_IJ + part_IJc + part_IcJ
@@ -2091,9 +2263,9 @@ def screenkhorn(a, b, M, reg, ns_budget=None, nt_budget=None, uniform=False, res
g = np.hstack([g_u, g_v])
return f, g
- #----------------------------------------------------------------------------------------------------------------#
+ # ----------------------------------------------------------------------------------------------------------------#
# Step 2: L-BFGS-B solver #
- #----------------------------------------------------------------------------------------------------------------#
+ # ----------------------------------------------------------------------------------------------------------------#
u0, v0 = restricted_sinkhorn(u0, v0)
theta0 = np.hstack([u0, v0])
diff --git a/ot/da.py b/ot/da.py
index 108a38d..0fdd3be 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -7,15 +7,16 @@ Domain adaptation with optimal transport
# Nicolas Courty <ncourty@irisa.fr>
# Michael Perrot <michael.perrot@univ-st-etienne.fr>
# Nathalie Gayraud <nat.gayraud@gmail.com>
+# Ievgen Redko <ievgen.redko@univ-st-etienne.fr>
#
# License: MIT License
import numpy as np
import scipy.linalg as linalg
-from .bregman import sinkhorn
+from .bregman import sinkhorn, jcpot_barycenter
from .lp import emd
-from .utils import unif, dist, kernel, cost_normalization
+from .utils import unif, dist, kernel, cost_normalization, laplacian
from .utils import check_params, BaseEstimator
from .unbalanced import sinkhorn_unbalanced
from .optim import cg
@@ -127,7 +128,7 @@ def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
W = np.ones(M.shape)
for (i, c) in enumerate(classes):
majs = np.sum(transp[indices_labels[i]], axis=0)
- majs = p * ((majs + epsilon)**(p - 1))
+ majs = p * ((majs + epsilon) ** (p - 1))
W[indices_labels[i]] = majs
return transp
@@ -359,8 +360,8 @@ def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False,
def loss(L, G):
"""Compute full loss"""
- return np.sum((xs1.dot(L) - ns * G.dot(xt))**2) + mu * \
- np.sum(G * M) + eta * np.sum(sel(L - I0)**2)
+ return np.sum((xs1.dot(L) - ns * G.dot(xt)) ** 2) + mu * \
+ np.sum(G * M) + eta * np.sum(sel(L - I0) ** 2)
def solve_L(G):
""" solve L problem with fixed G (least square)"""
@@ -372,10 +373,11 @@ def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False,
xsi = xs1.dot(L)
def f(G):
- return np.sum((xsi - ns * G.dot(xt))**2)
+ return np.sum((xsi - ns * G.dot(xt)) ** 2)
def df(G):
return -2 * ns * (xsi - ns * G.dot(xt)).dot(xt.T)
+
G = cg(a, b, M, 1.0 / mu, f, df, G0=G0,
numItermax=numInnerItermax, stopThr=stopInnerThr)
return G
@@ -562,7 +564,7 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
def loss(L, G):
"""Compute full loss"""
- return np.sum((K1.dot(L) - ns * G.dot(xt))**2) + mu * \
+ return np.sum((K1.dot(L) - ns * G.dot(xt)) ** 2) + mu * \
np.sum(G * M) + eta * np.trace(L.T.dot(Kreg).dot(L))
def solve_L_nobias(G):
@@ -580,10 +582,11 @@ def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
xsi = K1.dot(L)
def f(G):
- return np.sum((xsi - ns * G.dot(xt))**2)
+ return np.sum((xsi - ns * G.dot(xt)) ** 2)
def df(G):
return -2 * ns * (xsi - ns * G.dot(xt)).dot(xt.T)
+
G = cg(a, b, M, 1.0 / mu, f, df, G0=G0,
numItermax=numInnerItermax, stopThr=stopInnerThr)
return G
@@ -745,6 +748,115 @@ def OT_mapping_linear(xs, xt, reg=1e-6, ws=None,
return A, b
+def emd_laplace(a, b, xs, xt, M, sim, eta, alpha,
+ numItermax, stopThr, numInnerItermax,
+ stopInnerThr, log=False, verbose=False, **kwargs):
+ r"""Solve the optimal transport problem (OT) with Laplacian regularization
+
+ .. math::
+ \gamma = arg\min_\gamma <\gamma,M>_F + \eta\Omega_\alpha(\gamma)
+
+ s.t.\ \gamma 1 = a
+
+ \gamma^T 1= b
+
+ \gamma\geq 0
+
+ where:
+
+ - a and b are source and target weights (sum to 1)
+ - xs and xt are source and target samples
+ - M is the (ns,nt) metric cost matrix
+ - :math:`\Omega_\alpha` is the Laplacian regularization term
+ :math:`\Omega_\alpha = \frac{\alpha}{n_s^2}\sum_{i,j}S^s_{i,j}\|T(\mathbf{x}^s_i)-T(\mathbf{x}^s_j)\|^2+\frac{1-\alpha}{n_t^2}\sum_{i,j}S^t_{i,j}\|T(\mathbf{x}^t_i)-T(\mathbf{x}^t_j)\|^2`
+ with :math:`S^s_{i,j}, S^t_{i,j}` denoting the source and target similarity matrices and :math:`T(\cdot)` being the barycentric mapping (:math:`\alpha` weights the source term, matching the `alpha` parameter below)
+
+ The algorithm used for solving the problem is the conditional gradient algorithm as proposed in [5].
+
+ Parameters
+ ----------
+ a : np.ndarray (ns,)
+ samples weights in the source domain
+ b : np.ndarray (nt,)
+ samples weights in the target domain
+ xs : np.ndarray (ns,d)
+ samples in the source domain
+ xt : np.ndarray (nt,d)
+ samples in the target domain
+ M : np.ndarray (ns,nt)
+ loss matrix
+ sim : string
+ Type of similarity ('knn' or 'gauss') used to build the graph Laplacians
+ eta : float
+ Regularization term for Laplacian regularization
+ alpha : float
+ Regularization term for source domain's importance in regularization
+ numItermax : int, optional
+ Max number of iterations
+ stopThr : float, optional
+ Stop threshold on error (inner emd solver) (>0)
+ numInnerItermax : int, optional
+ Max number of iterations (inner CG solver)
+ stopInnerThr : float, optional
+ Stop threshold on error (inner CG solver) (>0)
+ verbose : bool, optional
+ Print information along iterations
+ log : bool, optional
+ record log if True
+
+
+ Returns
+ -------
+ gamma : (ns x nt) ndarray
+ Optimal transportation matrix for the given parameters
+ log : dict
+ log dictionary return only if log==True in parameters
+
+
+ References
+ ----------
+
+ .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+ "Optimal Transport for Domain Adaptation," in IEEE
+ Transactions on Pattern Analysis and Machine Intelligence ,
+ vol.PP, no.99, pp.1-1
+
+ See Also
+ --------
+ ot.lp.emd : Unregularized OT
+ ot.optim.cg : General regularized OT
+
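+ Examples
+ --------
+ A minimal sketch on random data (an illustrative call, not from [5];
+ the 'knn' similarity additionally requires scikit-learn):
+
+ >>> import numpy as np
+ >>> from ot.utils import unif, dist
+ >>> xs, xt = np.random.randn(20, 2), np.random.randn(30, 2)
+ >>> a, b = unif(20), unif(30)
+ >>> M = dist(xs, xt, metric='sqeuclidean')
+ >>> G = emd_laplace(a, b, xs, xt, M, sim='knn', eta=1., alpha=0.5,
+ ... numItermax=100, stopThr=1e-9, numInnerItermax=100000,
+ ... stopInnerThr=1e-9)
+ >>> G.shape
+ (20, 30)
+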
+ """
+ if sim == 'gauss':
+ if 'rbfparam' not in kwargs:
+ kwargs['rbfparam'] = 1 / (2 * (np.mean(dist(xs, xs, 'sqeuclidean')) ** 2))
+ sS = kernel(xs, xs, method=sim, sigma=kwargs['rbfparam'])
+ sT = kernel(xt, xt, method=sim, sigma=kwargs['rbfparam'])
+
+ elif sim == 'knn':
+ if 'nn' not in kwargs:
+ kwargs['nn'] = 5
+
+ from sklearn.neighbors import kneighbors_graph
+
+ sS = kneighbors_graph(xs, kwargs['nn']).toarray()
+ sS = (sS + sS.T) / 2
+ sT = kneighbors_graph(xt, kwargs['nn']).toarray()
+ sT = (sT + sT.T) / 2
+
+ lS = laplacian(sS)
+ lT = laplacian(sT)
+
+ def f(G):
+ return alpha * np.trace(np.dot(xt.T, np.dot(G.T, np.dot(lS, np.dot(G, xt))))) \
+ + (1 - alpha) * np.trace(np.dot(xs.T, np.dot(G, np.dot(lT, np.dot(G.T, xs)))))
+
+ def df(G):
+ return alpha * np.dot(lS + lS.T, np.dot(G, np.dot(xt, xt.T)))\
+ + (1 - alpha) * np.dot(xs, np.dot(xs.T, np.dot(G, lT + lT.T)))
+
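+ # f and df above implement the Laplacian regularizer and its gradient,
+ # using d/dG tr(X^T G^T L G X) = (L + L^T) G X X^T and
+ # d/dG tr(X^T G L G^T X) = X X^T G (L + L^T)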
+ return cg(a, b, M, reg=eta, f=f, df=df, G0=None, numItermax=numItermax, numItermaxEmd=numInnerItermax,
+ stopThr=stopThr, stopThr2=stopInnerThr, verbose=verbose, log=log)
+
+
def distribution_estimation_uniform(X):
"""estimates a uniform distribution from an array of samples X
@@ -921,7 +1033,6 @@ class BaseTransport(BaseEstimator):
transp_Xs = []
for bi in batch_ind:
-
# get the nearest neighbor in the source domain
D0 = dist(Xs[bi], self.xs_)
idx = np.argmin(D0, axis=1)
@@ -990,7 +1101,6 @@ class BaseTransport(BaseEstimator):
transp_Xt = []
for bi in batch_ind:
-
D0 = dist(Xt[bi], self.xt_)
idx = np.argmin(D0, axis=1)
@@ -1011,6 +1121,7 @@ class BaseTransport(BaseEstimator):
class LinearTransport(BaseTransport):
+
""" OT linear operator between empirical distributions
The function estimates the optimal linear operator that aligns the two
@@ -1055,7 +1166,6 @@ class LinearTransport(BaseTransport):
def __init__(self, reg=1e-8, bias=True, log=False,
distribution_estimation=distribution_estimation_uniform):
-
self.bias = bias
self.log = log
self.reg = reg
@@ -1136,7 +1246,6 @@ class LinearTransport(BaseTransport):
# check the necessary inputs parameters are here
if check_params(Xs=Xs):
-
transp_Xs = Xs.dot(self.A_) + self.B_
return transp_Xs
@@ -1170,7 +1279,6 @@ class LinearTransport(BaseTransport):
# check the necessary inputs parameters are here
if check_params(Xt=Xt):
-
transp_Xt = Xt.dot(self.A1_) + self.B1_
return transp_Xt
@@ -1231,7 +1339,6 @@ class SinkhornTransport(BaseTransport):
metric="sqeuclidean", norm=None,
distribution_estimation=distribution_estimation_uniform,
out_of_sample_map='ferradans', limit_max=np.infty):
-
self.reg_e = reg_e
self.max_iter = max_iter
self.tol = tol
@@ -1329,7 +1436,6 @@ class EMDTransport(BaseTransport):
distribution_estimation=distribution_estimation_uniform,
out_of_sample_map='ferradans', limit_max=10,
max_iter=100000):
-
self.metric = metric
self.norm = norm
self.log = log
@@ -1440,7 +1546,6 @@ class SinkhornLpl1Transport(BaseTransport):
metric="sqeuclidean", norm=None,
distribution_estimation=distribution_estimation_uniform,
out_of_sample_map='ferradans', limit_max=np.infty):
-
self.reg_e = reg_e
self.reg_cl = reg_cl
self.max_iter = max_iter
@@ -1481,7 +1586,6 @@ class SinkhornLpl1Transport(BaseTransport):
# check the necessary inputs parameters are here
if check_params(Xs=Xs, Xt=Xt, ys=ys):
-
super(SinkhornLpl1Transport, self).fit(Xs, ys, Xt, yt)
returned_ = sinkhorn_lpl1_mm(
@@ -1499,6 +1603,113 @@ class SinkhornLpl1Transport(BaseTransport):
return self
+class EMDLaplaceTransport(BaseTransport):
+
+ """Domain Adapatation OT method based on Earth Mover's Distance with Laplacian regularization
+
+ Parameters
+ ----------
+ reg_lap : float, optional (default=1)
+ Laplacian regularization parameter
+ reg_src : float, optional (default=1)
+ Source relative importance in regularization
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ norm : string, optional (default=None)
+ If given, normalize the ground metric to avoid numerical errors that
+ can occur with large metric values.
+ similarity : string, optional (default="knn")
+ The similarity to use either knn or gaussian
+ max_iter : int, optional (default=100)
+ Max number of BCD iterations
+ tol : float, optional (default=1e-5)
+ Stop threshold on relative loss decrease (>0)
+ max_inner_iter : int, optional (default=10)
+ Max number of iterations (inner CG solver)
+ inner_tol : float, optional (default=1e-6)
+ Stop threshold on error (inner CG solver) (>0)
+ log : bool, optional (default=False)
+ Controls the logs of the optimization algorithm
+ distribution_estimation : callable, optional (defaults to the uniform)
+ The kind of distribution estimation to employ
+ out_of_sample_map : string, optional (default="ferradans")
+ The kind of out of sample mapping to apply to transport samples
+ from a domain into another one. Currently the only possible option is
+ "ferradans" which uses the method proposed in [6].
+
+ Attributes
+ ----------
+ coupling_ : array-like, shape (n_source_samples, n_target_samples)
+ The optimal coupling
+
+ References
+ ----------
+ .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+ "Optimal Transport for Domain Adaptation," in IEEE Transactions
+ on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
+ """
+
+ def __init__(self, reg_lap=1., reg_src=1.,
+ metric="sqeuclidean", norm=None, similarity="knn", max_iter=100, tol=1e-9,
+ max_inner_iter=100000, inner_tol=1e-9, log=False, verbose=False,
+ distribution_estimation=distribution_estimation_uniform,
+ out_of_sample_map='ferradans'):
+ self.reg_lap = reg_lap
+ self.reg_src = reg_src
+ self.metric = metric
+ self.norm = norm
+ self.similarity = similarity
+ self.max_iter = max_iter
+ self.tol = tol
+ self.max_inner_iter = max_inner_iter
+ self.inner_tol = inner_tol
+ self.log = log
+ self.verbose = verbose
+ self.distribution_estimation = distribution_estimation
+ self.out_of_sample_map = out_of_sample_map
+
+ def fit(self, Xs, ys=None, Xt=None, yt=None):
+ """Build a coupling matrix from source and target sets of samples
+ (Xs, ys) and (Xt, yt)
+
+ Parameters
+ ----------
+ Xs : array-like, shape (n_source_samples, n_features)
+ The training input samples.
+ ys : array-like, shape (n_source_samples,)
+ The class labels
+ Xt : array-like, shape (n_target_samples, n_features)
+ The training input samples.
+ yt : array-like, shape (n_target_samples,)
+ The class labels. If some target samples are unlabeled, fill the
+ yt's elements with -1.
+
+ Warning: Note that, due to this convention -1 cannot be used as a
+ class label
+
+ Returns
+ -------
+ self : object
+ Returns self.
+ """
+
+ super(EMDLaplaceTransport, self).fit(Xs, ys, Xt, yt)
+
+ returned_ = emd_laplace(a=self.mu_s, b=self.mu_t, xs=self.xs_,
+ xt=self.xt_, M=self.cost_, sim=self.similarity, eta=self.reg_lap, alpha=self.reg_src,
+ numItermax=self.max_iter, stopThr=self.tol, numInnerItermax=self.max_inner_iter,
+ stopInnerThr=self.inner_tol, log=self.log, verbose=self.verbose)
+
+ # coupling estimation
+ if self.log:
+ self.coupling_, self.log_ = returned_
+ else:
+ self.coupling_ = returned_
+ self.log_ = dict()
+ return self
+
+
class SinkhornL1l2Transport(BaseTransport):
"""Domain Adapatation OT method based on sinkhorn algorithm +
@@ -1563,7 +1774,6 @@ class SinkhornL1l2Transport(BaseTransport):
metric="sqeuclidean", norm=None,
distribution_estimation=distribution_estimation_uniform,
out_of_sample_map='ferradans', limit_max=10):
-
self.reg_e = reg_e
self.reg_cl = reg_cl
self.max_iter = max_iter
@@ -1685,7 +1895,6 @@ class MappingTransport(BaseEstimator):
norm=None, kernel="linear", sigma=1, max_iter=100, tol=1e-5,
max_inner_iter=10, inner_tol=1e-6, log=False, verbose=False,
verbose2=False):
-
self.metric = metric
self.norm = norm
self.mu = mu
@@ -1856,7 +2065,6 @@ class UnbalancedSinkhornTransport(BaseTransport):
metric="sqeuclidean", norm=None,
distribution_estimation=distribution_estimation_uniform,
out_of_sample_map='ferradans', limit_max=10):
-
self.reg_e = reg_e
self.reg_m = reg_m
self.method = method
@@ -1914,3 +2122,180 @@ class UnbalancedSinkhornTransport(BaseTransport):
self.log_ = dict()
return self
+
+
+class JCPOTTransport(BaseTransport):
+
+ """Domain Adapatation OT method for multi-source target shift based on Wasserstein barycenter algorithm.
+
+ Parameters
+ ----------
+ reg_e : float, optional (default=1)
+ Entropic regularization parameter
+ max_iter : int, float, optional (default=10)
+ The maximum number of iterations before stopping the optimization
+ algorithm if it has not converged
+ tol : float, optional (default=10e-9)
+ Stop threshold on error (inner sinkhorn solver) (>0)
+ verbose : bool, optional (default=False)
+ Controls the verbosity of the optimization algorithm
+ log : bool, optional (default=False)
+ Controls the logs of the optimization algorithm
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ norm : string, optional (default=None)
+ If given, normalize the ground metric to avoid numerical errors that
+ can occur with large metric values.
+ distribution_estimation : callable, optional (defaults to the uniform)
+ The kind of distribution estimation to employ
+ out_of_sample_map : string, optional (default="ferradans")
+ The kind of out of sample mapping to apply to transport samples
+ from a domain into another one. Currently the only possible option is
+ "ferradans" which uses the method proposed in [6].
+
+ Attributes
+ ----------
+ coupling_ : list of array-like objects, shape K x (n_source_samples, n_target_samples)
+ A set of optimal couplings between each source domain and the target domain
+ proportions_ : array-like, shape (n_classes,)
+ Estimated class proportions in the target domain
+ log_ : dictionary
+ The dictionary of log, empty dic if parameter log is not True
+
+ References
+ ----------
+
+ .. [1] Ievgen Redko, Nicolas Courty, Rémi Flamary, Devis Tuia
+ "Optimal transport for multi-source domain adaptation under target shift",
+ International Conference on Artificial Intelligence and Statistics (AISTATS),
+ vol. 89, p.849-858, 2019.
+
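+ Examples
+ --------
+ A minimal usage sketch (data shapes below are illustrative assumptions):
+
+ >>> import numpy as np
+ >>> import ot
+ >>> Xs = [np.random.randn(10, 2), np.random.randn(12, 2)]
+ >>> ys = [np.concatenate((np.zeros(5), np.ones(5))),
+ ... np.concatenate((np.zeros(6), np.ones(6)))]
+ >>> Xt = np.random.randn(15, 2)
+ >>> otda = ot.da.JCPOTTransport(reg_e=1e-1).fit(Xs=Xs, ys=ys, Xt=Xt)
+ >>> otda.proportions_.shape
+ (2,)
+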
+ """
+
+ def __init__(self, reg_e=.1, max_iter=10,
+ tol=10e-9, verbose=False, log=False,
+ metric="sqeuclidean",
+ out_of_sample_map='ferradans'):
+ self.reg_e = reg_e
+ self.max_iter = max_iter
+ self.tol = tol
+ self.verbose = verbose
+ self.log = log
+ self.metric = metric
+ self.out_of_sample_map = out_of_sample_map
+
+ def fit(self, Xs, ys=None, Xt=None, yt=None):
+ """Building coupling matrices from a list of source and target sets of samples
+ (Xs, ys) and (Xt, yt)
+
+ Parameters
+ ----------
+ Xs : list of K array-like objects, shape K x (nk_source_samples, n_features)
+ A list of the training input samples.
+ ys : list of K array-like objects, shape K x (nk_source_samples,)
+ A list of the class labels
+ Xt : array-like, shape (n_target_samples, n_features)
+ The training input samples.
+ yt : array-like, shape (n_target_samples,)
+ The class labels. If some target samples are unlabeled, fill the
+ yt's elements with -1.
+
+ Warning: Note that, due to this convention -1 cannot be used as a
+ class label
+
+ Returns
+ -------
+ self : object
+ Returns self.
+ """
+
+ # check the necessary inputs parameters are here
+ if check_params(Xs=Xs, Xt=Xt, ys=ys):
+
+ self.xs_ = Xs
+ self.xt_ = Xt
+
+ returned_ = jcpot_barycenter(Xs=Xs, Ys=ys, Xt=Xt, reg=self.reg_e,
+ metric=self.metric, numItermax=self.max_iter, stopThr=self.tol,
+ verbose=self.verbose, log=self.log)
+
+ # deal with the value of log
+ if self.log:
+ self.coupling_, self.proportions_, self.log_ = returned_
+ else:
+ self.coupling_, self.proportions_ = returned_
+ self.log_ = dict()
+
+ return self
+
+ def transform(self, Xs=None, ys=None, Xt=None, yt=None, batch_size=128):
+ """Transports source samples Xs onto target ones Xt
+
+ Parameters
+ ----------
+ Xs : array-like, shape (n_source_samples, n_features)
+ The training input samples.
+ ys : array-like, shape (n_source_samples,)
+ The class labels
+ Xt : array-like, shape (n_target_samples, n_features)
+ The training input samples.
+ yt : array-like, shape (n_target_samples,)
+ The class labels. If some target samples are unlabeled, fill the
+ yt's elements with -1.
+
+ Warning: Note that, due to this convention -1 cannot be used as a
+ class label
+ batch_size : int, optional (default=128)
+ The batch size for out of sample transform
+ """
+
+ transp_Xs = []
+
+ # check the necessary inputs parameters are here
+ if check_params(Xs=Xs):
+
+ if all([np.allclose(x, y) for x, y in zip(self.xs_, Xs)]):
+
+ # perform standard barycentric mapping for each source domain
+
+ for coupling in self.coupling_:
+ transp = coupling / np.sum(coupling, 1)[:, None]
+
+ # set nans to 0
+ transp[~ np.isfinite(transp)] = 0
+
+ # compute transported samples
+ transp_Xs.append(np.dot(transp, self.xt_))
+ else:
+
+ # perform out of sample mapping
+ indices = np.arange(Xs.shape[0])
+ batch_ind = [
+ indices[i:i + batch_size]
+ for i in range(0, len(indices), batch_size)]
+
+ transp_Xs = []
+
+ for bi in batch_ind:
+ transp_Xs_ = []
+
+ # get the nearest neighbor in the source domains
+ xs = np.concatenate(self.xs_, axis=0)
+ idx = np.argmin(dist(Xs[bi], xs), axis=1)
+
+ # transport the source samples
+ for coupling in self.coupling_:
+ transp = coupling / np.sum(
+ coupling, 1)[:, None]
+ transp[~ np.isfinite(transp)] = 0
+ transp_Xs_.append(np.dot(transp, self.xt_))
+
+ transp_Xs_ = np.concatenate(transp_Xs_, axis=0)
+
+ # define the transported points
+ transp_Xs_ = transp_Xs_[idx, :] + Xs[bi] - xs[idx, :]
+ transp_Xs.append(transp_Xs_)
+
+ transp_Xs = np.concatenate(transp_Xs, axis=0)
+
+ return transp_Xs
diff --git a/ot/datasets.py b/ot/datasets.py
index ba0cfd9..a1ca7b6 100644
--- a/ot/datasets.py
+++ b/ot/datasets.py
@@ -30,7 +30,7 @@ def make_1D_gauss(n, m, s):
1D histogram for a gaussian distribution
"""
x = np.arange(n, dtype=np.float64)
- h = np.exp(-(x - m)**2 / (2 * s**2))
+ h = np.exp(-(x - m) ** 2 / (2 * s ** 2))
return h / h.sum()
@@ -80,7 +80,7 @@ def get_2D_samples_gauss(n, m, sigma, random_state=None):
return make_2D_samples_gauss(n, m, sigma, random_state=None)
-def make_data_classif(dataset, n, nz=.5, theta=0, random_state=None, **kwargs):
+def make_data_classif(dataset, n, nz=.5, theta=0, p=.5, random_state=None, **kwargs):
"""Dataset generation for classification problems
Parameters
@@ -91,6 +91,8 @@ def make_data_classif(dataset, n, nz=.5, theta=0, random_state=None, **kwargs):
number of training samples
nz : float
noise level (>0)
+ p : float
+ proportion of samples from class 1 in the binary '2gauss_prop' dataset
random_state : int, RandomState instance or None, optional (default=None)
If int, random_state is the seed used by the random number generator;
If RandomState instance, random_state is the random number generator;
@@ -150,6 +152,17 @@ def make_data_classif(dataset, n, nz=.5, theta=0, random_state=None, **kwargs):
x = x.dot(rot)
+ elif dataset.lower() == '2gauss_prop':
+
+ y = np.concatenate((np.ones(int(p * n)), np.zeros(int((1 - p) * n))))
+ x = np.hstack((np.zeros((len(y), 1)), 1 - 2 * y[:, None])) + nz * np.random.randn(len(y), 2)
+
+ if ('bias' not in kwargs) and ('b' not in kwargs):
+ kwargs['bias'] = np.array([0, 2])
+ bias = kwargs.get('bias', kwargs.get('b'))
+
+ x[:, 0] += bias[0]
+ x[:, 1] += bias[1]
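+
+ # e.g. make_data_classif('2gauss_prop', 100, nz=.3, p=.2, bias=[0, 2])
+ # draws int(p * n) = 20 class-1 samples and 80 class-0 samples, shifted
+ # by the given bias (illustrative call mirroring the JCPOT example)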
+
else:
x = np.array(0)
y = np.array(0)
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index f4f6861..8d1baa0 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -2,8 +2,6 @@
"""
Solvers for the original linear program OT problem
-
-
"""
# Author: Remi Flamary <remi.flamary@unice.fr>
diff --git a/ot/plot.py b/ot/plot.py
index f403e98..ad436b4 100644
--- a/ot/plot.py
+++ b/ot/plot.py
@@ -78,9 +78,10 @@ def plot2D_samples_mat(xs, xt, G, thr=1e-8, **kwargs):
thr : float, optional
threshold above which the line is drawn
**kwargs : dict
- paameters given to the plot functions (default color is black if
+ parameters given to the plot functions (default color is black if
nothing given)
"""
+
if ('color' not in kwargs) and ('c' not in kwargs):
kwargs['color'] = 'k'
mx = G.max()
diff --git a/ot/utils.py b/ot/utils.py
index b71458b..a633be2 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -49,6 +49,12 @@ def kernel(x1, x2, method='gaussian', sigma=1, **kwargs):
return K
+def laplacian(x):
+ """Compute the (unnormalized) graph Laplacian L = D - x of a symmetric similarity matrix x, where D is the diagonal degree matrix"""
+ L = np.diag(np.sum(x, axis=0)) - x
+ return L
+
+
def unif(n):
""" return a uniform histogram of length n (simplex)
diff --git a/test/test_da.py b/test/test_da.py
index 2a5e50e..372ebd4 100644
--- a/test/test_da.py
+++ b/test/test_da.py
@@ -5,7 +5,7 @@
# License: MIT License
import numpy as np
-from numpy.testing.utils import assert_allclose, assert_equal
+from numpy.testing import assert_allclose, assert_equal
import ot
from ot.datasets import make_data_classif
@@ -510,7 +510,6 @@ def test_mapping_transport_class():
def test_linear_mapping():
-
ns = 150
nt = 200
@@ -528,7 +527,6 @@ def test_linear_mapping():
def test_linear_mapping_class():
-
ns = 150
nt = 200
@@ -549,3 +547,111 @@ def test_linear_mapping_class():
Cst = np.cov(Xst.T)
np.testing.assert_allclose(Ct, Cst, rtol=1e-2, atol=1e-2)
+
+
+def test_jcpot_transport_class():
+ """test_jcpot_transport
+ """
+
+ ns1 = 150
+ ns2 = 150
+ nt = 200
+
+ Xs1, ys1 = make_data_classif('3gauss', ns1)
+ Xs2, ys2 = make_data_classif('3gauss', ns2)
+
+ Xt, yt = make_data_classif('3gauss2', nt)
+
+ Xs = [Xs1, Xs2]
+ ys = [ys1, ys2]
+
+ otda = ot.da.JCPOTTransport(reg_e=0.01, max_iter=1000, tol=1e-9, verbose=True, log=True)
+
+ # test its computed
+ otda.fit(Xs=Xs, ys=ys, Xt=Xt)
+
+ assert hasattr(otda, "coupling_")
+ assert hasattr(otda, "proportions_")
+ assert hasattr(otda, "log_")
+
+ # test dimensions of coupling
+ for i, xs in enumerate(Xs):
+ assert_equal(otda.coupling_[i].shape, ((xs.shape[0], Xt.shape[0])))
+
+ # test all margin constraints
+ mu_t = unif(nt)
+
+ for i in range(len(Xs)):
+ # test margin constraints w.r.t. uniform target weights for each coupling matrix
+ assert_allclose(
+ np.sum(otda.coupling_[i], axis=0), mu_t, rtol=1e-3, atol=1e-3)
+
+ # test margin constraints w.r.t. modified source weights for each source domain
+
+ assert_allclose(
+ np.dot(otda.log_['all_domains'][i]['D1'], np.sum(otda.coupling_[i], axis=1)), otda.proportions_, rtol=1e-3,
+ atol=1e-3)
+
+ # test transform
+ transp_Xs = otda.transform(Xs=Xs)
+ [assert_equal(x.shape, y.shape) for x, y in zip(transp_Xs, Xs)]
+
+ Xs_new, _ = make_data_classif('3gauss', ns1 + 1)
+ transp_Xs_new = otda.transform(Xs_new)
+
+ # check that the oos method is working
+ assert_equal(transp_Xs_new.shape, Xs_new.shape)
+
+
+def test_emd_laplace_class():
+ """test_emd_laplace_transport
+ """
+ ns = 150
+ nt = 200
+
+ Xs, ys = make_data_classif('3gauss', ns)
+ Xt, yt = make_data_classif('3gauss2', nt)
+
+ otda = ot.da.EMDLaplaceTransport(reg_lap=0.01, max_iter=1000, tol=1e-9, verbose=False, log=True)
+
+ # test its computed
+ otda.fit(Xs=Xs, ys=ys, Xt=Xt)
+
+ assert hasattr(otda, "coupling_")
+ assert hasattr(otda, "log_")
+
+ # test dimensions of coupling
+ assert_equal(otda.coupling_.shape, ((Xs.shape[0], Xt.shape[0])))
+
+ # test all margin constraints
+ mu_s = unif(ns)
+ mu_t = unif(nt)
+
+ assert_allclose(
+ np.sum(otda.coupling_, axis=0), mu_t, rtol=1e-3, atol=1e-3)
+ assert_allclose(
+ np.sum(otda.coupling_, axis=1), mu_s, rtol=1e-3, atol=1e-3)
+
+ # test transform
+ transp_Xs = otda.transform(Xs=Xs)
+ [assert_equal(x.shape, y.shape) for x, y in zip(transp_Xs, Xs)]
+
+ Xs_new, _ = make_data_classif('3gauss', ns + 1)
+ transp_Xs_new = otda.transform(Xs_new)
+
+ # check that the oos method is working
+ assert_equal(transp_Xs_new.shape, Xs_new.shape)
+
+ # test inverse transform
+ transp_Xt = otda.inverse_transform(Xt=Xt)
+ assert_equal(transp_Xt.shape, Xt.shape)
+
+ Xt_new, _ = make_data_classif('3gauss2', nt + 1)
+ transp_Xt_new = otda.inverse_transform(Xt=Xt_new)
+
+ # check that the oos method is working
+ assert_equal(transp_Xt_new.shape, Xt_new.shape)
+
+ # test fit_transform
+ transp_Xs = otda.fit_transform(Xs=Xs, Xt=Xt)
+ assert_equal(transp_Xs.shape, Xs.shape)