Diffstat (limited to 'ot/da.py')
-rw-r--r--  ot/da.py  1506
1 file changed, 1206 insertions(+), 300 deletions(-)
diff --git a/ot/da.py b/ot/da.py
index ddf1c60..564c7b7 100644
--- a/ot/da.py
+++ b/ot/da.py
@@ -3,22 +3,34 @@
Domain adaptation with optimal transport
"""
+# Author: Remi Flamary <remi.flamary@unice.fr>
+# Nicolas Courty <ncourty@irisa.fr>
+# Michael Perrot <michael.perrot@univ-st-etienne.fr>
+#
+# License: MIT License
+
import numpy as np
+
from .bregman import sinkhorn
from .lp import emd
-from .utils import unif,dist,kernel
+from .utils import unif, dist, kernel, cost_normalization
+from .utils import check_params, deprecated, BaseEstimator
from .optim import cg
from .optim import gcg
-def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerItermax = 200,stopInnerThr=1e-9,verbose=False,log=False):
+def sinkhorn_lpl1_mm(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
+ numInnerItermax=200, stopInnerThr=1e-9, verbose=False,
+ log=False):
"""
- Solve the entropic regularization optimal transport problem with nonconvex group lasso regularization
+ Solve the entropic regularization optimal transport problem with nonconvex
+ group lasso regularization
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)+ \eta \Omega_g(\gamma)
+ \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)
+ + \eta \Omega_g(\gamma)
s.t. \gamma 1 = a
@@ -28,11 +40,16 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter
where :
- M is the (ns,nt) metric cost matrix
- - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - :math:`\Omega_g` is the group lasso regulaization term :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1` where :math:`\mathcal{I}_c` are the index of samples from class c in the source domain.
+ - :math:`\Omega_e` is the entropic regularization term
+ :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`\Omega_g` is the group lasso regularization term
+ :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^{1/2}_1`
+ where :math:`\mathcal{I}_c` is the set of indices of samples from
+ class c in the source domain.
- a and b are source and target weights (sum to 1)
- The algorithm used for solving the problem is the generalised conditional gradient as proposed in [5]_ [7]_
+ The algorithm used for solving the problem is the generalised conditional
+ gradient as proposed in [5]_ [7]_
Parameters
@@ -72,8 +89,13 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter
References
----------
- .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
- .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized conditional gradient: analysis of convergence and applications. arXiv preprint arXiv:1510.06567.
+ .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+ "Optimal Transport for Domain Adaptation," in IEEE
+ Transactions on Pattern Analysis and Machine Intelligence ,
+ vol.PP, no.99, pp.1-1
+ .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
+ Generalized conditional gradient: analysis of convergence
+ and applications. arXiv preprint arXiv:1510.06567.
See Also
--------
@@ -94,7 +116,7 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter
W = np.zeros(M.shape)
for cpt in range(numItermax):
- Mreg = M + eta*W
+ Mreg = M + eta * W
transp = sinkhorn(a, b, Mreg, reg, numItermax=numInnerItermax,
stopThr=stopInnerThr)
# the transport has been computed. Check if classes are really
@@ -102,19 +124,24 @@ def sinkhorn_lpl1_mm(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter
W = np.ones(M.shape)
for (i, c) in enumerate(classes):
majs = np.sum(transp[indices_labels[i]], axis=0)
- majs = p*((majs+epsilon)**(p-1))
+ majs = p * ((majs + epsilon)**(p - 1))
W[indices_labels[i]] = majs
return transp
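For orientation, here is a minimal usage sketch of the refactored sinkhorn_lpl1_mm on random toy data (array sizes and regularization values are illustrative, not taken from this patch):

import numpy as np
import ot

rng = np.random.RandomState(42)
xs, xt = rng.randn(20, 2), rng.randn(30, 2) + 2
ys = rng.randint(0, 2, 20)            # source class labels

a, b = ot.unif(20), ot.unif(30)       # uniform marginals
M = ot.dist(xs, xt)                   # squared euclidean cost matrix

# entropic OT with the nonconvex lpl1 class regularizer
G = ot.da.sinkhorn_lpl1_mm(a, ys, b, M, reg=0.1, eta=0.1)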
-def sinkhorn_l1l2_gl(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerItermax = 200,stopInnerThr=1e-9,verbose=False,log=False):
+
+def sinkhorn_l1l2_gl(a, labels_a, b, M, reg, eta=0.1, numItermax=10,
+ numInnerItermax=200, stopInnerThr=1e-9, verbose=False,
+ log=False):
"""
- Solve the entropic regularization optimal transport problem with group lasso regularization
+ Solve the entropic regularization optimal transport problem with group
+ lasso regularization
The function solves the following optimization problem:
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)+ \eta \Omega_g(\gamma)
+ \gamma = arg\min_\gamma <\gamma,M>_F + reg\cdot\Omega_e(\gamma)+
+ \eta \Omega_g(\gamma)
s.t. \gamma 1 = a
@@ -124,11 +151,16 @@ def sinkhorn_l1l2_gl(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter
where :
- M is the (ns,nt) metric cost matrix
- - :math:`\Omega_e` is the entropic regularization term :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
- - :math:`\Omega_g` is the group lasso regulaization term :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^2` where :math:`\mathcal{I}_c` are the index of samples from class c in the source domain.
+ - :math:`\Omega_e` is the entropic regularization term
+ :math:`\Omega_e(\gamma)=\sum_{i,j} \gamma_{i,j}\log(\gamma_{i,j})`
+ - :math:`\Omega_g` is the group lasso regularization term
+ :math:`\Omega_g(\gamma)=\sum_{i,c} \|\gamma_{i,\mathcal{I}_c}\|^2`
+ where :math:`\mathcal{I}_c` is the set of indices of samples from
+ class c in the source domain.
- a and b are source and target weights (sum to 1)
- The algorithm used for solving the problem is the generalised conditional gradient as proposed in [5]_ [7]_
+ The algorithm used for solving the problem is the generalised conditional
+ gradient as proposed in [5]_ [7]_
Parameters
@@ -168,46 +200,54 @@ def sinkhorn_l1l2_gl(a,labels_a, b, M, reg, eta=0.1,numItermax = 10,numInnerIter
References
----------
- .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
- .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015). Generalized conditional gradient: analysis of convergence and applications. arXiv preprint arXiv:1510.06567.
+ .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+ "Optimal Transport for Domain Adaptation," in IEEE Transactions
+ on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
+ .. [7] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
+ Generalized conditional gradient: analysis of convergence and
+ applications. arXiv preprint arXiv:1510.06567.
See Also
--------
ot.optim.gcg : Generalized conditional gradient for OT problems
"""
- lstlab=np.unique(labels_a)
+ lstlab = np.unique(labels_a)
def f(G):
- res=0
+ res = 0
for i in range(G.shape[1]):
for lab in lstlab:
- temp=G[labels_a==lab,i]
- res+=np.linalg.norm(temp)
+ temp = G[labels_a == lab, i]
+ res += np.linalg.norm(temp)
return res
def df(G):
- W=np.zeros(G.shape)
+ W = np.zeros(G.shape)
for i in range(G.shape[1]):
for lab in lstlab:
- temp=G[labels_a==lab,i]
- n=np.linalg.norm(temp)
+ temp = G[labels_a == lab, i]
+ n = np.linalg.norm(temp)
if n:
- W[labels_a==lab,i]=temp/n
+ W[labels_a == lab, i] = temp / n
return W
-
- return gcg(a,b,M,reg,eta,f,df,G0=None,numItermax = numItermax,numInnerItermax=numInnerItermax, stopThr=stopInnerThr,verbose=verbose,log=log)
+ return gcg(a, b, M, reg, eta, f, df, G0=None, numItermax=numItermax,
+ numInnerItermax=numInnerItermax, stopThr=stopInnerThr,
+ verbose=verbose, log=log)
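The l1l2 variant is called identically; only the group regularizer, and hence the f/df pair handed to gcg above, changes. A minimal sketch assuming the same kind of inputs as in the previous example:

import numpy as np
import ot

rng = np.random.RandomState(42)
xs, xt = rng.randn(20, 2), rng.randn(30, 2) + 2
ys = rng.randint(0, 2, 20)

a, b = ot.unif(20), ot.unif(30)
M = ot.dist(xs, xt)

# entropic OT with the convex l1l2 group lasso regularizer
G = ot.da.sinkhorn_l1l2_gl(a, ys, b, M, reg=0.1, eta=0.1,
                           numItermax=10, numInnerItermax=200)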
-
-def joint_OT_mapping_linear(xs,xt,mu=1,eta=0.001,bias=False,verbose=False,verbose2=False,numItermax = 100,numInnerItermax = 10,stopInnerThr=1e-6,stopThr=1e-5,log=False,**kwargs):
+def joint_OT_mapping_linear(xs, xt, mu=1, eta=0.001, bias=False, verbose=False,
+ verbose2=False, numItermax=100, numInnerItermax=10,
+ stopInnerThr=1e-6, stopThr=1e-5, log=False,
+ **kwargs):
"""Joint OT and linear mapping estimation as proposed in [8]
The function solves the following optimization problem:
.. math::
- \min_{\gamma,L}\quad \|L(X_s) -n_s\gamma X_t\|^2_F + \mu<\gamma,M>_F + \eta \|L -I\|^2_F
+ \min_{\gamma,L}\quad \|L(X_s) -n_s\gamma X_t\|^2_F +
+ \mu<\gamma,M>_F + \eta \|L -I\|^2_F
s.t. \gamma 1 = a
@@ -216,8 +256,10 @@ def joint_OT_mapping_linear(xs,xt,mu=1,eta=0.001,bias=False,verbose=False,verbos
\gamma\geq 0
where :
- - M is the (ns,nt) squared euclidean cost matrix between samples in Xs and Xt (scaled by ns)
- - :math:`L` is a dxd linear operator that approximates the barycentric mapping
+ - M is the (ns,nt) squared euclidean cost matrix between samples in
+ Xs and Xt (scaled by ns)
+ - :math:`L` is a dxd linear operator that approximates the barycentric
+ mapping
- :math:`I` is the identity matrix (neutral linear mapping)
- a and b are uniform source and target weights
@@ -272,7 +314,9 @@ def joint_OT_mapping_linear(xs,xt,mu=1,eta=0.001,bias=False,verbose=False,verbos
References
----------
- .. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard, "Mapping estimation for discrete optimal transport", Neural Information Processing Systems (NIPS), 2016.
+ .. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard,
+ "Mapping estimation for discrete optimal transport",
+ Neural Information Processing Systems (NIPS), 2016.
See Also
--------
@@ -281,103 +325,116 @@ def joint_OT_mapping_linear(xs,xt,mu=1,eta=0.001,bias=False,verbose=False,verbos
"""
- ns,nt,d=xs.shape[0],xt.shape[0],xt.shape[1]
+ ns, nt, d = xs.shape[0], xt.shape[0], xt.shape[1]
if bias:
- xs1=np.hstack((xs,np.ones((ns,1))))
- xstxs=xs1.T.dot(xs1)
- I=np.eye(d+1)
- I[-1]=0
- I0=I[:,:-1]
- sel=lambda x : x[:-1,:]
+ xs1 = np.hstack((xs, np.ones((ns, 1))))
+ xstxs = xs1.T.dot(xs1)
+ I = np.eye(d + 1)
+ I[-1] = 0
+ I0 = I[:, :-1]
+
+ def sel(x):
+ return x[:-1, :]
else:
- xs1=xs
- xstxs=xs1.T.dot(xs1)
- I=np.eye(d)
- I0=I
- sel=lambda x : x
+ xs1 = xs
+ xstxs = xs1.T.dot(xs1)
+ I = np.eye(d)
+ I0 = I
+
+ def sel(x):
+ return x
if log:
- log={'err':[]}
+ log = {'err': []}
- a,b=unif(ns),unif(nt)
- M=dist(xs,xt)*ns
- G=emd(a,b,M)
+ a, b = unif(ns), unif(nt)
+ M = dist(xs, xt) * ns
+ G = emd(a, b, M)
- vloss=[]
+ vloss = []
- def loss(L,G):
+ def loss(L, G):
"""Compute full loss"""
- return np.sum((xs1.dot(L)-ns*G.dot(xt))**2)+mu*np.sum(G*M)+eta*np.sum(sel(L-I0)**2)
+ return np.sum((xs1.dot(L) - ns * G.dot(xt))**2) + mu * np.sum(G * M) + eta * np.sum(sel(L - I0)**2)
def solve_L(G):
""" solve L problem with fixed G (least square)"""
- xst=ns*G.dot(xt)
- return np.linalg.solve(xstxs+eta*I,xs1.T.dot(xst)+eta*I0)
+ xst = ns * G.dot(xt)
+ return np.linalg.solve(xstxs + eta * I, xs1.T.dot(xst) + eta * I0)
- def solve_G(L,G0):
+ def solve_G(L, G0):
"""Update G with CG algorithm"""
- xsi=xs1.dot(L)
+ xsi = xs1.dot(L)
+
def f(G):
- return np.sum((xsi-ns*G.dot(xt))**2)
+ return np.sum((xsi - ns * G.dot(xt))**2)
+
def df(G):
- return -2*ns*(xsi-ns*G.dot(xt)).dot(xt.T)
- G=cg(a,b,M,1.0/mu,f,df,G0=G0,numItermax=numInnerItermax,stopThr=stopInnerThr)
+ return -2 * ns * (xsi - ns * G.dot(xt)).dot(xt.T)
+ G = cg(a, b, M, 1.0 / mu, f, df, G0=G0,
+ numItermax=numInnerItermax, stopThr=stopInnerThr)
return G
+ L = solve_L(G)
- L=solve_L(G)
-
- vloss.append(loss(L,G))
+ vloss.append(loss(L, G))
if verbose:
- print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32)
- print('{:5d}|{:8e}|{:8e}'.format(0,vloss[-1],0))
-
+ print('{:5s}|{:12s}|{:8s}'.format(
+ 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32)
+ print('{:5d}|{:8e}|{:8e}'.format(0, vloss[-1], 0))
# init loop
- if numItermax>0:
- loop=1
+ if numItermax > 0:
+ loop = 1
else:
- loop=0
- it=0
+ loop = 0
+ it = 0
while loop:
- it+=1
+ it += 1
# update G
- G=solve_G(L,G)
+ G = solve_G(L, G)
- #update L
- L=solve_L(G)
+ # update L
+ L = solve_L(G)
- vloss.append(loss(L,G))
+ vloss.append(loss(L, G))
- if it>=numItermax:
- loop=0
+ if it >= numItermax:
+ loop = 0
- if abs(vloss[-1]-vloss[-2])/abs(vloss[-2])<stopThr:
- loop=0
+ if abs(vloss[-1] - vloss[-2]) / abs(vloss[-2]) < stopThr:
+ loop = 0
if verbose:
- if it%20==0:
- print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32)
- print('{:5d}|{:8e}|{:8e}'.format(it,vloss[-1],(vloss[-1]-vloss[-2])/abs(vloss[-2])))
+ if it % 20 == 0:
+ print('{:5s}|{:12s}|{:8s}'.format(
+ 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32)
+ print('{:5d}|{:8e}|{:8e}'.format(
+ it, vloss[-1], (vloss[-1] - vloss[-2]) / abs(vloss[-2])))
if log:
- log['loss']=vloss
- return G,L,log
+ log['loss'] = vloss
+ return G, L, log
else:
- return G,L
+ return G, L
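A sketch of how the returned pair (G, L) is meant to be used: L maps new points with a plain dot product, with a bias column appended when bias=True (toy data, illustrative parameters):

import numpy as np
import ot

rng = np.random.RandomState(0)
xs, xt = rng.randn(50, 2), rng.randn(40, 2) + 1

G, L = ot.da.joint_OT_mapping_linear(xs, xt, mu=1, eta=1e-3, bias=True)

# map unseen points: append the bias column, then apply L
xnew = rng.randn(5, 2)
xnew_mapped = np.hstack((xnew, np.ones((5, 1)))).dot(L)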
-def joint_OT_mapping_kernel(xs,xt,mu=1,eta=0.001,kerneltype='gaussian',sigma=1,bias=False,verbose=False,verbose2=False,numItermax = 100,numInnerItermax = 10,stopInnerThr=1e-6,stopThr=1e-5,log=False,**kwargs):
+def joint_OT_mapping_kernel(xs, xt, mu=1, eta=0.001, kerneltype='gaussian',
+ sigma=1, bias=False, verbose=False, verbose2=False,
+ numItermax=100, numInnerItermax=10,
+ stopInnerThr=1e-6, stopThr=1e-5, log=False,
+ **kwargs):
"""Joint OT and nonlinear mapping estimation with kernels as proposed in [8]
The function solves the following optimization problem:
.. math::
- \min_{\gamma,L\in\mathcal{H}}\quad \|L(X_s) -n_s\gamma X_t\|^2_F + \mu<\gamma,M>_F + \eta \|L\|^2_\mathcal{H}
+ \min_{\gamma,L\in\mathcal{H}}\quad \|L(X_s) -
+ n_s\gamma X_t\|^2_F + \mu<\gamma,M>_F + \eta \|L\|^2_\mathcal{H}
s.t. \gamma 1 = a
@@ -386,8 +443,10 @@ def joint_OT_mapping_kernel(xs,xt,mu=1,eta=0.001,kerneltype='gaussian',sigma=1,b
\gamma\geq 0
where :
- - M is the (ns,nt) squared euclidean cost matrix between samples in Xs and Xt (scaled by ns)
- - :math:`L` is a ns x d linear operator on a kernel matrix that approximates the barycentric mapping
+ - M is the (ns,nt) squared euclidean cost matrix between samples in
+ Xs and Xt (scaled by ns)
+ - :math:`L` is a ns x d linear operator on a kernel matrix that
+ approximates the barycentric mapping
- a and b are uniform source and target weights
The problem consists of jointly solving an optimal transport matrix
@@ -445,7 +504,9 @@ def joint_OT_mapping_kernel(xs,xt,mu=1,eta=0.001,kerneltype='gaussian',sigma=1,b
References
----------
- .. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard, "Mapping estimation for discrete optimal transport", Neural Information Processing Systems (NIPS), 2016.
+ .. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard,
+ "Mapping estimation for discrete optimal transport",
+ Neural Information Processing Systems (NIPS), 2016.
See Also
--------
@@ -454,161 +515,170 @@ def joint_OT_mapping_kernel(xs,xt,mu=1,eta=0.001,kerneltype='gaussian',sigma=1,b
"""
- ns,nt=xs.shape[0],xt.shape[0]
+ ns, nt = xs.shape[0], xt.shape[0]
- K=kernel(xs,xs,method=kerneltype,sigma=sigma)
+ K = kernel(xs, xs, method=kerneltype, sigma=sigma)
if bias:
- K1=np.hstack((K,np.ones((ns,1))))
- I=np.eye(ns+1)
- I[-1]=0
- Kp=np.eye(ns+1)
- Kp[:ns,:ns]=K
+ K1 = np.hstack((K, np.ones((ns, 1))))
+ I = np.eye(ns + 1)
+ I[-1] = 0
+ Kp = np.eye(ns + 1)
+ Kp[:ns, :ns] = K
# ls regu
- #K0 = K1.T.dot(K1)+eta*I
- #Kreg=I
+ # K0 = K1.T.dot(K1)+eta*I
+ # Kreg=I
# RKHS regul
- K0 = K1.T.dot(K1)+eta*Kp
- Kreg=Kp
+ K0 = K1.T.dot(K1) + eta * Kp
+ Kreg = Kp
else:
- K1=K
- I=np.eye(ns)
+ K1 = K
+ I = np.eye(ns)
# ls regul
- #K0 = K1.T.dot(K1)+eta*I
- #Kreg=I
+ # K0 = K1.T.dot(K1)+eta*I
+ # Kreg=I
# proper kernel ridge
- K0=K+eta*I
- Kreg=K
-
-
-
+ K0 = K + eta * I
+ Kreg = K
if log:
- log={'err':[]}
+ log = {'err': []}
- a,b=unif(ns),unif(nt)
- M=dist(xs,xt)*ns
- G=emd(a,b,M)
+ a, b = unif(ns), unif(nt)
+ M = dist(xs, xt) * ns
+ G = emd(a, b, M)
- vloss=[]
+ vloss = []
- def loss(L,G):
+ def loss(L, G):
"""Compute full loss"""
- return np.sum((K1.dot(L)-ns*G.dot(xt))**2)+mu*np.sum(G*M)+eta*np.trace(L.T.dot(Kreg).dot(L))
+ return np.sum((K1.dot(L) - ns * G.dot(xt))**2) + mu * np.sum(G * M) + eta * np.trace(L.T.dot(Kreg).dot(L))
def solve_L_nobias(G):
""" solve L problem with fixed G (least square)"""
- xst=ns*G.dot(xt)
- return np.linalg.solve(K0,xst)
+ xst = ns * G.dot(xt)
+ return np.linalg.solve(K0, xst)
def solve_L_bias(G):
""" solve L problem with fixed G (least square)"""
- xst=ns*G.dot(xt)
- return np.linalg.solve(K0,K1.T.dot(xst))
+ xst = ns * G.dot(xt)
+ return np.linalg.solve(K0, K1.T.dot(xst))
- def solve_G(L,G0):
+ def solve_G(L, G0):
"""Update G with CG algorithm"""
- xsi=K1.dot(L)
+ xsi = K1.dot(L)
+
def f(G):
- return np.sum((xsi-ns*G.dot(xt))**2)
+ return np.sum((xsi - ns * G.dot(xt))**2)
+
def df(G):
- return -2*ns*(xsi-ns*G.dot(xt)).dot(xt.T)
- G=cg(a,b,M,1.0/mu,f,df,G0=G0,numItermax=numInnerItermax,stopThr=stopInnerThr)
+ return -2 * ns * (xsi - ns * G.dot(xt)).dot(xt.T)
+ G = cg(a, b, M, 1.0 / mu, f, df, G0=G0,
+ numItermax=numInnerItermax, stopThr=stopInnerThr)
return G
if bias:
- solve_L=solve_L_bias
+ solve_L = solve_L_bias
else:
- solve_L=solve_L_nobias
+ solve_L = solve_L_nobias
- L=solve_L(G)
+ L = solve_L(G)
- vloss.append(loss(L,G))
+ vloss.append(loss(L, G))
if verbose:
- print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32)
- print('{:5d}|{:8e}|{:8e}'.format(0,vloss[-1],0))
-
+ print('{:5s}|{:12s}|{:8s}'.format(
+ 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32)
+ print('{:5d}|{:8e}|{:8e}'.format(0, vloss[-1], 0))
# init loop
- if numItermax>0:
- loop=1
+ if numItermax > 0:
+ loop = 1
else:
- loop=0
- it=0
+ loop = 0
+ it = 0
while loop:
- it+=1
+ it += 1
# update G
- G=solve_G(L,G)
+ G = solve_G(L, G)
- #update L
- L=solve_L(G)
+ # update L
+ L = solve_L(G)
- vloss.append(loss(L,G))
+ vloss.append(loss(L, G))
- if it>=numItermax:
- loop=0
+ if it >= numItermax:
+ loop = 0
- if abs(vloss[-1]-vloss[-2])/abs(vloss[-2])<stopThr:
- loop=0
+ if abs(vloss[-1] - vloss[-2]) / abs(vloss[-2]) < stopThr:
+ loop = 0
if verbose:
- if it%20==0:
- print('{:5s}|{:12s}|{:8s}'.format('It.','Loss','Delta loss')+'\n'+'-'*32)
- print('{:5d}|{:8e}|{:8e}'.format(it,vloss[-1],(vloss[-1]-vloss[-2])/abs(vloss[-2])))
+ if it % 20 == 0:
+ print('{:5s}|{:12s}|{:8s}'.format(
+ 'It.', 'Loss', 'Delta loss') + '\n' + '-' * 32)
+ print('{:5d}|{:8e}|{:8e}'.format(
+ it, vloss[-1], (vloss[-1] - vloss[-2]) / abs(vloss[-2])))
if log:
- log['loss']=vloss
- return G,L,log
+ log['loss'] = vloss
+ return G, L, log
else:
- return G,L
+ return G, L
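The kernel variant returns an L that acts on kernel features, so unseen points must first be embedded against the fitted source samples with the same kernel; a sketch under the same toy setup (default bias=False assumed):

import numpy as np
import ot
from ot.utils import kernel

rng = np.random.RandomState(0)
xs, xt = rng.randn(50, 2), rng.randn(40, 2) + 1

G, L = ot.da.joint_OT_mapping_kernel(xs, xt, mu=1, eta=1e-3,
                                     kerneltype='gaussian', sigma=1)

# embed new points with the same gaussian kernel, then map
xnew = rng.randn(5, 2)
K = kernel(xnew, xs, method='gaussian', sigma=1)
xnew_mapped = K.dot(L)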
+@deprecated("The class OTDA is deprecated in 0.3.1 and will be "
+ "removed in 0.5"
+ "\n\tfor standard transport use class EMDTransport instead.")
class OTDA(object):
+
"""Class for domain adaptation with optimal transport as proposed in [5]
References
----------
- .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy, "Optimal Transport for Domain Adaptation," in IEEE Transactions on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
+ .. [5] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+ "Optimal Transport for Domain Adaptation," in IEEE Transactions on
+ Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
"""
- def __init__(self,metric='sqeuclidean'):
+ def __init__(self, metric='sqeuclidean', norm=None):
""" Class initialization"""
- self.xs=0
- self.xt=0
- self.G=0
- self.metric=metric
- self.computed=False
-
-
- def fit(self,xs,xt,ws=None,wt=None,norm=None):
- """ Fit domain adaptation between samples is xs and xt (with optional weights)"""
- self.xs=xs
- self.xt=xt
+ self.xs = 0
+ self.xt = 0
+ self.G = 0
+ self.metric = metric
+ self.norm = norm
+ self.computed = False
+
+ def fit(self, xs, xt, ws=None, wt=None, max_iter=100000):
+ """Fit domain adaptation between samples is xs and xt
+ (with optional weights)"""
+ self.xs = xs
+ self.xt = xt
if wt is None:
- wt=unif(xt.shape[0])
+ wt = unif(xt.shape[0])
if ws is None:
- ws=unif(xs.shape[0])
+ ws = unif(xs.shape[0])
- self.ws=ws
- self.wt=wt
+ self.ws = ws
+ self.wt = wt
- self.M=dist(xs,xt,metric=self.metric)
- self.normalizeM(norm)
- self.G=emd(ws,wt,self.M)
- self.computed=True
+ self.M = dist(xs, xt, metric=self.metric)
+ self.M = cost_normalization(self.M, self.norm)
+ self.G = emd(ws, wt, self.M, max_iter)
+ self.computed = True
- def interp(self,direction=1):
+ def interp(self, direction=1):
"""Barycentric interpolation for the source (1) or target (-1) samples
This Barycentric interpolation solves for each source (resp target)
@@ -623,28 +693,28 @@ class OTDA(object):
metric could be used in the future.
"""
- if direction>0: # >0 then source to target
- G=self.G
- w=self.ws.reshape((self.xs.shape[0],1))
- x=self.xt
+ if direction > 0: # >0 then source to target
+ G = self.G
+ w = self.ws.reshape((self.xs.shape[0], 1))
+ x = self.xt
else:
- G=self.G.T
- w=self.wt.reshape((self.xt.shape[0],1))
- x=self.xs
+ G = self.G.T
+ w = self.wt.reshape((self.xt.shape[0], 1))
+ x = self.xs
if self.computed:
- if self.metric=='sqeuclidean':
- return np.dot(G/w,x) # weighted mean
+ if self.metric == 'sqeuclidean':
+ return np.dot(G / w, x) # weighted mean
else:
- print("Warning, metric not handled yet, using weighted average")
- return np.dot(G/w,x) # weighted mean
+ print(
+ "Warning, metric not handled yet, using weighted average")
+ return np.dot(G / w, x) # weighted mean
return None
else:
print("Warning, model not fitted yet, returning None")
return None
-
- def predict(self,x,direction=1):
+ def predict(self, x, direction=1):
""" Out of sample mapping using the formulation from [6]
For each sample x to map, it finds the nearest source sample xs and
@@ -654,183 +724,1019 @@ class OTDA(object):
References
----------
- .. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014). Regularized discrete optimal transport. SIAM Journal on Imaging Sciences, 7(3), 1853-1882.
+ .. [6] Ferradans, S., Papadakis, N., Peyré, G., & Aujol, J. F. (2014).
+ Regularized discrete optimal transport. SIAM Journal on Imaging
+ Sciences, 7(3), 1853-1882.
"""
- if direction>0: # >0 then source to target
- xf=self.xt
- x0=self.xs
+ if direction > 0: # >0 then source to target
+ xf = self.xt
+ x0 = self.xs
else:
- xf=self.xs
- x0=self.xt
-
- D0=dist(x,x0) # dist netween new samples an source
- idx=np.argmin(D0,1) # closest one
- xf=self.interp(direction)# interp the source samples
- return xf[idx,:]+x-x0[idx,:] # aply the delta to the interpolation
-
- def normalizeM(self, norm):
- """ Apply normalization to the loss matrix
-
-
- Parameters
- ----------
- norm : str
- type of normalization from 'median','max','log','loglog'
-
- """
-
- if norm == "median":
- self.M /= float(np.median(self.M))
- elif norm == "max":
- self.M /= float(np.max(self.M))
- elif norm == "log":
- self.M = np.log(1 + self.M)
- elif norm == "loglog":
- self.M = np.log(1 + np.log(1 + self.M))
-
+ xf = self.xs
+ x0 = self.xt
+ D0 = dist(x, x0) # dist between new samples and source
+ idx = np.argmin(D0, 1) # closest one
+ xf = self.interp(direction) # interp the source samples
+ # apply the delta to the interpolation
+ return xf[idx, :] + x - x0[idx, :]
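The rule implemented above is the one from [6]: an unseen point inherits the barycentric image of its nearest fitted source sample, shifted by its offset to that sample. A sketch with the (deprecated) class API on toy data:

import numpy as np
import ot

rng = np.random.RandomState(0)
xs, xt = rng.randn(20, 2), rng.randn(30, 2) + 1

da = ot.da.OTDA()                 # deprecated, see EMDTransport
da.fit(xs, xt)
xs_interp = da.interp()           # barycentric mapping of fitted samples
xnew_mapped = da.predict(rng.randn(5, 2))   # nearest neighbor + delta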
+
+@deprecated("The class OTDA_sinkhorn is deprecated in 0.3.1 and will be"
+ " removed in 0.5 \nUse class SinkhornTransport instead.")
class OTDA_sinkhorn(OTDA):
- """Class for domain adaptation with optimal transport with entropic regularization"""
- def fit(self,xs,xt,reg=1,ws=None,wt=None,norm=None,**kwargs):
- """ Fit regularized domain adaptation between samples is xs and xt (with optional weights)"""
- self.xs=xs
- self.xt=xt
+ """Class for domain adaptation with optimal transport with entropic
+ regularization
+
+
+ """
+
+ def fit(self, xs, xt, reg=1, ws=None, wt=None, **kwargs):
+ """Fit regularized domain adaptation between samples is xs and xt
+ (with optional weights)"""
+ self.xs = xs
+ self.xt = xt
if wt is None:
- wt=unif(xt.shape[0])
+ wt = unif(xt.shape[0])
if ws is None:
- ws=unif(xs.shape[0])
+ ws = unif(xs.shape[0])
- self.ws=ws
- self.wt=wt
+ self.ws = ws
+ self.wt = wt
- self.M=dist(xs,xt,metric=self.metric)
- self.normalizeM(norm)
- self.G=sinkhorn(ws,wt,self.M,reg,**kwargs)
- self.computed=True
+ self.M = dist(xs, xt, metric=self.metric)
+ self.M = cost_normalization(self.M, self.norm)
+ self.G = sinkhorn(ws, wt, self.M, reg, **kwargs)
+ self.computed = True
+@deprecated("The class OTDA_lpl1 is deprecated in 0.3.1 and will be"
+ " removed in 0.5 \nUse class SinkhornLpl1Transport instead.")
class OTDA_lpl1(OTDA):
- """Class for domain adaptation with optimal transport with entropic and group regularization"""
+ """Class for domain adaptation with optimal transport with entropic and
+ group regularization"""
- def fit(self,xs,ys,xt,reg=1,eta=1,ws=None,wt=None,norm=None,**kwargs):
- """ Fit regularized domain adaptation between samples is xs and xt (with optional weights), See ot.da.sinkhorn_lpl1_mm for fit parameters"""
- self.xs=xs
- self.xt=xt
+ def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, **kwargs):
+ """Fit regularized domain adaptation between samples is xs and xt
+ (with optional weights), See ot.da.sinkhorn_lpl1_mm for fit
+ parameters"""
+ self.xs = xs
+ self.xt = xt
if wt is None:
- wt=unif(xt.shape[0])
+ wt = unif(xt.shape[0])
if ws is None:
- ws=unif(xs.shape[0])
+ ws = unif(xs.shape[0])
+
+ self.ws = ws
+ self.wt = wt
- self.ws=ws
- self.wt=wt
+ self.M = dist(xs, xt, metric=self.metric)
+ self.M = cost_normalization(self.M, self.norm)
+ self.G = sinkhorn_lpl1_mm(ws, ys, wt, self.M, reg, eta, **kwargs)
+ self.computed = True
- self.M=dist(xs,xt,metric=self.metric)
- self.normalizeM(norm)
- self.G=sinkhorn_lpl1_mm(ws,ys,wt,self.M,reg,eta,**kwargs)
- self.computed=True
+@deprecated("The class OTDA_l1L2 is deprecated in 0.3.1 and will be"
+ " removed in 0.5 \nUse class SinkhornL1l2Transport instead.")
class OTDA_l1l2(OTDA):
- """Class for domain adaptation with optimal transport with entropic and group lasso regularization"""
+ """Class for domain adaptation with optimal transport with entropic
+ and group lasso regularization"""
- def fit(self,xs,ys,xt,reg=1,eta=1,ws=None,wt=None,norm=None,**kwargs):
- """ Fit regularized domain adaptation between samples is xs and xt (with optional weights), See ot.da.sinkhorn_lpl1_gl for fit parameters"""
- self.xs=xs
- self.xt=xt
+ def fit(self, xs, ys, xt, reg=1, eta=1, ws=None, wt=None, **kwargs):
+ """Fit regularized domain adaptation between samples is xs and xt
+ (with optional weights), See ot.da.sinkhorn_lpl1_gl for fit
+ parameters"""
+ self.xs = xs
+ self.xt = xt
if wt is None:
- wt=unif(xt.shape[0])
+ wt = unif(xt.shape[0])
if ws is None:
- ws=unif(xs.shape[0])
+ ws = unif(xs.shape[0])
+
+ self.ws = ws
+ self.wt = wt
- self.ws=ws
- self.wt=wt
+ self.M = dist(xs, xt, metric=self.metric)
+ self.M = cost_normalization(self.M, self.norm)
+ self.G = sinkhorn_l1l2_gl(ws, ys, wt, self.M, reg, eta, **kwargs)
+ self.computed = True
- self.M=dist(xs,xt,metric=self.metric)
- self.normalizeM(norm)
- self.G=sinkhorn_l1l2_gl(ws,ys,wt,self.M,reg,eta,**kwargs)
- self.computed=True
+@deprecated("The class OTDA_mapping_linear is deprecated in 0.3.1 and will be"
+ " removed in 0.5 \nUse class MappingTransport instead.")
class OTDA_mapping_linear(OTDA):
- """Class for optimal transport with joint linear mapping estimation as in [8]"""
+ """Class for optimal transport with joint linear mapping estimation as in
+ [8]
+ """
def __init__(self):
""" Class initialization"""
+ self.xs = 0
+ self.xt = 0
+ self.G = 0
+ self.L = 0
+ self.bias = False
+ self.computed = False
+ self.metric = 'sqeuclidean'
- self.xs=0
- self.xt=0
- self.G=0
- self.L=0
- self.bias=False
- self.computed=False
- self.metric='sqeuclidean'
-
- def fit(self,xs,xt,mu=1,eta=1,bias=False,**kwargs):
+ def fit(self, xs, xt, mu=1, eta=1, bias=False, **kwargs):
""" Fit domain adaptation between samples is xs and xt (with optional
weights)"""
- self.xs=xs
- self.xt=xt
- self.bias=bias
+ self.xs = xs
+ self.xt = xt
+ self.bias = bias
+ self.ws = unif(xs.shape[0])
+ self.wt = unif(xt.shape[0])
- self.ws=unif(xs.shape[0])
- self.wt=unif(xt.shape[0])
-
- self.G,self.L=joint_OT_mapping_linear(xs,xt,mu=mu,eta=eta,bias=bias,**kwargs)
- self.computed=True
+ self.G, self.L = joint_OT_mapping_linear(
+ xs, xt, mu=mu, eta=eta, bias=bias, **kwargs)
+ self.computed = True
def mapping(self):
return lambda x: self.predict(x)
-
- def predict(self,x):
+ def predict(self, x):
""" Out of sample mapping estimated during the call to fit"""
if self.computed:
if self.bias:
- x=np.hstack((x,np.ones((x.shape[0],1))))
- return x.dot(self.L) # aply the delta to the interpolation
+ x = np.hstack((x, np.ones((x.shape[0], 1))))
+ return x.dot(self.L) # apply the estimated linear mapping
else:
print("Warning, model not fitted yet, returning None")
return None
-class OTDA_mapping_kernel(OTDA_mapping_linear):
- """Class for optimal transport with joint nonlinear mapping estimation as in [8]"""
+@deprecated("The class OTDA_mapping_kernel is deprecated in 0.3.1 and will be"
+ " removed in 0.5 \nUse class MappingTransport instead.")
+class OTDA_mapping_kernel(OTDA_mapping_linear):
+ """Class for optimal transport with joint nonlinear mapping
+ estimation as in [8]"""
- def fit(self,xs,xt,mu=1,eta=1,bias=False,kerneltype='gaussian',sigma=1,**kwargs):
+ def fit(self, xs, xt, mu=1, eta=1, bias=False, kerneltype='gaussian',
+ sigma=1, **kwargs):
""" Fit domain adaptation between samples is xs and xt """
- self.xs=xs
- self.xt=xt
- self.bias=bias
-
- self.ws=unif(xs.shape[0])
- self.wt=unif(xt.shape[0])
- self.kernel=kerneltype
- self.sigma=sigma
- self.kwargs=kwargs
-
+ self.xs = xs
+ self.xt = xt
+ self.bias = bias
- self.G,self.L=joint_OT_mapping_kernel(xs,xt,mu=mu,eta=eta,bias=bias,**kwargs)
- self.computed=True
+ self.ws = unif(xs.shape[0])
+ self.wt = unif(xt.shape[0])
+ self.kernel = kerneltype
+ self.sigma = sigma
+ self.kwargs = kwargs
+ self.G, self.L = joint_OT_mapping_kernel(
+ xs, xt, mu=mu, eta=eta, bias=bias, **kwargs)
+ self.computed = True
- def predict(self,x):
+ def predict(self, x):
""" Out of sample mapping estimated during the call to fit"""
if self.computed:
- K=kernel(x,self.xs,method=self.kernel,sigma=self.sigma,**self.kwargs)
+ K = kernel(
+ x, self.xs, method=self.kernel, sigma=self.sigma,
+ **self.kwargs)
if self.bias:
- K=np.hstack((K,np.ones((x.shape[0],1))))
+ K = np.hstack((K, np.ones((x.shape[0], 1))))
return K.dot(self.L)
else:
print("Warning, model not fitted yet, returning None")
- return None \ No newline at end of file
+ return None
+
+
+def distribution_estimation_uniform(X):
+ """estimates a uniform distribution from an array of samples X
+
+ Parameters
+ ----------
+ X : array-like, shape (n_samples, n_features)
+ The array of samples
+
+ Returns
+ -------
+ mu : array-like, shape (n_samples,)
+ The uniform distribution estimated from X
+ """
+
+ return unif(X.shape[0])
+
+
+class BaseTransport(BaseEstimator):
+ """Base class for OTDA objects
+
+ Notes
+ -----
+ All estimators should specify all the parameters that can be set
+ at the class level in their ``__init__`` as explicit keyword
+ arguments (no ``*args`` or ``**kwargs``).
+
+ fit method should:
+ - estimate a cost matrix and store it in a `cost_` attribute
+ - estimate a coupling matrix and store it in a `coupling_`
+ attribute
+ - estimate distributions from source and target data and store them in
+ mu_s and mu_t attributes
+ - store Xs and Xt in attributes to be used later on in transform and
+ inverse_transform methods
+
+ transform method should always get an Xs parameter as input
+ inverse_transform method should always get an Xt parameter as input
+ """
+
+ def fit(self, Xs=None, ys=None, Xt=None, yt=None):
+ """Build a coupling matrix from source and target sets of samples
+ (Xs, ys) and (Xt, yt)
+
+ Parameters
+ ----------
+ Xs : array-like, shape (n_source_samples, n_features)
+ The training input samples.
+ ys : array-like, shape (n_source_samples,)
+ The class labels
+ Xt : array-like, shape (n_target_samples, n_features)
+ The training input samples.
+ yt : array-like, shape (n_labeled_target_samples,)
+ The class labels
+
+ Returns
+ -------
+ self : object
+ Returns self.
+ """
+
+ # check the necessary inputs parameters are here
+ if check_params(Xs=Xs, Xt=Xt):
+
+ # pairwise distance
+ self.cost_ = dist(Xs, Xt, metric=self.metric)
+ self.cost_ = cost_normalization(self.cost_, self.norm)
+
+ if (ys is not None) and (yt is not None):
+
+ if self.limit_max != np.infty:
+ self.limit_max = self.limit_max * np.max(self.cost_)
+
+ # assumes labeled source samples occupy the first rows
+ # and labeled target samples occupy the first columns
+ classes = np.unique(ys)
+ for c in classes:
+ idx_s = np.where((ys != c) & (ys != -1))
+ idx_t = np.where(yt == c)
+
+ # all the coefficients corresponding to a source sample
+ # and a target sample with different labels get an
+ # infinite cost
+ for j in idx_t[0]:
+ self.cost_[idx_s[0], j] = self.limit_max
+
+ # distribution estimation
+ self.mu_s = self.distribution_estimation(Xs)
+ self.mu_t = self.distribution_estimation(Xt)
+
+ # store arrays of samples
+ self.xs_ = Xs
+ self.xt_ = Xt
+
+ return self
+
+ def fit_transform(self, Xs=None, ys=None, Xt=None, yt=None):
+ """Build a coupling matrix from source and target sets of samples
+ (Xs, ys) and (Xt, yt) and transports source samples Xs onto target
+ ones Xt
+
+ Parameters
+ ----------
+ Xs : array-like, shape (n_source_samples, n_features)
+ The training input samples.
+ ys : array-like, shape (n_source_samples,)
+ The class labels
+ Xt : array-like, shape (n_target_samples, n_features)
+ The training input samples.
+ yt : array-like, shape (n_labeled_target_samples,)
+ The class labels
+
+ Returns
+ -------
+ transp_Xs : array-like, shape (n_source_samples, n_features)
+ The transported source samples.
+ """
+
+ return self.fit(Xs, ys, Xt, yt).transform(Xs, ys, Xt, yt)
+
+ def transform(self, Xs=None, ys=None, Xt=None, yt=None, batch_size=128):
+ """Transports source samples Xs onto target ones Xt
+
+ Parameters
+ ----------
+ Xs : array-like, shape (n_source_samples, n_features)
+ The training input samples.
+ ys : array-like, shape (n_source_samples,)
+ The class labels
+ Xt : array-like, shape (n_target_samples, n_features)
+ The training input samples.
+ yt : array-like, shape (n_labeled_target_samples,)
+ The class labels
+ batch_size : int, optional (default=128)
+ The batch size for out of sample transform
+
+ Returns
+ -------
+ transp_Xs : array-like, shape (n_source_samples, n_features)
+ The transported source samples.
+ """
+
+ # check the necessary inputs parameters are here
+ if check_params(Xs=Xs):
+
+ if np.array_equal(self.xs_, Xs):
+
+ # perform standard barycentric mapping
+ transp = self.coupling_ / np.sum(self.coupling_, 1)[:, None]
+
+ # set nans to 0
+ transp[~ np.isfinite(transp)] = 0
+
+ # compute transported samples
+ transp_Xs = np.dot(transp, self.xt_)
+ else:
+ # perform out of sample mapping
+ indices = np.arange(Xs.shape[0])
+ batch_ind = [
+ indices[i:i + batch_size]
+ for i in range(0, len(indices), batch_size)]
+
+ transp_Xs = []
+ for bi in batch_ind:
+
+ # get the nearest neighbor in the source domain
+ D0 = dist(Xs[bi], self.xs_)
+ idx = np.argmin(D0, axis=1)
+
+ # transport the source samples
+ transp = self.coupling_ / np.sum(
+ self.coupling_, 1)[:, None]
+ transp[~ np.isfinite(transp)] = 0
+ transp_Xs_ = np.dot(transp, self.xt_)
+
+ # define the transported points
+ transp_Xs_ = transp_Xs_[idx, :] + Xs[bi] - self.xs_[idx, :]
+
+ transp_Xs.append(transp_Xs_)
+
+ transp_Xs = np.concatenate(transp_Xs, axis=0)
+
+ return transp_Xs
+
+ def inverse_transform(self, Xs=None, ys=None, Xt=None, yt=None,
+ batch_size=128):
+ """Transports target samples Xt onto target samples Xs
+
+ Parameters
+ ----------
+ Xs : array-like, shape (n_source_samples, n_features)
+ The training input samples.
+ ys : array-like, shape (n_source_samples,)
+ The class labels
+ Xt : array-like, shape (n_target_samples, n_features)
+ The training input samples.
+ yt : array-like, shape (n_labeled_target_samples,)
+ The class labels
+ batch_size : int, optional (default=128)
+ The batch size for out of sample inverse transform
+
+ Returns
+ -------
+ transp_Xt : array-like, shape (n_target_samples, n_features)
+ The transported target samples.
+ """
+
+ # check the necessary inputs parameters are here
+ if check_params(Xt=Xt):
+
+ if np.array_equal(self.xt_, Xt):
+
+ # perform standard barycentric mapping
+ transp_ = self.coupling_.T / np.sum(self.coupling_, 0)[:, None]
+
+ # set nans to 0
+ transp_[~ np.isfinite(transp_)] = 0
+
+ # compute transported samples
+ transp_Xt = np.dot(transp_, self.xs_)
+ else:
+ # perform out of sample mapping
+ indices = np.arange(Xt.shape[0])
+ batch_ind = [
+ indices[i:i + batch_size]
+ for i in range(0, len(indices), batch_size)]
+
+ transp_Xt = []
+ for bi in batch_ind:
+
+ D0 = dist(Xt[bi], self.xt_)
+ idx = np.argmin(D0, axis=1)
+
+ # transport the target samples
+ transp_ = self.coupling_.T / np.sum(
+ self.coupling_, 0)[:, None]
+ transp_[~ np.isfinite(transp_)] = 0
+ transp_Xt_ = np.dot(transp_, self.xs_)
+
+ # define the transported points
+ transp_Xt_ = transp_Xt_[idx, :] + Xt[bi] - self.xt_[idx, :]
+
+ transp_Xt.append(transp_Xt_)
+
+ transp_Xt = np.concatenate(transp_Xt, axis=0)
+
+ return transp_Xt
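Both directions above reduce to the same barycentric step: row normalize the coupling (or its transpose) and multiply by the opposite domain's samples. A standalone sketch of just that step (the function name is local to this example, not part of the patch):

import numpy as np

def barycentric_map(coupling, X_opposite):
    # each row of the coupling is normalized to sum to one
    transp = coupling / np.sum(coupling, axis=1, keepdims=True)
    transp[~np.isfinite(transp)] = 0        # guard rows with zero mass
    return np.dot(transp, X_opposite)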
+
+
+class SinkhornTransport(BaseTransport):
+ """Domain Adapatation OT method based on Sinkhorn Algorithm
+
+ Parameters
+ ----------
+ reg_e : float, optional (default=1)
+ Entropic regularization parameter
+ max_iter : int, float, optional (default=1000)
+ The maximum number of iterations before stopping the optimization
+ algorithm if it has not converged
+ tol : float, optional (default=10e-9)
+ The precision required to stop the optimization algorithm.
+ out_of_sample_map : string, optional (default="ferradans")
+ The kind of out of sample mapping used to transport samples that
+ were not seen when fitting the coupling from one domain into
+ another one (the "ferradans" nearest neighbor displacement rule,
+ cf. BaseTransport.transform).
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ norm : string, optional (default=None)
+ If given, normalize the ground metric to avoid numerical errors that
+ can occur with large metric values.
+ distribution_estimation : callable, optional (defaults to the uniform)
+ The kind of distribution estimation to employ
+ verbose : bool, optional (default=False)
+ Controls the verbosity of the optimization algorithm
+ log : bool, optional (default=False)
+ Controls the logs of the optimization algorithm
+ limit_max : float, optional (default=np.infty)
+ Controls the semi supervised mode. Transport between labeled source
+ and target samples of different classes will exhibit an infinite cost
+
+ Attributes
+ ----------
+ coupling_ : array-like, shape (n_source_samples, n_target_samples)
+ The optimal coupling
+ log_ : dictionary
+ The dictionary of log, empty dict if parameter log is not True
+
+ References
+ ----------
+ .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+ "Optimal Transport for Domain Adaptation," in IEEE Transactions
+ on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
+ .. [2] M. Cuturi, Sinkhorn Distances : Lightspeed Computation of Optimal
+ Transport, Advances in Neural Information Processing Systems (NIPS)
+ 26, 2013
+ """
+
+ def __init__(self, reg_e=1., max_iter=1000,
+ tol=10e-9, verbose=False, log=False,
+ metric="sqeuclidean", norm=None,
+ distribution_estimation=distribution_estimation_uniform,
+ out_of_sample_map='ferradans', limit_max=np.infty):
+
+ self.reg_e = reg_e
+ self.max_iter = max_iter
+ self.tol = tol
+ self.verbose = verbose
+ self.log = log
+ self.metric = metric
+ self.norm = norm
+ self.limit_max = limit_max
+ self.distribution_estimation = distribution_estimation
+ self.out_of_sample_map = out_of_sample_map
+
+ def fit(self, Xs=None, ys=None, Xt=None, yt=None):
+ """Build a coupling matrix from source and target sets of samples
+ (Xs, ys) and (Xt, yt)
+
+ Parameters
+ ----------
+ Xs : array-like, shape (n_source_samples, n_features)
+ The training input samples.
+ ys : array-like, shape (n_source_samples,)
+ The class labels
+ Xt : array-like, shape (n_target_samples, n_features)
+ The training input samples.
+ yt : array-like, shape (n_labeled_target_samples,)
+ The class labels
+
+ Returns
+ -------
+ self : object
+ Returns self.
+ """
+
+ super(SinkhornTransport, self).fit(Xs, ys, Xt, yt)
+
+ # coupling estimation
+ returned_ = sinkhorn(
+ a=self.mu_s, b=self.mu_t, M=self.cost_, reg=self.reg_e,
+ numItermax=self.max_iter, stopThr=self.tol,
+ verbose=self.verbose, log=self.log)
+
+ # deal with the value of log
+ if self.log:
+ self.coupling_, self.log_ = returned_
+ else:
+ self.coupling_ = returned_
+ self.log_ = dict()
+
+ return self
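End to end, the new scikit-learn style estimators are used as below; a minimal sketch on toy data, with an illustrative reg_e value:

import numpy as np
import ot

rng = np.random.RandomState(0)
Xs, Xt = rng.randn(20, 2), rng.randn(30, 2) + 1

st = ot.da.SinkhornTransport(reg_e=0.1)
st.fit(Xs=Xs, Xt=Xt)
Xs_mapped = st.transform(Xs=Xs)             # transported source samples
Xt_back = st.inverse_transform(Xt=Xt)       # target samples mapped back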
+
+
+class EMDTransport(BaseTransport):
+ """Domain Adapatation OT method based on Earth Mover's Distance
+
+ Parameters
+ ----------
+ out_of_sample_map : string, optional (default="ferradans")
+ The kind of out of sample mapping used to transport samples that
+ were not seen when fitting the coupling from one domain into
+ another one (the "ferradans" nearest neighbor displacement rule,
+ cf. BaseTransport.transform).
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ norm : string, optional (default=None)
+ If given, normalize the ground metric to avoid numerical errors that
+ can occur with large metric values.
+ distribution_estimation : callable, optional (defaults to the uniform)
+ The kind of distribution estimation to employ
+ limit_max : float, optional (default=10)
+ Controls the semi supervised mode. Transport between labeled source
+ and target samples of different classes will exhibit a cost set to
+ limit_max times the maximum value of the cost matrix
+ max_iter : int, optional (default=100000)
+ The maximum number of iterations before stopping the optimization
+ algorithm if it has not converged.
+
+ Attributes
+ ----------
+ coupling_ : array-like, shape (n_source_samples, n_target_samples)
+ The optimal coupling
+
+ References
+ ----------
+ .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+ "Optimal Transport for Domain Adaptation," in IEEE Transactions
+ on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1
+ """
+
+ def __init__(self, metric="sqeuclidean", norm=None,
+ distribution_estimation=distribution_estimation_uniform,
+ out_of_sample_map='ferradans', limit_max=10,
+ max_iter=100000):
+
+ self.metric = metric
+ self.norm = norm
+ self.limit_max = limit_max
+ self.distribution_estimation = distribution_estimation
+ self.out_of_sample_map = out_of_sample_map
+ self.max_iter = max_iter
+
+ def fit(self, Xs, ys=None, Xt=None, yt=None):
+ """Build a coupling matrix from source and target sets of samples
+ (Xs, ys) and (Xt, yt)
+
+ Parameters
+ ----------
+ Xs : array-like, shape (n_source_samples, n_features)
+ The training input samples.
+ ys : array-like, shape (n_source_samples,)
+ The class labels
+ Xt : array-like, shape (n_target_samples, n_features)
+ The training input samples.
+ yt : array-like, shape (n_labeled_target_samples,)
+ The class labels
+
+ Returns
+ -------
+ self : object
+ Returns self.
+ """
+
+ super(EMDTransport, self).fit(Xs, ys, Xt, yt)
+
+ # coupling estimation
+ self.coupling_ = emd(
+ a=self.mu_s, b=self.mu_t, M=self.cost_, numItermax=self.max_iter
+ )
+
+ return self
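EMDTransport follows the same contract, and fit_transform from the base class chains both steps; a short sketch on toy data:

import numpy as np
import ot

rng = np.random.RandomState(0)
Xs, Xt = rng.randn(20, 2), rng.randn(30, 2) + 1

Xs_mapped = ot.da.EMDTransport().fit_transform(Xs=Xs, Xt=Xt)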
+
+
+class SinkhornLpl1Transport(BaseTransport):
+ """Domain Adapatation OT method based on sinkhorn algorithm +
+ LpL1 class regularization.
+
+ Parameters
+ ----------
+ reg_e : float, optional (default=1)
+ Entropic regularization parameter
+ reg_cl : float, optional (default=0.1)
+ Class regularization parameter
+ out_of_sample_map : string, optional (default="ferradans")
+ The kind of out of sample mapping used to transport samples that
+ were not seen when fitting the coupling from one domain into
+ another one (the "ferradans" nearest neighbor displacement rule,
+ cf. BaseTransport.transform).
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ norm : string, optional (default=None)
+ If given, normalize the ground metric to avoid numerical errors that
+ can occur with large metric values.
+ distribution_estimation : callable, optional (defaults to the uniform)
+ The kind of distribution estimation to employ
+ max_iter : int, float, optional (default=10)
+ The maximum number of iterations before stopping the optimization
+ algorithm if it has not converged
+ max_inner_iter : int, float, optional (default=200)
+ The number of iterations in the inner loop
+ verbose : bool, optional (default=False)
+ Controls the verbosity of the optimization algorithm
+ limit_max : float, optional (default=np.infty)
+ Controls the semi supervised mode. Transport between labeled source
+ and target samples of different classes will exhibit an infinite cost
+
+ Attributes
+ ----------
+ coupling_ : array-like, shape (n_source_samples, n_target_samples)
+ The optimal coupling
+
+ References
+ ----------
+
+ .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+ "Optimal Transport for Domain Adaptation," in IEEE
+ Transactions on Pattern Analysis and Machine Intelligence ,
+ vol.PP, no.99, pp.1-1
+ .. [2] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
+ Generalized conditional gradient: analysis of convergence
+ and applications. arXiv preprint arXiv:1510.06567.
+
+ """
+
+ def __init__(self, reg_e=1., reg_cl=0.1,
+ max_iter=10, max_inner_iter=200,
+ tol=10e-9, verbose=False,
+ metric="sqeuclidean", norm=None,
+ distribution_estimation=distribution_estimation_uniform,
+ out_of_sample_map='ferradans', limit_max=np.infty):
+
+ self.reg_e = reg_e
+ self.reg_cl = reg_cl
+ self.max_iter = max_iter
+ self.max_inner_iter = max_inner_iter
+ self.tol = tol
+ self.verbose = verbose
+ self.metric = metric
+ self.norm = norm
+ self.distribution_estimation = distribution_estimation
+ self.out_of_sample_map = out_of_sample_map
+ self.limit_max = limit_max
+
+ def fit(self, Xs, ys=None, Xt=None, yt=None):
+ """Build a coupling matrix from source and target sets of samples
+ (Xs, ys) and (Xt, yt)
+
+ Parameters
+ ----------
+ Xs : array-like, shape (n_source_samples, n_features)
+ The training input samples.
+ ys : array-like, shape (n_source_samples,)
+ The class labels
+ Xt : array-like, shape (n_target_samples, n_features)
+ The training input samples.
+ yt : array-like, shape (n_labeled_target_samples,)
+ The class labels
+
+ Returns
+ -------
+ self : object
+ Returns self.
+ """
+
+ # check the necessary inputs parameters are here
+ if check_params(Xs=Xs, Xt=Xt, ys=ys):
+
+ super(SinkhornLpl1Transport, self).fit(Xs, ys, Xt, yt)
+
+ self.coupling_ = sinkhorn_lpl1_mm(
+ a=self.mu_s, labels_a=ys, b=self.mu_t, M=self.cost_,
+ reg=self.reg_e, eta=self.reg_cl, numItermax=self.max_iter,
+ numInnerItermax=self.max_inner_iter, stopInnerThr=self.tol,
+ verbose=self.verbose)
+
+ return self
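Unlike the unsupervised estimators, this one requires source labels, which the check_params guard above enforces; a sketch with integer labels:

import numpy as np
import ot

rng = np.random.RandomState(0)
Xs, Xt = rng.randn(20, 2), rng.randn(30, 2) + 1
ys = rng.randint(0, 2, 20)                  # source class labels

lpl1 = ot.da.SinkhornLpl1Transport(reg_e=0.1, reg_cl=0.1)
lpl1.fit(Xs=Xs, ys=ys, Xt=Xt)
Xs_mapped = lpl1.transform(Xs=Xs)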
+
+
+class SinkhornL1l2Transport(BaseTransport):
+ """Domain Adapatation OT method based on sinkhorn algorithm +
+ l1l2 class regularization.
+
+ Parameters
+ ----------
+ reg_e : float, optional (default=1)
+ Entropic regularization parameter
+ reg_cl : float, optional (default=0.1)
+ Class regularization parameter
+ out_of_sample_map : string, optional (default="ferradans")
+ The kind of out of sample mapping used to transport samples that
+ were not seen when fitting the coupling from one domain into
+ another one (the "ferradans" nearest neighbor displacement rule,
+ cf. BaseTransport.transform).
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ norm : string, optional (default=None)
+ If given, normalize the ground metric to avoid numerical errors that
+ can occur with large metric values.
+ distribution_estimation : callable, optional (defaults to the uniform)
+ The kind of distribution estimation to employ
+ max_iter : int, float, optional (default=10)
+ The maximum number of iterations before stopping the optimization
+ algorithm if it has not converged
+ max_inner_iter : int, float, optional (default=200)
+ The number of iterations in the inner loop
+ verbose : bool, optional (default=False)
+ Controls the verbosity of the optimization algorithm
+ log : bool, optional (default=False)
+ Controls the logs of the optimization algorithm
+ limit_max : float, optional (default=10)
+ Controls the semi supervised mode. Transport between labeled source
+ and target samples of different classes will exhibit a cost set to
+ limit_max times the maximum value of the cost matrix
+
+ Attributes
+ ----------
+ coupling_ : array-like, shape (n_source_samples, n_target_samples)
+ The optimal coupling
+ log_ : dictionary
+ The dictionary of log, empty dict if parameter log is not True
+
+ References
+ ----------
+
+ .. [1] N. Courty; R. Flamary; D. Tuia; A. Rakotomamonjy,
+ "Optimal Transport for Domain Adaptation," in IEEE
+ Transactions on Pattern Analysis and Machine Intelligence ,
+ vol.PP, no.99, pp.1-1
+ .. [2] Rakotomamonjy, A., Flamary, R., & Courty, N. (2015).
+ Generalized conditional gradient: analysis of convergence
+ and applications. arXiv preprint arXiv:1510.06567.
+
+ """
+
+ def __init__(self, reg_e=1., reg_cl=0.1,
+ max_iter=10, max_inner_iter=200,
+ tol=10e-9, verbose=False, log=False,
+ metric="sqeuclidean", norm=None,
+ distribution_estimation=distribution_estimation_uniform,
+ out_of_sample_map='ferradans', limit_max=10):
+
+ self.reg_e = reg_e
+ self.reg_cl = reg_cl
+ self.max_iter = max_iter
+ self.max_inner_iter = max_inner_iter
+ self.tol = tol
+ self.verbose = verbose
+ self.log = log
+ self.metric = metric
+ self.norm = norm
+ self.distribution_estimation = distribution_estimation
+ self.out_of_sample_map = out_of_sample_map
+ self.limit_max = limit_max
+
+ def fit(self, Xs, ys=None, Xt=None, yt=None):
+ """Build a coupling matrix from source and target sets of samples
+ (Xs, ys) and (Xt, yt)
+
+ Parameters
+ ----------
+ Xs : array-like, shape (n_source_samples, n_features)
+ The training input samples.
+ ys : array-like, shape (n_source_samples,)
+ The class labels
+ Xt : array-like, shape (n_target_samples, n_features)
+ The training input samples.
+ yt : array-like, shape (n_labeled_target_samples,)
+ The class labels
+
+ Returns
+ -------
+ self : object
+ Returns self.
+ """
+
+ # check the necessary inputs parameters are here
+ if check_params(Xs=Xs, Xt=Xt, ys=ys):
+
+ super(SinkhornL1l2Transport, self).fit(Xs, ys, Xt, yt)
+
+ returned_ = sinkhorn_l1l2_gl(
+ a=self.mu_s, labels_a=ys, b=self.mu_t, M=self.cost_,
+ reg=self.reg_e, eta=self.reg_cl, numItermax=self.max_iter,
+ numInnerItermax=self.max_inner_iter, stopInnerThr=self.tol,
+ verbose=self.verbose, log=self.log)
+
+ # deal with the value of log
+ if self.log:
+ self.coupling_, self.log_ = returned_
+ else:
+ self.coupling_ = returned_
+ self.log_ = dict()
+
+ return self
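The l1l2 estimator has the same interface plus a log option; with log=True the solver log lands in log_, otherwise log_ stays an empty dict. A sketch:

import numpy as np
import ot

rng = np.random.RandomState(0)
Xs, Xt = rng.randn(20, 2), rng.randn(30, 2) + 1
ys = rng.randint(0, 2, 20)

l1l2 = ot.da.SinkhornL1l2Transport(reg_e=0.1, reg_cl=0.1, log=True)
l1l2.fit(Xs=Xs, ys=ys, Xt=Xt)
print(sorted(l1l2.log_.keys()))             # solver log entries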
+
+
+class MappingTransport(BaseEstimator):
+ """MappingTransport: DA methods that aims at jointly estimating a optimal
+ transport coupling and the associated mapping
+
+ Parameters
+ ----------
+ mu : float, optional (default=1)
+ Weight for the linear OT loss (>0)
+ eta : float, optional (default=0.001)
+ Regularization term for the linear mapping L (>0)
+ bias : bool, optional (default=False)
+ Estimate linear mapping with constant bias
+ metric : string, optional (default="sqeuclidean")
+ The ground metric for the Wasserstein problem
+ norm : string, optional (default=None)
+ If given, normalize the ground metric to avoid numerical errors that
+ can occur with large metric values.
+ kernel : string, optional (default="linear")
+ The kernel to use either linear or gaussian
+ sigma : float, optional (default=1)
+ The gaussian kernel parameter
+ max_iter : int, optional (default=100)
+ Max number of BCD iterations
+ tol : float, optional (default=1e-5)
+ Stop threshold on relative loss decrease (>0)
+ max_inner_iter : int, optional (default=10)
+ Max number of iterations (inner CG solver)
+ inner_tol : float, optional (default=1e-6)
+ Stop threshold on error (inner CG solver) (>0)
+ verbose : bool, optional (default=False)
+ Print information along iterations
+ log : bool, optional (default=False)
+ record log if True
+
+ Attributes
+ ----------
+ coupling_ : array-like, shape (n_source_samples, n_target_samples)
+ The optimal coupling
+ mapping_ : array-like
+ The associated mapping; of shape (n_features (+ 1 if bias),
+ n_features) when kernel == "linear", and of shape
+ (n_source_samples (+ 1 if bias), n_features) when
+ kernel == "gaussian"
+ log_ : dictionary
+ The dictionary of log, empty dict if parameter log is not True
+
+ References
+ ----------
+
+ .. [8] M. Perrot, N. Courty, R. Flamary, A. Habrard,
+ "Mapping estimation for discrete optimal transport",
+ Neural Information Processing Systems (NIPS), 2016.
+
+ """
+
+ def __init__(self, mu=1, eta=0.001, bias=False, metric="sqeuclidean",
+ norm=None, kernel="linear", sigma=1, max_iter=100, tol=1e-5,
+ max_inner_iter=10, inner_tol=1e-6, log=False, verbose=False,
+ verbose2=False):
+
+ self.metric = metric
+ self.norm = norm
+ self.mu = mu
+ self.eta = eta
+ self.bias = bias
+ self.kernel = kernel
+ self.sigma = sigma
+ self.max_iter = max_iter
+ self.tol = tol
+ self.max_inner_iter = max_inner_iter
+ self.inner_tol = inner_tol
+ self.log = log
+ self.verbose = verbose
+ self.verbose2 = verbose2
+
+ def fit(self, Xs=None, ys=None, Xt=None, yt=None):
+ """Builds an optimal coupling and estimates the associated mapping
+ from source and target sets of samples (Xs, ys) and (Xt, yt)
+
+ Parameters
+ ----------
+ Xs : array-like, shape (n_source_samples, n_features)
+ The training input samples.
+ ys : array-like, shape (n_source_samples,)
+ The class labels
+ Xt : array-like, shape (n_target_samples, n_features)
+ The training input samples.
+ yt : array-like, shape (n_labeled_target_samples,)
+ The class labels
+
+ Returns
+ -------
+ self : object
+ Returns self
+ """
+
+ # check the necessary inputs parameters are here
+ if check_params(Xs=Xs, Xt=Xt):
+
+ self.xs_ = Xs
+ self.xt_ = Xt
+
+ if self.kernel == "linear":
+ returned_ = joint_OT_mapping_linear(
+ Xs, Xt, mu=self.mu, eta=self.eta, bias=self.bias,
+ verbose=self.verbose, verbose2=self.verbose2,
+ numItermax=self.max_iter,
+ numInnerItermax=self.max_inner_iter, stopThr=self.tol,
+ stopInnerThr=self.inner_tol, log=self.log)
+
+ elif self.kernel == "gaussian":
+ returned_ = joint_OT_mapping_kernel(
+ Xs, Xt, mu=self.mu, eta=self.eta, bias=self.bias,
+ sigma=self.sigma, verbose=self.verbose,
+ verbose2=self.verbose2, numItermax=self.max_iter,
+ numInnerItermax=self.max_inner_iter,
+ stopInnerThr=self.inner_tol, stopThr=self.tol,
+ log=self.log)
+
+ # deal with the value of log
+ if self.log:
+ self.coupling_, self.mapping_, self.log_ = returned_
+ else:
+ self.coupling_, self.mapping_ = returned_
+ self.log_ = dict()
+
+ return self
+
+ def transform(self, Xs):
+ """Transports source samples Xs onto target ones Xt
+
+ Parameters
+ ----------
+ Xs : array-like, shape (n_source_samples, n_features)
+ The training input samples.
+
+ Returns
+ -------
+ transp_Xs : array-like, shape (n_source_samples, n_features)
+ The transport source samples.
+ """
+
+ # check the necessary inputs parameters are here
+ if check_params(Xs=Xs):
+
+ if np.array_equal(self.xs_, Xs):
+ # perform standard barycentric mapping
+ transp = self.coupling_ / np.sum(self.coupling_, 1)[:, None]
+
+ # set nans to 0
+ transp[~ np.isfinite(transp)] = 0
+
+ # compute transported samples
+ transp_Xs = np.dot(transp, self.xt_)
+ else:
+ if self.kernel == "gaussian":
+ K = kernel(Xs, self.xs_, method=self.kernel,
+ sigma=self.sigma)
+ elif self.kernel == "linear":
+ K = Xs
+ if self.bias:
+ K = np.hstack((K, np.ones((Xs.shape[0], 1))))
+ transp_Xs = K.dot(self.mapping_)
+
+ return transp_Xs
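Because the map itself is estimated, MappingTransport can transport samples unseen at fit time through the learned map, rather than via the nearest neighbor rule of the coupling only estimators; a sketch with a gaussian kernel (illustrative parameters):

import numpy as np
import ot

rng = np.random.RandomState(0)
Xs, Xt = rng.randn(20, 2), rng.randn(30, 2) + 1

mt = ot.da.MappingTransport(kernel="gaussian", sigma=1, mu=1, eta=1e-3,
                            bias=True, max_iter=20)
mt.fit(Xs=Xs, Xt=Xt)

Xnew = rng.randn(5, 2)                      # not seen during fit
Xnew_mapped = mt.transform(Xs=Xnew)         # goes through the learned map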