author    Hexuan_Liu <hl6uk@virginia.edu>    2021-10-29 09:09:47 -0700
committer GitHub <noreply@github.com>        2021-10-29 18:09:47 +0200
commit    1b5c35b62980038960e1c1bdd15dce4b8cdd1e7e (patch)
tree      cb616672136beee31bc9b77b0b524d0bd02c71a8
parent    79a7a2991168aade2fbb09cf64fb490155a7faac (diff)
[MRG] add normalization of distances for WDA (#172)
* edit dr.py
* Correct normalization + optional parameter
* pep8?
* final!

Co-authored-by: Rémi Flamary <remi.flamary@gmail.com>
-rw-r--r--    ot/dr.py    18
1 file changed, 16 insertions(+), 2 deletions(-)
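For context, here is a minimal usage sketch of the new option (not part of the commit; the data and the names X, y, P0 are illustrative, and ot.dr requires pymanopt and autograd). With normalize=True and an initial projection P0, the Sinkhorn regularization reg is rescaled per class pair by the average pairwise distance of the data projected on P0, which makes it less sensitive to the scale of the data:

    import numpy as np
    import ot.dr

    rng = np.random.RandomState(0)
    n, d, p = 100, 10, 2
    X = np.vstack([rng.randn(n, d), rng.randn(n, d) + 2])  # two shifted classes
    y = np.repeat([0, 1], n)
    P0 = np.linalg.qr(rng.randn(d, p))[0]  # orthonormal initial projection

    # normalize=True rescales reg by the average distance computed on P0
    Pwda, projwda = ot.dr.wda(X, y, p=p, reg=1, k=10, maxiter=100,
                              P0=P0, normalize=True)
    Xp = projwda(X)  # samples projected on the learned p-dimensional subspace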
diff --git a/ot/dr.py b/ot/dr.py
index de39662..7469270 100644
--- a/ot/dr.py
+++ b/ot/dr.py
@@ -109,7 +109,7 @@ def fda(X, y, p=2, reg=1e-16):
     return Popt, proj
 
 
-def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None):
+def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None, normalize=False):
     r"""
     Wasserstein Discriminant Analysis [11]_
@@ -139,6 +139,8 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None):
         else should be a pymanopt.solvers
     P0 : ndarray, shape (d, p)
         Initial starting point for projection.
+    normalize : bool, optional
+        Normalize the Wasserstein distance by the average distance on P0 (default: False)
     verbose : int, optional
         Print information along iterations.
@@ -164,6 +166,18 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None):
     # compute uniform weights
     wc = [np.ones((x.shape[0]), dtype=np.float32) / x.shape[0] for x in xc]
 
+    # pre-compute reg_{c,c'}: mean pairwise distance between classes projected on P0
+    if P0 is not None and normalize:
+        regmean = np.zeros((len(xc), len(xc)))
+        for i, xi in enumerate(xc):
+            xi = np.dot(xi, P0)
+            for j, xj in enumerate(xc[i:]):
+                xj = np.dot(xj, P0)
+                M = dist(xi, xj)
+                regmean[i, j] = np.sum(M) / (len(xi) * len(xj))
+    else:
+        regmean = np.ones((len(xc), len(xc)))
+
     def cost(P):
         # wda loss
         loss_b = 0
@@ -174,7 +188,7 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None):
             for j, xj in enumerate(xc[i:]):
                 xj = np.dot(xj, P)
                 M = dist(xi, xj)
-                G = sinkhorn(wc[i], wc[j + i], M, reg, k)
+                G = sinkhorn(wc[i], wc[j + i], M, reg * regmean[i, j], k)
                 if j == 0:
                     loss_w += np.sum(G * M)
                 else:
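In short, regmean[i, j] is the mean entry of the cost matrix between classes i and i + j after projection on P0 (j indexes the offset from i, matching the enumerate(xc[i:]) convention used in both loops), and sinkhorn then runs with the effective regularization reg * regmean[i, j]. A standalone sketch of that factor, using the same dist helper as ot/dr.py (the function name below is illustrative):

    import numpy as np
    from ot import dist  # pairwise squared Euclidean distances, as in ot/dr.py

    def mean_projected_distance(xi, xj, P0):
        # mean entry of the cost matrix between two classes projected on P0;
        # this is the value stored in regmean[i, j] by the patch above
        M = dist(np.dot(xi, P0), np.dot(xj, P0))
        return np.sum(M) / (len(xi) * len(xj))

Rescaling reg by this average keeps the strength of the entropic term comparable across class pairs and across datasets whose projected distances differ in magnitude.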