From 1b5c35b62980038960e1c1bdd15dce4b8cdd1e7e Mon Sep 17 00:00:00 2001
From: Hexuan_Liu <hl6uk@virginia.edu>
Date: Fri, 29 Oct 2021 09:09:47 -0700
Subject: [MRG] add normalization of distances for WDA (#172)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* edit dr.py

* Correct normalization + optional parameter

* pep8?

* final!

Co-authored-by: Rémi Flamary <remi.flamary@gmail.com>
---
 ot/dr.py | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

(limited to 'ot')

diff --git a/ot/dr.py b/ot/dr.py
index de39662..7469270 100644
--- a/ot/dr.py
+++ b/ot/dr.py
@@ -109,7 +109,7 @@ def fda(X, y, p=2, reg=1e-16):
     return Popt, proj
 
 
-def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None):
+def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None, normalize=False):
     r"""
     Wasserstein Discriminant Analysis [11]_
 
@@ -139,6 +139,8 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None):
         else should be a pymanopt.solvers
     P0 : ndarray, shape (d, p)
         Initial starting point for projection.
+    normalize : bool, optional
+        Normalize the Wasserstein distance by the average distance on P0 (default : False)
     verbose : int, optional
         Print information along iterations.
 
@@ -164,6 +166,18 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None):
     # compute uniform weighs
     wc = [np.ones((x.shape[0]), dtype=np.float32) / x.shape[0] for x in xc]
 
+    # pre-compute reg_c,c' (mean of dist() between classes projected on P0)
+    if P0 is not None and normalize:
+        regmean = np.zeros((len(xc), len(xc)))
+        for i, xi in enumerate(xc):
+            xi = np.dot(xi, P0)
+            for j, xj in enumerate(xc[i:]):
+                xj = np.dot(xj, P0)
+                M = dist(xi, xj)
+                regmean[i, j] = np.sum(M) / (len(xi) * len(xj))
+    else:
+        regmean = np.ones((len(xc), len(xc)))
+
     def cost(P):
         # wda loss
         loss_b = 0
@@ -174,7 +188,7 @@ def wda(X, y, p=2, reg=1, k=10, solver=None, maxiter=100, verbose=0, P0=None):
             for j, xj in enumerate(xc[i:]):
                 xj = np.dot(xj, P)
                 M = dist(xi, xj)
-                G = sinkhorn(wc[i], wc[j + i], M, reg, k)
+                G = sinkhorn(wc[i], wc[j + i], M, reg * regmean[i, j], k)
                 if j == 0:
                     loss_w += np.sum(G * M)
                 else:
-- 
cgit v1.2.3