Added log option to muliprocess emd

author: Antoine Rolet <antoine.rolet@gmail.com> 2017-09-07 14:35:35 +0900
committer: Antoine Rolet <antoine.rolet@gmail.com> 2017-09-07 14:35:35 +0900
commit: ab65f86304b03a967054eeeaf73b8c8277618d65 (patch)
tree: afdf3a385588277dfe32f8ac92d5009f14ea0c4e
parent: 12d9b3ff72e9669ccc0162e82b7a33beb51d3e25 (diff)
2 files changed, 57 insertions, 39 deletions
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index c15e6b9..8edd8ec 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -7,11 +7,13 @@ Solvers for the original linear program OT problem
 #
 # License: MIT License
 
+import multiprocessing
+
 import numpy as np
+
 # import compiled emd
 from .emd_wrap import emd_c
 from ..utils import parmap
-import multiprocessing
 
 
 def emd(a, b, M, numItermax=100000, log=False):
@@ -88,9 +90,9 @@ def emd(a, b, M, numItermax=100000, log=False):
 
     # if empty array given then use unifor distributions
     if len(a) == 0:
-        a = np.ones((M.shape[0], ), dtype=np.float64)/M.shape[0]
+        a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
     if len(b) == 0:
-        b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1]
+        b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
 
     G, cost, u, v = emd_c(a, b, M, numItermax)
     if log:
@@ -101,7 +103,8 @@ def emd(a, b, M, numItermax=100000, log=False):
         return G, log
     return G
 
-def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000):
+
+def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000, log=False):
     """Solves the Earth Movers distance problem and returns the loss
 
     .. math::
@@ -168,16 +171,26 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000):
 
     # if empty array given then use unifor distributions
     if len(a) == 0:
-        a = np.ones((M.shape[0], ), dtype=np.float64)/M.shape[0]
+        a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
     if len(b) == 0:
-        b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1]
-    
-    if len(b.shape)==1:
-        return emd_c(a, b, M, numItermax)[1]
+        b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
+
+    if log:
+        def f(b):
+            G, cost, u, v = emd_c(a, b, M, numItermax)
+            log = {}
+            log['G'] = G
+            log['u'] = u
+            log['v'] = v
+            return [cost, log]
+    else:
+        def f(b):
+            return emd_c(a, b, M, numItermax)[1]
+
+    if len(b.shape) == 1:
+        return f(b)
     nb = b.shape[1]
     # res = [emd2_c(a, b[:, i].copy(), M, numItermax) for i in range(nb)]
 
-    def f(b):
-        return emd_c(a,b,M, numItermax)[1]
-    res= parmap(f, [b[:,i] for i in range(nb)],processes)
-    return np.array(res)
+    res = parmap(f, [b[:, i] for i in range(nb)], processes)
+    return res
diff --git a/test/test_ot.py b/test/test_ot.py
index 78f64ab..feadef4 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -4,11 +4,12 @@
 #
 # License: MIT License
 
+import warnings
+
 import numpy as np
 
 import ot
 from ot.datasets import get_1D_gauss as gauss
-import warnings
 
 
 def test_doctest():
@@ -100,6 +101,21 @@ def test_emd2_multi():
 
     np.testing.assert_allclose(emd1, emdn)
 
+    # emd loss multipro proc with log
+    ot.tic()
+    emdn = ot.emd2(a, b, M, log=True)
+    ot.toc('multi proc : {} s')
+
+    for i in range(len(emdn)):
+        emd = emdn[i]
+        log = emd[1]
+        cost = emd[0]
+        check_duality_gap(a, b[:, i], M, log['G'], log['u'], log['v'], cost)
+        emdn[i] = cost
+
+    emdn = np.array(emdn)
+    np.testing.assert_allclose(emd1, emdn)
+
 
 def test_warnings():
     n = 100  # nb bins
@@ -119,32 +135,22 @@ def test_warnings():
 
     # loss matrix
     M = ot.dist(x.reshape((-1, 1)), y.reshape((-1, 1))) ** (1. / 2)
-    # M/=M.max()
-
-    # %%
 
     print('Computing {} EMD '.format(1))
     with warnings.catch_warnings(record=True) as w:
-        # Cause all warnings to always be triggered.
         warnings.simplefilter("always")
-        # Trigger a warning.
         print('Computing {} EMD '.format(1))
         G = ot.emd(a, b, M, numItermax=1)
-        # Verify some things
         assert "numItermax" in str(w[-1].message)
         assert len(w) == 1
-        # Trigger a warning.
-        a[0]=100
+        a[0] = 100
         print('Computing {} EMD '.format(2))
         G = ot.emd(a, b, M)
-        # Verify some things
         assert "infeasible" in str(w[-1].message)
         assert len(w) == 2
-        # Trigger a warning.
-        a[0]=-1
+        a[0] = -1
         print('Computing {} EMD '.format(2))
         G = ot.emd(a, b, M)
-        # Verify some things
         assert "infeasible" in str(w[-1].message)
         assert len(w) == 3
 
@@ -167,9 +173,6 @@ def test_dual_variables():
 
     # loss matrix
     M = ot.dist(x.reshape((-1, 1)), y.reshape((-1, 1))) ** (1. / 2)
-    # M/=M.max()
-
-    # %%
 
     print('Computing {} EMD '.format(1))
 
@@ -178,26 +181,28 @@ def test_dual_variables():
     G, log = ot.emd(a, b, M, log=True)
     ot.toc('1 proc : {} s')
 
-    cost1 = (G * M).sum()
-    cost_dual = np.vdot(a, log['u']) + np.vdot(b, log['v'])
-
     ot.tic()
     G2 = ot.emd(b, a, np.ascontiguousarray(M.T))
     ot.toc('1 proc : {} s')
 
-    cost2 = (G2 * M.T).sum()
+    cost1 = (G * M).sum()
+    # Check symmetry
+    np.testing.assert_array_almost_equal(cost1, (M * G2.T).sum())
+    # Check with closed-form solution for gaussians
+    np.testing.assert_almost_equal(cost1, np.abs(mean1 - mean2))
 
     # Check that both cost computations are equivalent
     np.testing.assert_almost_equal(cost1, log['cost'])
+    check_duality_gap(a, b, M, G, log['u'], log['v'], log['cost'])
+
+
+def check_duality_gap(a, b, M, G, u, v, cost):
+    cost_dual = np.vdot(a, u) + np.vdot(b, v)
     # Check that dual and primal cost are equal
-    np.testing.assert_almost_equal(cost1, cost_dual)
-    # Check symmetry
-    np.testing.assert_almost_equal(cost1, cost2)
-    # Check with closed-form solution for gaussians
-    np.testing.assert_almost_equal(cost1, np.abs(mean1 - mean2))
+    np.testing.assert_almost_equal(cost_dual, cost)
 
     [ind1, ind2] = np.nonzero(G)
 
     # Check that reduced cost is zero on transport arcs
-    np.testing.assert_array_almost_equal((M - log['u'].reshape(-1, 1) - log['v'].reshape(1, -1))[ind1, ind2],
+    np.testing.assert_array_almost_equal((M - u.reshape(-1, 1) - v.reshape(1, -1))[ind1, ind2],
                                          np.zeros(ind1.size))
author	Antoine Rolet <antoine.rolet@gmail.com>	2017-09-07 14:35:35 +0900
committer	Antoine Rolet <antoine.rolet@gmail.com>	2017-09-07 14:35:35 +0900
commit	ab65f86304b03a967054eeeaf73b8c8277618d65 (patch)
tree	afdf3a385588277dfe32f8ac92d5009f14ea0c4e
parent	12d9b3ff72e9669ccc0162e82b7a33beb51d3e25 (diff)