summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAntoine Rolet <antoine.rolet@gmail.com>2017-09-07 14:35:35 +0900
committerAntoine Rolet <antoine.rolet@gmail.com>2017-09-07 14:35:35 +0900
commitab65f86304b03a967054eeeaf73b8c8277618d65 (patch)
treeafdf3a385588277dfe32f8ac92d5009f14ea0c4e
parent12d9b3ff72e9669ccc0162e82b7a33beb51d3e25 (diff)
Added log option to muliprocess emd
-rw-r--r--ot/lp/__init__.py39
-rw-r--r--test/test_ot.py57
2 files changed, 57 insertions, 39 deletions
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index c15e6b9..8edd8ec 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -7,11 +7,13 @@ Solvers for the original linear program OT problem
#
# License: MIT License
+import multiprocessing
+
import numpy as np
+
# import compiled emd
from .emd_wrap import emd_c
from ..utils import parmap
-import multiprocessing
def emd(a, b, M, numItermax=100000, log=False):
@@ -88,9 +90,9 @@ def emd(a, b, M, numItermax=100000, log=False):
# if empty array given then use unifor distributions
if len(a) == 0:
- a = np.ones((M.shape[0], ), dtype=np.float64)/M.shape[0]
+ a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
if len(b) == 0:
- b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1]
+ b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
G, cost, u, v = emd_c(a, b, M, numItermax)
if log:
@@ -101,7 +103,8 @@ def emd(a, b, M, numItermax=100000, log=False):
return G, log
return G
-def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000):
+
+def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000, log=False):
"""Solves the Earth Movers distance problem and returns the loss
.. math::
@@ -168,16 +171,26 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000):
# if empty array given then use unifor distributions
if len(a) == 0:
- a = np.ones((M.shape[0], ), dtype=np.float64)/M.shape[0]
+ a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
if len(b) == 0:
- b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1]
-
- if len(b.shape)==1:
- return emd_c(a, b, M, numItermax)[1]
+ b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
+
+ if log:
+ def f(b):
+ G, cost, u, v = emd_c(a, b, M, numItermax)
+ log = {}
+ log['G'] = G
+ log['u'] = u
+ log['v'] = v
+ return [cost, log]
+ else:
+ def f(b):
+ return emd_c(a, b, M, numItermax)[1]
+
+ if len(b.shape) == 1:
+ return f(b)
nb = b.shape[1]
# res = [emd2_c(a, b[:, i].copy(), M, numItermax) for i in range(nb)]
- def f(b):
- return emd_c(a,b,M, numItermax)[1]
- res= parmap(f, [b[:,i] for i in range(nb)],processes)
- return np.array(res)
+ res = parmap(f, [b[:, i] for i in range(nb)], processes)
+ return res
diff --git a/test/test_ot.py b/test/test_ot.py
index 78f64ab..feadef4 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -4,11 +4,12 @@
#
# License: MIT License
+import warnings
+
import numpy as np
import ot
from ot.datasets import get_1D_gauss as gauss
-import warnings
def test_doctest():
@@ -100,6 +101,21 @@ def test_emd2_multi():
np.testing.assert_allclose(emd1, emdn)
+ # emd loss multipro proc with log
+ ot.tic()
+ emdn = ot.emd2(a, b, M, log=True)
+ ot.toc('multi proc : {} s')
+
+ for i in range(len(emdn)):
+ emd = emdn[i]
+ log = emd[1]
+ cost = emd[0]
+ check_duality_gap(a, b[:, i], M, log['G'], log['u'], log['v'], cost)
+ emdn[i] = cost
+
+ emdn = np.array(emdn)
+ np.testing.assert_allclose(emd1, emdn)
+
def test_warnings():
n = 100 # nb bins
@@ -119,32 +135,22 @@ def test_warnings():
# loss matrix
M = ot.dist(x.reshape((-1, 1)), y.reshape((-1, 1))) ** (1. / 2)
- # M/=M.max()
-
- # %%
print('Computing {} EMD '.format(1))
with warnings.catch_warnings(record=True) as w:
- # Cause all warnings to always be triggered.
warnings.simplefilter("always")
- # Trigger a warning.
print('Computing {} EMD '.format(1))
G = ot.emd(a, b, M, numItermax=1)
- # Verify some things
assert "numItermax" in str(w[-1].message)
assert len(w) == 1
- # Trigger a warning.
- a[0]=100
+ a[0] = 100
print('Computing {} EMD '.format(2))
G = ot.emd(a, b, M)
- # Verify some things
assert "infeasible" in str(w[-1].message)
assert len(w) == 2
- # Trigger a warning.
- a[0]=-1
+ a[0] = -1
print('Computing {} EMD '.format(2))
G = ot.emd(a, b, M)
- # Verify some things
assert "infeasible" in str(w[-1].message)
assert len(w) == 3
@@ -167,9 +173,6 @@ def test_dual_variables():
# loss matrix
M = ot.dist(x.reshape((-1, 1)), y.reshape((-1, 1))) ** (1. / 2)
- # M/=M.max()
-
- # %%
print('Computing {} EMD '.format(1))
@@ -178,26 +181,28 @@ def test_dual_variables():
G, log = ot.emd(a, b, M, log=True)
ot.toc('1 proc : {} s')
- cost1 = (G * M).sum()
- cost_dual = np.vdot(a, log['u']) + np.vdot(b, log['v'])
-
ot.tic()
G2 = ot.emd(b, a, np.ascontiguousarray(M.T))
ot.toc('1 proc : {} s')
- cost2 = (G2 * M.T).sum()
+ cost1 = (G * M).sum()
+ # Check symmetry
+ np.testing.assert_array_almost_equal(cost1, (M * G2.T).sum())
+ # Check with closed-form solution for gaussians
+ np.testing.assert_almost_equal(cost1, np.abs(mean1 - mean2))
# Check that both cost computations are equivalent
np.testing.assert_almost_equal(cost1, log['cost'])
+ check_duality_gap(a, b, M, G, log['u'], log['v'], log['cost'])
+
+
+def check_duality_gap(a, b, M, G, u, v, cost):
+ cost_dual = np.vdot(a, u) + np.vdot(b, v)
# Check that dual and primal cost are equal
- np.testing.assert_almost_equal(cost1, cost_dual)
- # Check symmetry
- np.testing.assert_almost_equal(cost1, cost2)
- # Check with closed-form solution for gaussians
- np.testing.assert_almost_equal(cost1, np.abs(mean1 - mean2))
+ np.testing.assert_almost_equal(cost_dual, cost)
[ind1, ind2] = np.nonzero(G)
# Check that reduced cost is zero on transport arcs
- np.testing.assert_array_almost_equal((M - log['u'].reshape(-1, 1) - log['v'].reshape(1, -1))[ind1, ind2],
+ np.testing.assert_array_almost_equal((M - u.reshape(-1, 1) - v.reshape(1, -1))[ind1, ind2],
np.zeros(ind1.size))