From 57321bd0172c97b77dfc8b14972c18d063b6dda8 Mon Sep 17 00:00:00 2001 From: RĂ©mi Flamary Date: Mon, 2 Dec 2019 11:13:07 +0100 Subject: add awesome sparse solver --- ot/lp/EMD_wrapper.cpp | 65 ++++++++++++++++++++++++++++++++++++--------------- ot/lp/emd_wrap.pyx | 2 +- test/test_ot.py | 20 ++++++++++++++++ 3 files changed, 67 insertions(+), 20 deletions(-) diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp index 3ca7319..2aa44c1 100644 --- a/ot/lp/EMD_wrapper.cpp +++ b/ot/lp/EMD_wrapper.cpp @@ -111,23 +111,19 @@ int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D, long *iG, long *jG, double *G, double* alpha, double* beta, double *cost, int maxIter) { // beware M and C anre strored in row major C style!!! - int n, m, i, cur; + + // Get the number of non zero coordinates for r and c and vectors + int n, m, i, cur; typedef FullBipartiteDigraph Digraph; DIGRAPH_TYPEDEFS(FullBipartiteDigraph); - std::vector indI(n), indJ(m); - std::vector weights1(n), weights2(m); - Digraph di(n, m); - NetworkSimplexSimple net(di, true, n+m, n*m, maxIter); - - // Get the number of non zero coordinates for r and c and vectors + // Get the number of non zero coordinates for r and c n=0; for (int i=0; i0) { - weights1[ n ] = val; - indI[n++]=i; + n++; }else if(val<0){ return INFEASIBLE; } @@ -136,13 +132,41 @@ int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D, for (int i=0; i0) { - weights2[ m ] = -val; - indJ[m++]=i; + m++; }else if(val<0){ return INFEASIBLE; } } + // Define the graph + + std::vector indI(n), indJ(m); + std::vector weights1(n), weights2(m); + Digraph di(n, m); + NetworkSimplexSimple net(di, true, n+m, n*m, maxIter); + + // Set supply and demand, don't account for 0 values (faster) + + cur=0; + for (int i=0; i0) { + weights1[ cur ] = val; + indI[cur++]=i; + } + } + + // Demand is actually negative supply... + + cur=0; + for (int i=0; i0) { + weights2[ cur ] = -val; + indJ[cur++]=i; + } + } + // Define the graph net.supplyMap(&weights1[0], n, &weights2[0], m); @@ -166,14 +190,17 @@ int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D, int i = di.source(a); int j = di.target(a); double flow = net.flow(a); - *cost += flow * (*(D+indI[i]*n2+indJ[j-n])); - - *(G+cur) = flow; - *(iG+cur) = indI[i]; - *(jG+cur) = indJ[j]; - *(alpha + indI[i]) = -net.potential(i); - *(beta + indJ[j-n]) = net.potential(j); - cur++; + if (flow>0) + { + *cost += flow * (*(D+indI[i]*n2+indJ[j-n])); + + *(G+cur) = flow; + *(iG+cur) = indI[i]; + *(jG+cur) = indJ[j-n]; + *(alpha + indI[i]) = -net.potential(i); + *(beta + indJ[j-n]) = net.potential(j); + cur++; + } } } diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 345cb66..f183995 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -111,7 +111,7 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod jG=np.zeros(nmax,dtype=np.int) - result_code = EMD_wrap_return_sparse(n1, n2, a.data, b.data, M.data, iG.data, jG.data, G.data, alpha.data, beta.data, &cost, max_iter) + result_code = EMD_wrap_return_sparse(n1, n2, a.data, b.data, M.data, iG.data, jG.data, Gv.data, alpha.data, beta.data, &cost, max_iter) return Gv, iG, jG, cost, alpha, beta, result_code diff --git a/test/test_ot.py b/test/test_ot.py index dacae0a..4d59e12 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -118,6 +118,26 @@ def test_emd_empty(): np.testing.assert_allclose(w, 0) +def test_emd_sparse(): + + n = 100 + rng = np.random.RandomState(0) + + x = rng.randn(n, 2) + x2 = rng.randn(n, 2) + u = ot.utils.unif(n) + + M = ot.dist(x, x2) + + G = ot.emd([], [], M) + + Gs = ot.emd([], [], M, sparse=True) + + # check G is the same + np.testing.assert_allclose(G, Gs.todense()) + # check constraints + + def test_emd2_multi(): n = 500 # nb bins -- cgit v1.2.3