From ef12867f1425ee86b3cfddef4287b52d46114e83 Mon Sep 17 00:00:00 2001 From: Nicolas Courty Date: Thu, 23 Apr 2020 13:03:28 +0200 Subject: [WIP] Issue with sparse emd and adding tests on macos (#158) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * First commit-warning removal * remove dense feature * pep8 * pep8 * EMD.h * pep8 again * tic toc tolerance Co-authored-by: RĂ©mi Flamary --- ot/lp/EMD.h | 3 - ot/lp/EMD_wrapper.cpp | 182 ----------------------------------------- ot/lp/__init__.py | 45 +++------- ot/lp/emd_wrap.pyx | 38 ++------- ot/lp/network_simplex_simple.h | 5 +- 5 files changed, 20 insertions(+), 253 deletions(-) (limited to 'ot') diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h index 2adaace..c0fe7a3 100644 --- a/ot/lp/EMD.h +++ b/ot/lp/EMD.h @@ -32,9 +32,6 @@ enum ProblemType { int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter); -int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D, - long *iG, long *jG, double *G, long * nG, - double* alpha, double* beta, double *cost, int maxIter); #endif diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp index 28e4af2..bc873ed 100644 --- a/ot/lp/EMD_wrapper.cpp +++ b/ot/lp/EMD_wrapper.cpp @@ -106,185 +106,3 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, return ret; } - -int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D, - long *iG, long *jG, double *G, long * nG, - double* alpha, double* beta, double *cost, int maxIter) { - // beware M and C anre strored in row major C style!!! - - // Get the number of non zero coordinates for r and c and vectors - int n, m, i, cur; - - typedef FullBipartiteDigraph Digraph; - DIGRAPH_TYPEDEFS(FullBipartiteDigraph); - - // Get the number of non zero coordinates for r and c - n=0; - for (int i=0; i0) { - n++; - }else if(val<0){ - return INFEASIBLE; - } - } - m=0; - for (int i=0; i0) { - m++; - }else if(val<0){ - return INFEASIBLE; - } - } - - // Define the graph - - std::vector indI(n), indJ(m); - std::vector weights1(n), weights2(m); - Digraph di(n, m); - NetworkSimplexSimple net(di, true, n+m, n*m, maxIter); - - // Set supply and demand, don't account for 0 values (faster) - - cur=0; - for (int i=0; i0) { - weights1[ cur ] = val; - indI[cur++]=i; - } - } - - // Demand is actually negative supply... - - cur=0; - for (int i=0; i0) { - weights2[ cur ] = -val; - indJ[cur++]=i; - } - } - - // Define the graph - net.supplyMap(&weights1[0], n, &weights2[0], m); - - // Set the cost of each edge - for (int i=0; i0) - { - *cost += flow * (*(D+indI[i]*n2+indJ[j-n])); - - *(G+cur) = flow; - *(iG+cur) = indI[i]; - *(jG+cur) = indJ[j-n]; - *(alpha + indI[i]) = -net.potential(i); - *(beta + indJ[j-n]) = net.potential(j); - cur++; - } - } - *nG=cur; // nb of value +1 for numpy indexing - - } - - - return ret; -} - -int EMD_wrap_all_sparse(int n1, int n2, double *X, double *Y, - long *iD, long *jD, double *D, long nD, - long *iG, long *jG, double *G, long * nG, - double* alpha, double* beta, double *cost, int maxIter) { - // beware M and C anre strored in row major C style!!! - - // Get the number of non zero coordinates for r and c and vectors - int n, m, cur; - - typedef FullBipartiteDigraph Digraph; - DIGRAPH_TYPEDEFS(FullBipartiteDigraph); - - n=n1; - m=n2; - - - // Define the graph - - - std::vector weights2(m); - Digraph di(n, m); - NetworkSimplexSimple net(di, true, n+m, n*m, maxIter); - - // Set supply and demand, don't account for 0 values (faster) - - - // Demand is actually negative supply... - - cur=0; - for (int i=0; i0) { - weights2[ cur ] = -val; - } - } - - // Define the graph - net.supplyMap(X, n, &weights2[0], m); - - // Set the cost of each edge - for (int k=0; k0) - { - - *(G+cur) = flow; - *(iG+cur) = i; - *(jG+cur) = j-n; - *(alpha + i) = -net.potential(i); - *(beta + j-n) = net.potential(j); - cur++; - } - } - *nG=cur; // nb of value +1 for numpy indexing - - } - - - return ret; -} - diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 8d1baa0..ad390c5 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -172,7 +172,7 @@ def estimate_dual_null_weights(alpha0, beta0, a, b, M): return center_ot_dual(alpha, beta, a, b) -def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True): +def emd(a, b, M, numItermax=100000, log=False, center_dual=True): r"""Solves the Earth Movers distance problem and returns the OT matrix @@ -207,10 +207,6 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True): log: bool, optional (default=False) If True, returns a dictionary containing the cost and dual variables. Otherwise returns only the optimal transportation matrix. - dense: boolean, optional (default=True) - If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt). - Otherwise returns a sparse representation using scipy's `coo_matrix` - format. center_dual: boolean, optional (default=True) If True, centers the dual potential using function :ref:`center_ot_dual`. @@ -267,25 +263,14 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True): asel = a != 0 bsel = b != 0 - if dense: - G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense) - - if center_dual: - u, v = center_ot_dual(u, v, a, b) + G, cost, u, v, result_code = emd_c(a, b, M, numItermax) - if np.any(~asel) or np.any(~bsel): - u, v = estimate_dual_null_weights(u, v, a, b, M) - - else: - Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense) - G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0])) - - if center_dual: - u, v = center_ot_dual(u, v, a, b) - - if np.any(~asel) or np.any(~bsel): - u, v = estimate_dual_null_weights(u, v, a, b, M) + if center_dual: + u, v = center_ot_dual(u, v, a, b) + if np.any(~asel) or np.any(~bsel): + u, v = estimate_dual_null_weights(u, v, a, b, M) + result_code_string = check_result(result_code) if log: log = {} @@ -299,7 +284,7 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True): def emd2(a, b, M, processes=multiprocessing.cpu_count(), - numItermax=100000, log=False, dense=True, return_matrix=False, + numItermax=100000, log=False, return_matrix=False, center_dual=True): r"""Solves the Earth Movers distance problem and returns the loss @@ -404,11 +389,8 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), if log or return_matrix: def f(b): bsel = b != 0 - if dense: - G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense) - else: - Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense) - G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0])) + + G, cost, u, v, result_code = emd_c(a, b, M, numItermax) if center_dual: u, v = center_ot_dual(u, v, a, b) @@ -428,11 +410,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), else: def f(b): bsel = b != 0 - if dense: - G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense) - else: - Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense) - G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0])) + G, cost, u, v, result_code = emd_c(a, b, M, numItermax) if center_dual: u, v = center_ot_dual(u, v, a, b) @@ -440,7 +418,6 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), if np.any(~asel) or np.any(~bsel): u, v = estimate_dual_null_weights(u, v, a, b, M) - result_code_string = check_result(result_code) check_result(result_code) return cost diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index d345fd4..b6bda47 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -20,9 +20,6 @@ import warnings cdef extern from "EMD.h": int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter) - int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D, - long *iG, long *jG, double *G, long * nG, - double* alpha, double* beta, double *cost, int maxIter) cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED @@ -38,13 +35,10 @@ def check_result(result_code): message = "numItermax reached before optimality. Try to increase numItermax." warnings.warn(message) return message - - - - + @cython.boundscheck(False) @cython.wraparound(False) -def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int max_iter, bint dense): +def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int max_iter): """ Solves the Earth Movers distance problem and returns the optimal transport matrix @@ -83,8 +77,6 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod max_iter : int The maximum number of iterations before stopping the optimization algorithm if it has not converged. - dense : bool - Return a sparse transport matrix if set to False Returns ------- @@ -114,29 +106,13 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod if not len(b): b=np.ones((n2,))/n2 - if dense: - # init OT matrix - G=np.zeros([n1, n2]) - - # calling the function - result_code = EMD_wrap(n1, n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, max_iter) - - return G, cost, alpha, beta, result_code - - - else: - - # init sparse OT matrix - Gv=np.zeros(nmax) - iG=np.zeros(nmax,dtype=np.int) - jG=np.zeros(nmax,dtype=np.int) - - - result_code = EMD_wrap_return_sparse(n1, n2, a.data, b.data, M.data, iG.data, jG.data, Gv.data, &nG, alpha.data, beta.data, &cost, max_iter) - + # init OT matrix + G=np.zeros([n1, n2]) - return Gv[:nG], iG[:nG], jG[:nG], cost, alpha, beta, result_code + # calling the function + result_code = EMD_wrap(n1, n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, max_iter) + return G, cost, alpha, beta, result_code @cython.boundscheck(False) diff --git a/ot/lp/network_simplex_simple.h b/ot/lp/network_simplex_simple.h index 498e921..5d93040 100644 --- a/ot/lp/network_simplex_simple.h +++ b/ot/lp/network_simplex_simple.h @@ -875,7 +875,7 @@ namespace lemon { c += Number(it->second) * Number(_cost[it->first]); return c;*/ - for (int i=0; i<_flow.size(); i++) + for (unsigned long i=0; i<_flow.size(); i++) c += _flow[i] * Number(_cost[i]); return c; @@ -1257,7 +1257,7 @@ namespace lemon { u = w; } _pred[u_in] = in_arc; - _forward[u_in] = (u_in == _source[in_arc]); + _forward[u_in] = ((unsigned int)u_in == _source[in_arc]); _succ_num[u_in] = old_succ_num; // Set limits for updating _last_succ form v_in and v_out @@ -1418,7 +1418,6 @@ namespace lemon { template ProblemType start() { PivotRuleImpl pivot(*this); - double prevCost=-1; ProblemType retVal = OPTIMAL; // Perform heuristic initial pivots -- cgit v1.2.3