summaryrefslogtreecommitdiff
path: root/ot
diff options
context:
space:
mode:
authorNicolas Courty <ncourty@irisa.fr>2020-04-23 13:03:28 +0200
committerGitHub <noreply@github.com>2020-04-23 13:03:28 +0200
commitef12867f1425ee86b3cfddef4287b52d46114e83 (patch)
tree38e023c5561b1669f4d8e602feb6728f51e1b359 /ot
parentbacb0b992aa4e1ba7e5fd0beb0bf9617c801f833 (diff)
[WIP] Issue with sparse emd and adding tests on macos (#158)
* First commit-warning removal * remove dense feature * pep8 * pep8 * EMD.h * pep8 again * tic toc tolerance Co-authored-by: RĂ©mi Flamary <remi.flamary@gmail.com>
Diffstat (limited to 'ot')
-rw-r--r--ot/lp/EMD.h3
-rw-r--r--ot/lp/EMD_wrapper.cpp182
-rw-r--r--ot/lp/__init__.py45
-rw-r--r--ot/lp/emd_wrap.pyx38
-rw-r--r--ot/lp/network_simplex_simple.h5
5 files changed, 20 insertions, 253 deletions
diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h
index 2adaace..c0fe7a3 100644
--- a/ot/lp/EMD.h
+++ b/ot/lp/EMD.h
@@ -32,9 +32,6 @@ enum ProblemType {
int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter);
-int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D,
- long *iG, long *jG, double *G, long * nG,
- double* alpha, double* beta, double *cost, int maxIter);
#endif
diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp
index 28e4af2..bc873ed 100644
--- a/ot/lp/EMD_wrapper.cpp
+++ b/ot/lp/EMD_wrapper.cpp
@@ -106,185 +106,3 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G,
return ret;
}
-
-int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D,
- long *iG, long *jG, double *G, long * nG,
- double* alpha, double* beta, double *cost, int maxIter) {
- // beware M and C anre strored in row major C style!!!
-
- // Get the number of non zero coordinates for r and c and vectors
- int n, m, i, cur;
-
- typedef FullBipartiteDigraph Digraph;
- DIGRAPH_TYPEDEFS(FullBipartiteDigraph);
-
- // Get the number of non zero coordinates for r and c
- n=0;
- for (int i=0; i<n1; i++) {
- double val=*(X+i);
- if (val>0) {
- n++;
- }else if(val<0){
- return INFEASIBLE;
- }
- }
- m=0;
- for (int i=0; i<n2; i++) {
- double val=*(Y+i);
- if (val>0) {
- m++;
- }else if(val<0){
- return INFEASIBLE;
- }
- }
-
- // Define the graph
-
- std::vector<int> indI(n), indJ(m);
- std::vector<double> weights1(n), weights2(m);
- Digraph di(n, m);
- NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m, maxIter);
-
- // Set supply and demand, don't account for 0 values (faster)
-
- cur=0;
- for (int i=0; i<n1; i++) {
- double val=*(X+i);
- if (val>0) {
- weights1[ cur ] = val;
- indI[cur++]=i;
- }
- }
-
- // Demand is actually negative supply...
-
- cur=0;
- for (int i=0; i<n2; i++) {
- double val=*(Y+i);
- if (val>0) {
- weights2[ cur ] = -val;
- indJ[cur++]=i;
- }
- }
-
- // Define the graph
- net.supplyMap(&weights1[0], n, &weights2[0], m);
-
- // Set the cost of each edge
- for (int i=0; i<n; i++) {
- for (int j=0; j<m; j++) {
- double val=*(D+indI[i]*n2+indJ[j]);
- net.setCost(di.arcFromId(i*m+j), val);
- }
- }
-
-
- // Solve the problem with the network simplex algorithm
-
- int ret=net.run();
- if (ret==(int)net.OPTIMAL || ret==(int)net.MAX_ITER_REACHED) {
- *cost = 0;
- Arc a; di.first(a);
- cur=0;
- for (; a != INVALID; di.next(a)) {
- int i = di.source(a);
- int j = di.target(a);
- double flow = net.flow(a);
- if (flow>0)
- {
- *cost += flow * (*(D+indI[i]*n2+indJ[j-n]));
-
- *(G+cur) = flow;
- *(iG+cur) = indI[i];
- *(jG+cur) = indJ[j-n];
- *(alpha + indI[i]) = -net.potential(i);
- *(beta + indJ[j-n]) = net.potential(j);
- cur++;
- }
- }
- *nG=cur; // nb of value +1 for numpy indexing
-
- }
-
-
- return ret;
-}
-
-int EMD_wrap_all_sparse(int n1, int n2, double *X, double *Y,
- long *iD, long *jD, double *D, long nD,
- long *iG, long *jG, double *G, long * nG,
- double* alpha, double* beta, double *cost, int maxIter) {
- // beware M and C anre strored in row major C style!!!
-
- // Get the number of non zero coordinates for r and c and vectors
- int n, m, cur;
-
- typedef FullBipartiteDigraph Digraph;
- DIGRAPH_TYPEDEFS(FullBipartiteDigraph);
-
- n=n1;
- m=n2;
-
-
- // Define the graph
-
-
- std::vector<double> weights2(m);
- Digraph di(n, m);
- NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m, maxIter);
-
- // Set supply and demand, don't account for 0 values (faster)
-
-
- // Demand is actually negative supply...
-
- cur=0;
- for (int i=0; i<n2; i++) {
- double val=*(Y+i);
- if (val>0) {
- weights2[ cur ] = -val;
- }
- }
-
- // Define the graph
- net.supplyMap(X, n, &weights2[0], m);
-
- // Set the cost of each edge
- for (int k=0; k<nD; k++) {
- int i = iD[k];
- int j = jD[k];
- net.setCost(di.arcFromId(i*m+j), D[k]);
-
- }
-
-
- // Solve the problem with the network simplex algorithm
-
- int ret=net.run();
- if (ret==(int)net.OPTIMAL || ret==(int)net.MAX_ITER_REACHED) {
- *cost = net.totalCost();
- Arc a; di.first(a);
- cur=0;
- for (; a != INVALID; di.next(a)) {
- int i = di.source(a);
- int j = di.target(a);
- double flow = net.flow(a);
- if (flow>0)
- {
-
- *(G+cur) = flow;
- *(iG+cur) = i;
- *(jG+cur) = j-n;
- *(alpha + i) = -net.potential(i);
- *(beta + j-n) = net.potential(j);
- cur++;
- }
- }
- *nG=cur; // nb of value +1 for numpy indexing
-
- }
-
-
- return ret;
-}
-
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 8d1baa0..ad390c5 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -172,7 +172,7 @@ def estimate_dual_null_weights(alpha0, beta0, a, b, M):
return center_ot_dual(alpha, beta, a, b)
-def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True):
+def emd(a, b, M, numItermax=100000, log=False, center_dual=True):
r"""Solves the Earth Movers distance problem and returns the OT matrix
@@ -207,10 +207,6 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True):
log: bool, optional (default=False)
If True, returns a dictionary containing the cost and dual
variables. Otherwise returns only the optimal transportation matrix.
- dense: boolean, optional (default=True)
- If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt).
- Otherwise returns a sparse representation using scipy's `coo_matrix`
- format.
center_dual: boolean, optional (default=True)
If True, centers the dual potential using function
:ref:`center_ot_dual`.
@@ -267,25 +263,14 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True):
asel = a != 0
bsel = b != 0
- if dense:
- G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
-
- if center_dual:
- u, v = center_ot_dual(u, v, a, b)
+ G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
- if np.any(~asel) or np.any(~bsel):
- u, v = estimate_dual_null_weights(u, v, a, b, M)
-
- else:
- Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
- G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
-
- if center_dual:
- u, v = center_ot_dual(u, v, a, b)
-
- if np.any(~asel) or np.any(~bsel):
- u, v = estimate_dual_null_weights(u, v, a, b, M)
+ if center_dual:
+ u, v = center_ot_dual(u, v, a, b)
+ if np.any(~asel) or np.any(~bsel):
+ u, v = estimate_dual_null_weights(u, v, a, b, M)
+
result_code_string = check_result(result_code)
if log:
log = {}
@@ -299,7 +284,7 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True):
def emd2(a, b, M, processes=multiprocessing.cpu_count(),
- numItermax=100000, log=False, dense=True, return_matrix=False,
+ numItermax=100000, log=False, return_matrix=False,
center_dual=True):
r"""Solves the Earth Movers distance problem and returns the loss
@@ -404,11 +389,8 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
if log or return_matrix:
def f(b):
bsel = b != 0
- if dense:
- G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
- else:
- Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
- G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
+
+ G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
if center_dual:
u, v = center_ot_dual(u, v, a, b)
@@ -428,11 +410,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
else:
def f(b):
bsel = b != 0
- if dense:
- G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
- else:
- Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
- G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
+ G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
if center_dual:
u, v = center_ot_dual(u, v, a, b)
@@ -440,7 +418,6 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
if np.any(~asel) or np.any(~bsel):
u, v = estimate_dual_null_weights(u, v, a, b, M)
- result_code_string = check_result(result_code)
check_result(result_code)
return cost
diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
index d345fd4..b6bda47 100644
--- a/ot/lp/emd_wrap.pyx
+++ b/ot/lp/emd_wrap.pyx
@@ -20,9 +20,6 @@ import warnings
cdef extern from "EMD.h":
int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter)
- int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D,
- long *iG, long *jG, double *G, long * nG,
- double* alpha, double* beta, double *cost, int maxIter)
cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED
@@ -38,13 +35,10 @@ def check_result(result_code):
message = "numItermax reached before optimality. Try to increase numItermax."
warnings.warn(message)
return message
-
-
-
-
+
@cython.boundscheck(False)
@cython.wraparound(False)
-def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int max_iter, bint dense):
+def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int max_iter):
"""
Solves the Earth Movers distance problem and returns the optimal transport matrix
@@ -83,8 +77,6 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
max_iter : int
The maximum number of iterations before stopping the optimization
algorithm if it has not converged.
- dense : bool
- Return a sparse transport matrix if set to False
Returns
-------
@@ -114,29 +106,13 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
if not len(b):
b=np.ones((n2,))/n2
- if dense:
- # init OT matrix
- G=np.zeros([n1, n2])
-
- # calling the function
- result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
-
- return G, cost, alpha, beta, result_code
-
-
- else:
-
- # init sparse OT matrix
- Gv=np.zeros(nmax)
- iG=np.zeros(nmax,dtype=np.int)
- jG=np.zeros(nmax,dtype=np.int)
-
-
- result_code = EMD_wrap_return_sparse(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <long*> iG.data, <long*> jG.data, <double*> Gv.data, <long*> &nG, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
-
+ # init OT matrix
+ G=np.zeros([n1, n2])
- return Gv[:nG], iG[:nG], jG[:nG], cost, alpha, beta, result_code
+ # calling the function
+ result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
+ return G, cost, alpha, beta, result_code
@cython.boundscheck(False)
diff --git a/ot/lp/network_simplex_simple.h b/ot/lp/network_simplex_simple.h
index 498e921..5d93040 100644
--- a/ot/lp/network_simplex_simple.h
+++ b/ot/lp/network_simplex_simple.h
@@ -875,7 +875,7 @@ namespace lemon {
c += Number(it->second) * Number(_cost[it->first]);
return c;*/
- for (int i=0; i<_flow.size(); i++)
+ for (unsigned long i=0; i<_flow.size(); i++)
c += _flow[i] * Number(_cost[i]);
return c;
@@ -1257,7 +1257,7 @@ namespace lemon {
u = w;
}
_pred[u_in] = in_arc;
- _forward[u_in] = (u_in == _source[in_arc]);
+ _forward[u_in] = ((unsigned int)u_in == _source[in_arc]);
_succ_num[u_in] = old_succ_num;
// Set limits for updating _last_succ form v_in and v_out
@@ -1418,7 +1418,6 @@ namespace lemon {
template <typename PivotRuleImpl>
ProblemType start() {
PivotRuleImpl pivot(*this);
- double prevCost=-1;
ProblemType retVal = OPTIMAL;
// Perform heuristic initial pivots