[WIP] Issue with sparse emd and adding tests on macos (#158)

* First commit-warning removal * remove dense feature * pep8 * pep8 * EMD.h * pep8 again * tic toc tolerance Co-authored-by: Rémi Flamary <remi.flamary@gmail.com>
author: Nicolas Courty <ncourty@irisa.fr> 2020-04-23 13:03:28 +0200
committer: GitHub <noreply@github.com> 2020-04-23 13:03:28 +0200
commit: ef12867f1425ee86b3cfddef4287b52d46114e83 (patch)
tree: 38e023c5561b1669f4d8e602feb6728f51e1b359 /ot
parent: bacb0b992aa4e1ba7e5fd0beb0bf9617c801f833 (diff)
5 files changed, 20 insertions, 253 deletions
diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h
index 2adaace..c0fe7a3 100644
--- a/ot/lp/EMD.h
+++ b/ot/lp/EMD.h
@@ -32,9 +32,6 @@ enum ProblemType {
 
 int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter);
 
-int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D, 
-                    long *iG, long *jG, double *G, long * nG,
-                    double* alpha, double* beta, double *cost, int maxIter);
 
 
 #endif
diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp
index 28e4af2..bc873ed 100644
--- a/ot/lp/EMD_wrapper.cpp
+++ b/ot/lp/EMD_wrapper.cpp
@@ -106,185 +106,3 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G,
     return ret;
 }
 
-
-int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D, 
-                    long *iG, long *jG, double *G, long * nG,
-                    double* alpha, double* beta, double *cost, int maxIter)  {
-    // beware M and C anre strored in row major C style!!!
-
-    // Get the number of non zero coordinates for r and c and vectors
-    int n, m, i, cur;
-
-    typedef FullBipartiteDigraph Digraph;
-    DIGRAPH_TYPEDEFS(FullBipartiteDigraph);
-
-    // Get the number of non zero coordinates for r and c
-    n=0;
-    for (int i=0; i<n1; i++) {
-        double val=*(X+i);
-        if (val>0) {
-            n++;
-        }else if(val<0){
-			return INFEASIBLE;
-		}
-    }
-    m=0;
-    for (int i=0; i<n2; i++) {
-        double val=*(Y+i);
-        if (val>0) {
-            m++;
-        }else if(val<0){
-			return INFEASIBLE;
-		}
-    }
-
-    // Define the graph
-
-    std::vector<int> indI(n), indJ(m);
-    std::vector<double> weights1(n), weights2(m);
-    Digraph di(n, m);
-    NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m, maxIter);
-
-    // Set supply and demand, don't account for 0 values (faster)
-
-    cur=0;
-    for (int i=0; i<n1; i++) {
-        double val=*(X+i);
-        if (val>0) {
-            weights1[ cur ] = val;
-            indI[cur++]=i;
-        }
-    }
-
-    // Demand is actually negative supply...
-
-    cur=0;
-    for (int i=0; i<n2; i++) {
-        double val=*(Y+i);
-        if (val>0) {
-            weights2[ cur ] = -val;
-            indJ[cur++]=i;
-        }
-    }
-
-    // Define the graph
-    net.supplyMap(&weights1[0], n, &weights2[0], m);
-
-    // Set the cost of each edge
-    for (int i=0; i<n; i++) {
-        for (int j=0; j<m; j++) {
-            double val=*(D+indI[i]*n2+indJ[j]);
-            net.setCost(di.arcFromId(i*m+j), val);
-        }
-    }
-
-
-    // Solve the problem with the network simplex algorithm
-
-    int ret=net.run();
-    if (ret==(int)net.OPTIMAL || ret==(int)net.MAX_ITER_REACHED) {
-        *cost = 0;
-        Arc a; di.first(a);
-        cur=0;
-        for (; a != INVALID; di.next(a)) {
-            int i = di.source(a);
-            int j = di.target(a);
-            double flow = net.flow(a);
-            if (flow>0)
-            {
-                *cost += flow * (*(D+indI[i]*n2+indJ[j-n]));
-
-                *(G+cur) = flow;
-                *(iG+cur) = indI[i];
-                *(jG+cur) = indJ[j-n];
-                *(alpha + indI[i]) = -net.potential(i);
-                *(beta + indJ[j-n]) = net.potential(j);
-                cur++;
-            }
-        }
-        *nG=cur; // nb of value +1 for numpy indexing
-
-    }
-
-
-    return ret;
-}
-
-int EMD_wrap_all_sparse(int n1, int n2, double *X, double *Y, 
-                    long *iD, long *jD, double *D, long  nD,
-                    long *iG, long *jG, double *G, long * nG,
-                    double* alpha, double* beta, double *cost, int maxIter)  {
-    // beware M and C anre strored in row major C style!!!
-
-    // Get the number of non zero coordinates for r and c and vectors
-    int n, m, cur;
-
-    typedef FullBipartiteDigraph Digraph;
-    DIGRAPH_TYPEDEFS(FullBipartiteDigraph);
-
-    n=n1;
-    m=n2;
-
-
-    // Define the graph
-
-
-    std::vector<double>  weights2(m);
-    Digraph di(n, m);
-    NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m, maxIter);
-
-    // Set supply and demand, don't account for 0 values (faster)
-
-
-    // Demand is actually negative supply...
-
-    cur=0;
-    for (int i=0; i<n2; i++) {
-        double val=*(Y+i);
-        if (val>0) {
-            weights2[ cur ] = -val;
-        }
-    }
-
-    // Define the graph
-    net.supplyMap(X, n, &weights2[0], m);
-
-    // Set the cost of each edge
-    for (int k=0; k<nD; k++) {
-            int i = iD[k];
-            int j = jD[k];
-            net.setCost(di.arcFromId(i*m+j), D[k]);
-        
-    }
-
-
-    // Solve the problem with the network simplex algorithm
-
-    int ret=net.run();
-    if (ret==(int)net.OPTIMAL || ret==(int)net.MAX_ITER_REACHED) {
-        *cost = net.totalCost();
-        Arc a; di.first(a);
-        cur=0;
-        for (; a != INVALID; di.next(a)) {
-            int i = di.source(a);
-            int j = di.target(a);
-            double flow = net.flow(a);
-            if (flow>0)
-            {
-                
-                *(G+cur) = flow;
-                *(iG+cur) = i;
-                *(jG+cur) = j-n;
-                *(alpha + i) = -net.potential(i);
-                *(beta + j-n) = net.potential(j);
-                cur++;
-            }
-        }
-        *nG=cur; // nb of value +1 for numpy indexing
-
-    }
-
-
-    return ret;
-}
-
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 8d1baa0..ad390c5 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -172,7 +172,7 @@ def estimate_dual_null_weights(alpha0, beta0, a, b, M):
     return center_ot_dual(alpha, beta, a, b)
 
 
-def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True):
+def emd(a, b, M, numItermax=100000, log=False, center_dual=True):
     r"""Solves the Earth Movers distance problem and returns the OT matrix
 
 
@@ -207,10 +207,6 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True):
     log: bool, optional (default=False)
         If True, returns a dictionary containing the cost and dual
         variables. Otherwise returns only the optimal transportation matrix.
-    dense: boolean, optional (default=True)
-        If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt).
-        Otherwise returns a sparse representation using scipy's `coo_matrix`
-        format.
     center_dual: boolean, optional (default=True)
         If True, centers the dual potential using function
         :ref:`center_ot_dual`.
@@ -267,25 +263,14 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True):
     asel = a != 0
     bsel = b != 0
 
-    if dense:
-        G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
-
-        if center_dual:
-            u, v = center_ot_dual(u, v, a, b)
+    G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
 
-        if np.any(~asel) or np.any(~bsel):
-            u, v = estimate_dual_null_weights(u, v, a, b, M)
-
-    else:
-        Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
-        G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
-
-        if center_dual:
-            u, v = center_ot_dual(u, v, a, b)
-
-        if np.any(~asel) or np.any(~bsel):
-            u, v = estimate_dual_null_weights(u, v, a, b, M)
+    if center_dual:
+        u, v = center_ot_dual(u, v, a, b)
 
+    if np.any(~asel) or np.any(~bsel):
+        u, v = estimate_dual_null_weights(u, v, a, b, M)
+    
     result_code_string = check_result(result_code)
     if log:
         log = {}
@@ -299,7 +284,7 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True):
 
 
 def emd2(a, b, M, processes=multiprocessing.cpu_count(),
-         numItermax=100000, log=False, dense=True, return_matrix=False,
+         numItermax=100000, log=False, return_matrix=False,
          center_dual=True):
     r"""Solves the Earth Movers distance problem and returns the loss
 
@@ -404,11 +389,8 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
     if log or return_matrix:
         def f(b):
             bsel = b != 0
-            if dense:
-                G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
-            else:
-                Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
-                G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
+            
+            G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
 
             if center_dual:
                 u, v = center_ot_dual(u, v, a, b)
@@ -428,11 +410,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
     else:
         def f(b):
             bsel = b != 0
-            if dense:
-                G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
-            else:
-                Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
-                G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
+            G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
 
             if center_dual:
                 u, v = center_ot_dual(u, v, a, b)
@@ -440,7 +418,6 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
             if np.any(~asel) or np.any(~bsel):
                 u, v = estimate_dual_null_weights(u, v, a, b, M)
 
-            result_code_string = check_result(result_code)
             check_result(result_code)
             return cost
 
diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
index d345fd4..b6bda47 100644
--- a/ot/lp/emd_wrap.pyx
+++ b/ot/lp/emd_wrap.pyx
@@ -20,9 +20,6 @@ import warnings
 
 cdef extern from "EMD.h":
     int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter)
-    int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D, 
-                    long *iG, long *jG, double *G, long * nG,
-                    double* alpha, double* beta, double *cost, int maxIter)
     cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED
 
 
@@ -38,13 +35,10 @@ def check_result(result_code):
         message = "numItermax reached before optimality. Try to increase numItermax."
     warnings.warn(message)
     return message
-
-
-
-
+ 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"]  b, np.ndarray[double, ndim=2, mode="c"]  M, int max_iter, bint dense):
+def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"]  b, np.ndarray[double, ndim=2, mode="c"]  M, int max_iter):
     """
         Solves the Earth Movers distance problem and returns the optimal transport matrix
 
@@ -83,8 +77,6 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
     max_iter : int
         The maximum number of iterations before stopping the optimization
         algorithm if it has not converged.
-    dense : bool
-        Return a sparse transport matrix if set to False
 
     Returns
     -------
@@ -114,29 +106,13 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
     if not len(b):
         b=np.ones((n2,))/n2
 
-    if dense:
-        # init OT matrix
-        G=np.zeros([n1, n2])
-
-        # calling the function
-        result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
-
-        return G, cost, alpha, beta, result_code
-
-
-    else:
-        
-        # init sparse OT matrix
-        Gv=np.zeros(nmax)
-        iG=np.zeros(nmax,dtype=np.int)
-        jG=np.zeros(nmax,dtype=np.int)
-
-
-        result_code = EMD_wrap_return_sparse(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <long*> iG.data, <long*> jG.data, <double*> Gv.data, <long*> &nG, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
-
+    # init OT matrix
+    G=np.zeros([n1, n2])
 
-        return Gv[:nG], iG[:nG], jG[:nG], cost, alpha, beta, result_code
+    # calling the function
+    result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
 
+    return G, cost, alpha, beta, result_code
 
 
 @cython.boundscheck(False)
diff --git a/ot/lp/network_simplex_simple.h b/ot/lp/network_simplex_simple.h
index 498e921..5d93040 100644
--- a/ot/lp/network_simplex_simple.h
+++ b/ot/lp/network_simplex_simple.h
@@ -875,7 +875,7 @@ namespace lemon {
              c += Number(it->second) * Number(_cost[it->first]);
              return c;*/
 
-            for (int i=0; i<_flow.size(); i++)
+            for (unsigned long i=0; i<_flow.size(); i++)
                 c += _flow[i] * Number(_cost[i]);
             return c;
 
@@ -1257,7 +1257,7 @@ namespace lemon {
                 u = w;
             }
             _pred[u_in] = in_arc;
-            _forward[u_in] = (u_in == _source[in_arc]);
+            _forward[u_in] = ((unsigned int)u_in == _source[in_arc]);
             _succ_num[u_in] = old_succ_num;
 
             // Set limits for updating _last_succ form v_in and v_out
@@ -1418,7 +1418,6 @@ namespace lemon {
         template <typename PivotRuleImpl>
         ProblemType start() {
             PivotRuleImpl pivot(*this);
-            double prevCost=-1;
 			ProblemType retVal = OPTIMAL;
 
             // Perform heuristic initial pivots
author	Nicolas Courty <ncourty@irisa.fr>	2020-04-23 13:03:28 +0200
committer	GitHub <noreply@github.com>	2020-04-23 13:03:28 +0200
commit	ef12867f1425ee86b3cfddef4287b52d46114e83 (patch)
tree	38e023c5561b1669f4d8e602feb6728f51e1b359 /ot
parent	bacb0b992aa4e1ba7e5fd0beb0bf9617c801f833 (diff)