diff options
Diffstat (limited to 'ot/lp')
-rw-r--r-- | ot/lp/EMD.h | 2 | ||||
-rw-r--r-- | ot/lp/EMD_wrapper.cpp | 4 | ||||
-rw-r--r-- | ot/lp/__init__.py | 46 | ||||
-rw-r--r-- | ot/lp/emd_wrap.pyx | 82 |
4 files changed, 68 insertions, 66 deletions
diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h index 956830c..aa92441 100644 --- a/ot/lp/EMD.h +++ b/ot/lp/EMD.h @@ -29,6 +29,6 @@ enum ProblemType { UNBOUNDED }; -int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int numItermax); +int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int max_iter); #endif diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp index 83ed56c..c8c2eb3 100644 --- a/ot/lp/EMD_wrapper.cpp +++ b/ot/lp/EMD_wrapper.cpp @@ -15,7 +15,7 @@ #include "EMD.h" -int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int numItermax) { +int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int max_iter) { // beware M and C anre strored in row major C style!!! int n, m, i,cur; double max; @@ -45,7 +45,7 @@ int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *c std::vector<int> indI(n), indJ(m); std::vector<double> weights1(n), weights2(m); Digraph di(n, m); - NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m, numItermax); + NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m, max_iter); // Set supply and demand, don't account for 0 values (faster) diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 5143a70..7bef648 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -14,8 +14,7 @@ from ..utils import parmap import multiprocessing - -def emd(a, b, M, numItermax=10000): +def emd(a, b, M, max_iter=100000): """Solves the Earth Movers distance problem and returns the OT matrix @@ -40,8 +39,9 @@ def emd(a, b, M, numItermax=10000): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - numItermax : int - Maximum number of iterations made by the LP solver. + max_iter : int, optional (default=100000) + The maximum number of iterations before stopping the optimization + algorithm if it has not converged. Returns ------- @@ -54,7 +54,7 @@ def emd(a, b, M, numItermax=10000): Simple example with obvious solution. The function emd accepts lists and perform automatic conversion to numpy arrays - + >>> import ot >>> a=[.5,.5] >>> b=[.5,.5] @@ -86,10 +86,11 @@ def emd(a, b, M, numItermax=10000): if len(b) == 0: b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] - return emd_c(a, b, M, numItermax) + return emd_c(a, b, M, max_iter) + -def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=10000): - """Solves the Earth Movers distance problem and returns the loss +def emd2(a, b, M, processes=multiprocessing.cpu_count(), max_iter=100000): + """Solves the Earth Movers distance problem and returns the loss .. math:: \gamma = arg\min_\gamma <\gamma,M>_F @@ -112,8 +113,9 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=10000): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - numItermax : int - Maximum number of iterations made by the LP solver. + max_iter : int, optional (default=100000) + The maximum number of iterations before stopping the optimization + algorithm if it has not converged. Returns ------- @@ -126,15 +128,15 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=10000): Simple example with obvious solution. The function emd accepts lists and perform automatic conversion to numpy arrays - - + + >>> import ot >>> a=[.5,.5] >>> b=[.5,.5] >>> M=[[0.,1.],[1.,0.]] >>> ot.emd2(a,b,M) 0.0 - + References ---------- @@ -157,16 +159,14 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=10000): a = np.ones((M.shape[0], ), dtype=np.float64)/M.shape[0] if len(b) == 0: b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] - - if len(b.shape)==1: - return emd2_c(a, b, M, numItermax) + + if len(b.shape) == 1: + return emd2_c(a, b, M, max_iter) else: - nb=b.shape[1] - #res=[emd2_c(a,b[:,i].copy(),M, numItermax) for i in range(nb)] + nb = b.shape[1] + # res = [emd2_c(a, b[:, i].copy(), M, max_iter) for i in range(nb)] + def f(b): - return emd2_c(a,b,M, numItermax) - res= parmap(f, [b[:,i] for i in range(nb)],processes) + return emd2_c(a, b, M, max_iter) + res = parmap(f, [b[:, i] for i in range(nb)], processes) return np.array(res) - - -
\ No newline at end of file diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 6039e1f..26d3330 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -15,56 +15,57 @@ cimport cython cdef extern from "EMD.h": - int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int numItermax) + int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int max_iter) cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED @cython.boundscheck(False) @cython.wraparound(False) -def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int numItermax): +def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int max_iter): """ Solves the Earth Movers distance problem and returns the optimal transport matrix - + gamm=emd(a,b,M) - + .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F - + \gamma = arg\min_\gamma <\gamma,M>_F + s.t. \gamma 1 = a - - \gamma^T 1= b - + + \gamma^T 1= b + \gamma\geq 0 where : - + - M is the metric cost matrix - a and b are the sample weights - + Parameters ---------- a : (ns,) ndarray, float64 - source histogram + source histogram b : (nt,) ndarray, float64 target histogram M : (ns,nt) ndarray, float64 - loss matrix - numItermax : int - Maximum number of iterations made by the LP solver. - - + loss matrix + max_iter : int + The maximum number of iterations before stopping the optimization + algorithm if it has not converged. + + Returns ------- gamma: (ns x nt) ndarray Optimal transportation matrix for the given parameters - + """ cdef int n1= M.shape[0] cdef int n2= M.shape[1] cdef float cost=0 cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([n1, n2]) - + if not len(a): a=np.ones((n1,))/n1 @@ -72,7 +73,7 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod b=np.ones((n2,))/n2 # calling the function - cdef int resultSolver = EMD_wrap(n1,n2,<double*> a.data,<double*> b.data,<double*> M.data,<double*> G.data,<double*> &cost, numItermax) + cdef int resultSolver = EMD_wrap(n1,n2,<double*> a.data,<double*> b.data,<double*> M.data,<double*> G.data,<double*> &cost, max_iter) if resultSolver != OPTIMAL: if resultSolver == INFEASIBLE: print("Problem infeasible. Try to increase numItermax.") @@ -83,49 +84,50 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod @cython.boundscheck(False) @cython.wraparound(False) -def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int numItermax): +def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int max_iter): """ Solves the Earth Movers distance problem and returns the optimal transport loss - + gamm=emd(a,b,M) - + .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F - + \gamma = arg\min_\gamma <\gamma,M>_F + s.t. \gamma 1 = a - - \gamma^T 1= b - + + \gamma^T 1= b + \gamma\geq 0 where : - + - M is the metric cost matrix - a and b are the sample weights - + Parameters ---------- a : (ns,) ndarray, float64 - source histogram + source histogram b : (nt,) ndarray, float64 target histogram M : (ns,nt) ndarray, float64 - loss matrix - numItermax : int - Maximum number of iterations made by the LP solver. - - + loss matrix + max_iter : int + The maximum number of iterations before stopping the optimization + algorithm if it has not converged. + + Returns ------- gamma: (ns x nt) ndarray Optimal transportation matrix for the given parameters - + """ cdef int n1= M.shape[0] cdef int n2= M.shape[1] cdef float cost=0 cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([n1, n2]) - + if not len(a): a=np.ones((n1,))/n1 @@ -133,13 +135,13 @@ def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mo b=np.ones((n2,))/n2 # calling the function - cdef int resultSolver = EMD_wrap(n1,n2,<double*> a.data,<double*> b.data,<double*> M.data,<double*> G.data,<double*> &cost, numItermax) + cdef int resultSolver = EMD_wrap(n1,n2,<double*> a.data,<double*> b.data,<double*> M.data,<double*> G.data,<double*> &cost, max_iter) if resultSolver != OPTIMAL: if resultSolver == INFEASIBLE: print("Problem infeasible. Try to inscrease numItermax.") elif resultSolver == UNBOUNDED: print("Problem unbounded") - + cost=0 for i in range(n1): for j in range(n2): |