From 6ae3ad7bb48b1fa8964cfd2791bdb86267776495 Mon Sep 17 00:00:00 2001 From: aje Date: Tue, 29 Aug 2017 15:38:11 +0200 Subject: Changes to LP solver: - Allow to modify the maximal number of iterations - Display an error message in the python console if the solver encountered an issue --- ot/da.py | 4 ++-- ot/lp/EMD.h | 7 ++++++- ot/lp/EMD_wrapper.cpp | 7 +++---- ot/lp/__init__.py | 16 ++++++++++------ ot/lp/emd_wrap.pyx | 25 ++++++++++++++++++++----- 5 files changed, 41 insertions(+), 18 deletions(-) (limited to 'ot') diff --git a/ot/da.py b/ot/da.py index 78dc150..0dfd02f 100644 --- a/ot/da.py +++ b/ot/da.py @@ -658,7 +658,7 @@ class OTDA(object): self.metric = metric self.computed = False - def fit(self, xs, xt, ws=None, wt=None, norm=None): + def fit(self, xs, xt, ws=None, wt=None, norm=None, numItermax=10000): """Fit domain adaptation between samples is xs and xt (with optional weights)""" self.xs = xs @@ -674,7 +674,7 @@ class OTDA(object): self.M = dist(xs, xt, metric=self.metric) self.normalizeM(norm) - self.G = emd(ws, wt, self.M) + self.G = emd(ws, wt, self.M, numItermax) self.computed = True def interp(self, direction=1): diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h index 40d7192..956830c 100644 --- a/ot/lp/EMD.h +++ b/ot/lp/EMD.h @@ -23,7 +23,12 @@ using namespace lemon; typedef unsigned int node_id_type; +enum ProblemType { + INFEASIBLE, + OPTIMAL, + UNBOUNDED +}; -void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost); +int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int numItermax); #endif diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp index cad4750..83ed56c 100644 --- a/ot/lp/EMD_wrapper.cpp +++ b/ot/lp/EMD_wrapper.cpp @@ -15,11 +15,10 @@ #include "EMD.h" -void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost) { +int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int numItermax) { // beware M and C anre strored in row major C style!!! int n, m, i,cur; double max; - int max_iter=10000; typedef FullBipartiteDigraph Digraph; DIGRAPH_TYPEDEFS(FullBipartiteDigraph); @@ -46,7 +45,7 @@ void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double * std::vector indI(n), indJ(m); std::vector weights1(n), weights2(m); Digraph di(n, m); - NetworkSimplexSimple net(di, true, n+m, n*m,max_iter); + NetworkSimplexSimple net(di, true, n+m, n*m, numItermax); // Set supply and demand, don't account for 0 values (faster) @@ -116,5 +115,5 @@ void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double * }; - + return ret; } diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 6e0bdb8..62afe76 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -15,7 +15,7 @@ import multiprocessing -def emd(a, b, M): +def emd(a, b, M, numItermax=10000): """Solves the Earth Movers distance problem and returns the OT matrix @@ -40,6 +40,8 @@ def emd(a, b, M): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix + numItermax : int + Maximum number of iterations made by the LP solver. Returns ------- @@ -84,9 +86,9 @@ def emd(a, b, M): if len(b) == 0: b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] - return emd_c(a, b, M) + return emd_c(a, b, M, numItermax) -def emd2(a, b, M,processes=multiprocessing.cpu_count()): +def emd2(a, b, M, numItermax=10000, processes=multiprocessing.cpu_count()): """Solves the Earth Movers distance problem and returns the loss .. math:: @@ -110,6 +112,8 @@ def emd2(a, b, M,processes=multiprocessing.cpu_count()): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix + numItermax : int + Maximum number of iterations made by the LP solver. Returns ------- @@ -155,12 +159,12 @@ def emd2(a, b, M,processes=multiprocessing.cpu_count()): b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] if len(b.shape)==1: - return emd2_c(a, b, M) + return emd2_c(a, b, M, numItermax) else: nb=b.shape[1] - #res=[emd2_c(a,b[:,i].copy(),M) for i in range(nb)] + #res=[emd2_c(a,b[:,i].copy(),M, numItermax) for i in range(nb)] def f(b): - return emd2_c(a,b,M) + return emd2_c(a,b,M, numItermax) res= parmap(f, [b[:,i] for i in range(nb)],processes) return np.array(res) diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 46c96c1..ed8c416 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -15,13 +15,14 @@ cimport cython cdef extern from "EMD.h": - void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost) + int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int numItermax) + cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED @cython.boundscheck(False) @cython.wraparound(False) -def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M): +def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int numItermax): """ Solves the Earth Movers distance problem and returns the optimal transport matrix @@ -48,6 +49,8 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod target histogram M : (ns,nt) ndarray, float64 loss matrix + numItermax : int + Maximum number of iterations made by the LP solver. Returns @@ -69,13 +72,18 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod b=np.ones((n2,))/n2 # calling the function - EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, &cost) + cdef int resultSolver = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, &cost, numItermax) + if resultSolver != OPTIMAL: + if resultSolver == INFEASIBLE: + print("Problem infeasible. Try to inscrease numItermax.") + elif resultSolver == UNBOUNDED: + print("Problem unbounded") return G @cython.boundscheck(False) @cython.wraparound(False) -def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M): +def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int numItermax): """ Solves the Earth Movers distance problem and returns the optimal transport loss @@ -102,6 +110,8 @@ def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mo target histogram M : (ns,nt) ndarray, float64 loss matrix + numItermax : int + Maximum number of iterations made by the LP solver. Returns @@ -123,7 +133,12 @@ def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mo b=np.ones((n2,))/n2 # calling the function - EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, &cost) + cdef int resultSolver = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, &cost, numItermax) + if resultSolver != OPTIMAL: + if resultSolver == INFEASIBLE: + print("Problem infeasible. Try to inscrease numItermax.") + elif resultSolver == UNBOUNDED: + print("Problem unbounded") cost=0 for i in range(n1): -- cgit v1.2.3