summaryrefslogtreecommitdiff
path: root/ot/lp
diff options
context:
space:
mode:
authoraje <leo_g_autheron@hotmail.fr>2017-08-30 09:56:37 +0200
committeraje <leo_g_autheron@hotmail.fr>2017-08-30 09:56:37 +0200
commit982f36cb0a5f3a6a14454238a26053de7251b0f0 (patch)
treee7ad62954515430a418b9825829f7d563bf469ff /ot/lp
parent308ce24b705dfad9d058138d058da8b18002e081 (diff)
Changes:
- Rename numItermax to max_iter - Default value to 100000 instead of 10000 - Add max_iter to class SinkhornTransport(BaseTransport) - Add norm to all BaseTransport
Diffstat (limited to 'ot/lp')
-rw-r--r--ot/lp/EMD.h2
-rw-r--r--ot/lp/EMD_wrapper.cpp4
-rw-r--r--ot/lp/__init__.py46
-rw-r--r--ot/lp/emd_wrap.pyx82
4 files changed, 68 insertions, 66 deletions
diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h
index 956830c..aa92441 100644
--- a/ot/lp/EMD.h
+++ b/ot/lp/EMD.h
@@ -29,6 +29,6 @@ enum ProblemType {
UNBOUNDED
};
-int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int numItermax);
+int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int max_iter);
#endif
diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp
index 83ed56c..c8c2eb3 100644
--- a/ot/lp/EMD_wrapper.cpp
+++ b/ot/lp/EMD_wrapper.cpp
@@ -15,7 +15,7 @@
#include "EMD.h"
-int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int numItermax) {
+int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int max_iter) {
// beware M and C anre strored in row major C style!!!
int n, m, i,cur;
double max;
@@ -45,7 +45,7 @@ int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *c
std::vector<int> indI(n), indJ(m);
std::vector<double> weights1(n), weights2(m);
Digraph di(n, m);
- NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m, numItermax);
+ NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m, max_iter);
// Set supply and demand, don't account for 0 values (faster)
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 5143a70..7bef648 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -14,8 +14,7 @@ from ..utils import parmap
import multiprocessing
-
-def emd(a, b, M, numItermax=10000):
+def emd(a, b, M, max_iter=100000):
"""Solves the Earth Movers distance problem and returns the OT matrix
@@ -40,8 +39,9 @@ def emd(a, b, M, numItermax=10000):
Target histogram (uniform weigth if empty list)
M : (ns,nt) ndarray, float64
loss matrix
- numItermax : int
- Maximum number of iterations made by the LP solver.
+ max_iter : int, optional (default=100000)
+ The maximum number of iterations before stopping the optimization
+ algorithm if it has not converged.
Returns
-------
@@ -54,7 +54,7 @@ def emd(a, b, M, numItermax=10000):
Simple example with obvious solution. The function emd accepts lists and
perform automatic conversion to numpy arrays
-
+
>>> import ot
>>> a=[.5,.5]
>>> b=[.5,.5]
@@ -86,10 +86,11 @@ def emd(a, b, M, numItermax=10000):
if len(b) == 0:
b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1]
- return emd_c(a, b, M, numItermax)
+ return emd_c(a, b, M, max_iter)
+
-def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=10000):
- """Solves the Earth Movers distance problem and returns the loss
+def emd2(a, b, M, processes=multiprocessing.cpu_count(), max_iter=100000):
+ """Solves the Earth Movers distance problem and returns the loss
.. math::
\gamma = arg\min_\gamma <\gamma,M>_F
@@ -112,8 +113,9 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=10000):
Target histogram (uniform weigth if empty list)
M : (ns,nt) ndarray, float64
loss matrix
- numItermax : int
- Maximum number of iterations made by the LP solver.
+ max_iter : int, optional (default=100000)
+ The maximum number of iterations before stopping the optimization
+ algorithm if it has not converged.
Returns
-------
@@ -126,15 +128,15 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=10000):
Simple example with obvious solution. The function emd accepts lists and
perform automatic conversion to numpy arrays
-
-
+
+
>>> import ot
>>> a=[.5,.5]
>>> b=[.5,.5]
>>> M=[[0.,1.],[1.,0.]]
>>> ot.emd2(a,b,M)
0.0
-
+
References
----------
@@ -157,16 +159,14 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=10000):
a = np.ones((M.shape[0], ), dtype=np.float64)/M.shape[0]
if len(b) == 0:
b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1]
-
- if len(b.shape)==1:
- return emd2_c(a, b, M, numItermax)
+
+ if len(b.shape) == 1:
+ return emd2_c(a, b, M, max_iter)
else:
- nb=b.shape[1]
- #res=[emd2_c(a,b[:,i].copy(),M, numItermax) for i in range(nb)]
+ nb = b.shape[1]
+ # res = [emd2_c(a, b[:, i].copy(), M, max_iter) for i in range(nb)]
+
def f(b):
- return emd2_c(a,b,M, numItermax)
- res= parmap(f, [b[:,i] for i in range(nb)],processes)
+ return emd2_c(a, b, M, max_iter)
+ res = parmap(f, [b[:, i] for i in range(nb)], processes)
return np.array(res)
-
-
- \ No newline at end of file
diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
index 6039e1f..26d3330 100644
--- a/ot/lp/emd_wrap.pyx
+++ b/ot/lp/emd_wrap.pyx
@@ -15,56 +15,57 @@ cimport cython
cdef extern from "EMD.h":
- int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int numItermax)
+ int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int max_iter)
cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED
@cython.boundscheck(False)
@cython.wraparound(False)
-def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int numItermax):
+def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int max_iter):
"""
Solves the Earth Movers distance problem and returns the optimal transport matrix
-
+
gamm=emd(a,b,M)
-
+
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F
-
+ \gamma = arg\min_\gamma <\gamma,M>_F
+
s.t. \gamma 1 = a
-
- \gamma^T 1= b
-
+
+ \gamma^T 1= b
+
\gamma\geq 0
where :
-
+
- M is the metric cost matrix
- a and b are the sample weights
-
+
Parameters
----------
a : (ns,) ndarray, float64
- source histogram
+ source histogram
b : (nt,) ndarray, float64
target histogram
M : (ns,nt) ndarray, float64
- loss matrix
- numItermax : int
- Maximum number of iterations made by the LP solver.
-
-
+ loss matrix
+ max_iter : int
+ The maximum number of iterations before stopping the optimization
+ algorithm if it has not converged.
+
+
Returns
-------
gamma: (ns x nt) ndarray
Optimal transportation matrix for the given parameters
-
+
"""
cdef int n1= M.shape[0]
cdef int n2= M.shape[1]
cdef float cost=0
cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([n1, n2])
-
+
if not len(a):
a=np.ones((n1,))/n1
@@ -72,7 +73,7 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod
b=np.ones((n2,))/n2
# calling the function
- cdef int resultSolver = EMD_wrap(n1,n2,<double*> a.data,<double*> b.data,<double*> M.data,<double*> G.data,<double*> &cost, numItermax)
+ cdef int resultSolver = EMD_wrap(n1,n2,<double*> a.data,<double*> b.data,<double*> M.data,<double*> G.data,<double*> &cost, max_iter)
if resultSolver != OPTIMAL:
if resultSolver == INFEASIBLE:
print("Problem infeasible. Try to increase numItermax.")
@@ -83,49 +84,50 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod
@cython.boundscheck(False)
@cython.wraparound(False)
-def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int numItermax):
+def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int max_iter):
"""
Solves the Earth Movers distance problem and returns the optimal transport loss
-
+
gamm=emd(a,b,M)
-
+
.. math::
- \gamma = arg\min_\gamma <\gamma,M>_F
-
+ \gamma = arg\min_\gamma <\gamma,M>_F
+
s.t. \gamma 1 = a
-
- \gamma^T 1= b
-
+
+ \gamma^T 1= b
+
\gamma\geq 0
where :
-
+
- M is the metric cost matrix
- a and b are the sample weights
-
+
Parameters
----------
a : (ns,) ndarray, float64
- source histogram
+ source histogram
b : (nt,) ndarray, float64
target histogram
M : (ns,nt) ndarray, float64
- loss matrix
- numItermax : int
- Maximum number of iterations made by the LP solver.
-
-
+ loss matrix
+ max_iter : int
+ The maximum number of iterations before stopping the optimization
+ algorithm if it has not converged.
+
+
Returns
-------
gamma: (ns x nt) ndarray
Optimal transportation matrix for the given parameters
-
+
"""
cdef int n1= M.shape[0]
cdef int n2= M.shape[1]
cdef float cost=0
cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([n1, n2])
-
+
if not len(a):
a=np.ones((n1,))/n1
@@ -133,13 +135,13 @@ def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mo
b=np.ones((n2,))/n2
# calling the function
- cdef int resultSolver = EMD_wrap(n1,n2,<double*> a.data,<double*> b.data,<double*> M.data,<double*> G.data,<double*> &cost, numItermax)
+ cdef int resultSolver = EMD_wrap(n1,n2,<double*> a.data,<double*> b.data,<double*> M.data,<double*> G.data,<double*> &cost, max_iter)
if resultSolver != OPTIMAL:
if resultSolver == INFEASIBLE:
print("Problem infeasible. Try to inscrease numItermax.")
elif resultSolver == UNBOUNDED:
print("Problem unbounded")
-
+
cost=0
for i in range(n1):
for j in range(n2):