From 55a38f8253e5831105d2c329f4d8ed77686d1330 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Thu, 13 Jul 2017 15:30:39 +0900 Subject: Added optional maximal number of iteration --- ot/lp/__init__.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'ot/lp/__init__.py') diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index db3da78..673242d 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -11,7 +11,7 @@ import multiprocessing -def emd(a, b, M): +def emd(a, b, M, max_iter=-1): """Solves the Earth Movers distance problem and returns the OT matrix @@ -80,9 +80,9 @@ def emd(a, b, M): if len(b) == 0: b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] - return emd_c(a, b, M) + return emd_c(a, b, M, max_iter) -def emd2(a, b, M,processes=multiprocessing.cpu_count()): +def emd2(a, b, M,processes=multiprocessing.cpu_count(), max_iter=-1): """Solves the Earth Movers distance problem and returns the loss .. math:: @@ -151,12 +151,12 @@ def emd2(a, b, M,processes=multiprocessing.cpu_count()): b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] if len(b.shape)==1: - return emd2_c(a, b, M) + return emd2_c(a, b, M, max_iter) else: nb=b.shape[1] #res=[emd2_c(a,b[:,i].copy(),M) for i in range(nb)] def f(b): - return emd2_c(a,b,M) + return emd2_c(a,b,M, max_iter) res= parmap(f, [b[:,i] for i in range(nb)],processes) return np.array(res) -- cgit v1.2.3 From 88c62c39a9623e8b58ebb776a9deddc96b43b4a0 Mon Sep 17 00:00:00 2001 From: arolet Date: Fri, 21 Jul 2017 12:12:48 +0900 Subject: Added dual variables computations --- ot/lp/EMD.h | 3 ++- ot/lp/EMD_wrapper.cpp | 6 ++++-- ot/lp/__init__.py | 11 +++++++---- ot/lp/emd_wrap.pyx | 22 ++++++++++++++++------ 4 files changed, 29 insertions(+), 13 deletions(-) (limited to 'ot/lp/__init__.py') diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h index 59a5af8..fb7feca 100644 --- a/ot/lp/EMD.h +++ b/ot/lp/EMD.h @@ -24,6 +24,7 @@ using namespace lemon; typedef unsigned int node_id_type; -void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int max_iter); +void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, + double* alpha, double* beta, double *cost, int max_iter); #endif diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp index cc13230..6bda6a7 100644 --- a/ot/lp/EMD_wrapper.cpp +++ b/ot/lp/EMD_wrapper.cpp @@ -15,8 +15,8 @@ #include "EMD.h" -void EMD_wrap(int n1, int n2, double *X, double *Y, - double *D, double *G, double *cost, int max_iter) { +void EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, + double* alpha, double* beta, double *cost, int max_iter) { // beware M and C anre strored in row major C style!!! int n, m, i,cur; @@ -99,6 +99,8 @@ void EMD_wrap(int n1, int n2, double *X, double *Y, int i = di.source(a); int j = di.target(a); *(G+indI[i]*n2+indJ[j-n]) = net.flow(a); + *(alpha + indI[i]) = net.potential(i); + *(beta + indJ[j-n]) = net.potential(j); } }; diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 673242d..915a18c 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -11,7 +11,7 @@ import multiprocessing -def emd(a, b, M, max_iter=-1): +def emd(a, b, M, dual_variables=False, max_iter=-1): """Solves the Earth Movers distance problem and returns the OT matrix @@ -80,7 +80,10 @@ def emd(a, b, M, max_iter=-1): if len(b) == 0: b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] - return emd_c(a, b, M, max_iter) + G, alpha, beta = emd_c(a, b, M, max_iter) + if dual_variables: + return G, alpha, beta + return G def emd2(a, b, M,processes=multiprocessing.cpu_count(), max_iter=-1): """Solves the Earth Movers distance problem and returns the loss @@ -151,12 +154,12 @@ def emd2(a, b, M,processes=multiprocessing.cpu_count(), max_iter=-1): b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] if len(b.shape)==1: - return emd2_c(a, b, M, max_iter) + return emd2_c(a, b, M, max_iter)[0] else: nb=b.shape[1] #res=[emd2_c(a,b[:,i].copy(),M) for i in range(nb)] def f(b): - return emd2_c(a,b,M, max_iter) + return emd2_c(a,b,M, max_iter)[0] res= parmap(f, [b[:,i] for i in range(nb)],processes) return np.array(res) diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index c4ba125..813596f 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -12,7 +12,8 @@ cimport cython cdef extern from "EMD.h": - void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int max_iter) + void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, + double* alpha, double* beta, int max_iter) @@ -58,6 +59,8 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod cdef float cost=0 cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([n1, n2]) + cdef np.ndarray[double, ndim=1, mode="c"] alpha=np.zeros(n1) + cdef np.ndarray[double, ndim=1, mode="c"] beta=np.zeros(n2) if not len(a): a=np.ones((n1,))/n1 @@ -65,10 +68,13 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod if not len(b): b=np.ones((n2,))/n2 + print alpha.size + print beta.size # calling the function - EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, &cost, maxiter) + EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, + alpha.data, beta.data, &cost, maxiter) - return G + return G, alpha, beta @cython.boundscheck(False) @cython.wraparound(False) @@ -112,15 +118,19 @@ def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mo cdef float cost=0 cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([n1, n2]) + + cdef np.ndarray[double, ndim = 1, mode = "c"] alpha = np.zeros([n1]) + cdef np.ndarray[double, ndim = 1, mode = "c"] beta = np.zeros([n2]) if not len(a): a=np.ones((n1,))/n1 if not len(b): b=np.ones((n2,))/n2 - # calling the function - EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, &cost, maxiter) + EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, + alpha.data, beta.data, &cost, maxiter) + - return cost + return cost, alpha, beta -- cgit v1.2.3 From d52b4ea415d9bb669be04ccd0940f9b3d258d0e1 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Tue, 5 Sep 2017 17:15:45 +0900 Subject: Fixed typo and merged emd tests --- ot/lp/__init__.py | 2 +- test/test_emd.py | 68 ------------------------------------------------------- test/test_ot.py | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 64 insertions(+), 72 deletions(-) delete mode 100644 test/test_emd.py (limited to 'ot/lp/__init__.py') diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index a14d4e4..6048f60 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -168,6 +168,6 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000): # res = [emd2_c(a, b[:, i].copy(), M, numItermax) for i in range(nb)] def f(b): - return emd2_c(a,b,M, max_iter)[0] + return emd2_c(a,b,M, numItermax)[0] res= parmap(f, [b[:,i] for i in range(nb)],processes) return np.array(res) diff --git a/test/test_emd.py b/test/test_emd.py deleted file mode 100644 index 0025583..0000000 --- a/test/test_emd.py +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env python2 -# -*- coding: utf-8 -*- - -import numpy as np -import ot - -from ot.datasets import get_1D_gauss as gauss -reload(ot.lp) - -#%% parameters - -n=5000 # nb bins -m=6000 # nb bins - -mean1 = 1000 -mean2 = 1100 - -# bin positions -x=np.arange(n,dtype=np.float64) -y=np.arange(m,dtype=np.float64) - -# Gaussian distributions -a=gauss(n,m=mean1,s=5) # m= mean, s= std - -b=gauss(m,m=mean2,s=10) - -# loss matrix -M=ot.dist(x.reshape((-1,1)), y.reshape((-1,1))) ** (1./2) -#M/=M.max() - -#%% - -print('Computing {} EMD '.format(1)) - -# emd loss 1 proc -ot.tic() -G, alpha, beta = ot.emd(a,b,M, dual_variables=True) -ot.toc('1 proc : {} s') - -cost1 = (G * M).sum() -cost_dual = np.vdot(a, alpha) + np.vdot(b, beta) - -# emd loss 1 proc -ot.tic() -cost_emd2 = ot.emd2(a,b,M) -ot.toc('1 proc : {} s') - -ot.tic() -G2 = ot.emd(b, a, np.ascontiguousarray(M.T)) -ot.toc('1 proc : {} s') - -cost2 = (G2 * M.T).sum() - -M_reduced = M - alpha.reshape(-1,1) - beta.reshape(1, -1) - -# Check that both cost computations are equivalent -np.testing.assert_almost_equal(cost1, cost_emd2) -# Check that dual and primal cost are equal -np.testing.assert_almost_equal(cost1, cost_dual) -# Check symmetry -np.testing.assert_almost_equal(cost1, cost2) -# Check with closed-form solution for gaussians -np.testing.assert_almost_equal(cost1, np.abs(mean1-mean2)) - -[ind1, ind2] = np.nonzero(G) - -# Check that reduced cost is zero on transport arcs -np.testing.assert_array_almost_equal((M - alpha.reshape(-1, 1) - beta.reshape(1, -1))[ind1, ind2], np.zeros(ind1.size)) \ No newline at end of file diff --git a/test/test_ot.py b/test/test_ot.py index acd8718..ded6c9f 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -6,6 +6,8 @@ import numpy as np import ot + +from ot.datasets import get_1D_gauss as gauss def test_doctest(): @@ -66,9 +68,6 @@ def test_emd_empty(): def test_emd2_multi(): - - from ot.datasets import get_1D_gauss as gauss - n = 1000 # nb bins # bin positions @@ -100,3 +99,64 @@ def test_emd2_multi(): ot.toc('multi proc : {} s') np.testing.assert_allclose(emd1, emdn) + +def test_dual_variables(): + #%% parameters + + n=5000 # nb bins + m=6000 # nb bins + + mean1 = 1000 + mean2 = 1100 + + # bin positions + x=np.arange(n,dtype=np.float64) + y=np.arange(m,dtype=np.float64) + + # Gaussian distributions + a=gauss(n,m=mean1,s=5) # m= mean, s= std + + b=gauss(m,m=mean2,s=10) + + # loss matrix + M=ot.dist(x.reshape((-1,1)), y.reshape((-1,1))) ** (1./2) + #M/=M.max() + + #%% + + print('Computing {} EMD '.format(1)) + + # emd loss 1 proc + ot.tic() + G, alpha, beta = ot.emd(a,b,M, dual_variables=True) + ot.toc('1 proc : {} s') + + cost1 = (G * M).sum() + cost_dual = np.vdot(a, alpha) + np.vdot(b, beta) + + # emd loss 1 proc + ot.tic() + cost_emd2 = ot.emd2(a,b,M) + ot.toc('1 proc : {} s') + + ot.tic() + G2 = ot.emd(b, a, np.ascontiguousarray(M.T)) + ot.toc('1 proc : {} s') + + cost2 = (G2 * M.T).sum() + + M_reduced = M - alpha.reshape(-1,1) - beta.reshape(1, -1) + + # Check that both cost computations are equivalent + np.testing.assert_almost_equal(cost1, cost_emd2) + # Check that dual and primal cost are equal + np.testing.assert_almost_equal(cost1, cost_dual) + # Check symmetry + np.testing.assert_almost_equal(cost1, cost2) + # Check with closed-form solution for gaussians + np.testing.assert_almost_equal(cost1, np.abs(mean1-mean2)) + + [ind1, ind2] = np.nonzero(G) + + # Check that reduced cost is zero on transport arcs + np.testing.assert_array_almost_equal((M - alpha.reshape(-1, 1) - beta.reshape(1, -1))[ind1, ind2], np.zeros(ind1.size)) -- cgit v1.2.3 From 12d9b3ff72e9669ccc0162e82b7a33beb51d3e25 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Thu, 7 Sep 2017 13:50:41 +0900 Subject: Return dual variables in an optional dictionary Also removed some code duplication --- ot/lp/__init__.py | 24 +++++++++++++------ ot/lp/emd_wrap.pyx | 69 +----------------------------------------------------- test/test_ot.py | 20 ++++++---------- 3 files changed, 25 insertions(+), 88 deletions(-) (limited to 'ot/lp/__init__.py') diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 6048f60..c15e6b9 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -9,12 +9,12 @@ Solvers for the original linear program OT problem import numpy as np # import compiled emd -from .emd_wrap import emd_c, emd2_c +from .emd_wrap import emd_c from ..utils import parmap import multiprocessing -def emd(a, b, M, numItermax=100000, dual_variables=False): +def emd(a, b, M, numItermax=100000, log=False): """Solves the Earth Movers distance problem and returns the OT matrix @@ -42,11 +42,17 @@ def emd(a, b, M, numItermax=100000, dual_variables=False): numItermax : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. + log: boolean, optional (default=False) + If True, returns a dictionary containing the cost and dual + variables. Otherwise returns only the optimal transportation matrix. Returns ------- gamma: (ns x nt) ndarray Optimal transportation matrix for the given parameters + log: dict + If input log is true, a dictionary containing the cost and dual + variables Examples @@ -86,9 +92,13 @@ def emd(a, b, M, numItermax=100000, dual_variables=False): if len(b) == 0: b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] - G, alpha, beta = emd_c(a, b, M, numItermax) - if dual_variables: - return G, alpha, beta + G, cost, u, v = emd_c(a, b, M, numItermax) + if log: + log = {} + log['cost'] = cost + log['u'] = u + log['v'] = v + return G, log return G def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000): @@ -163,11 +173,11 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000): b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] if len(b.shape)==1: - return emd2_c(a, b, M, numItermax)[0] + return emd_c(a, b, M, numItermax)[1] nb = b.shape[1] # res = [emd2_c(a, b[:, i].copy(), M, numItermax) for i in range(nb)] def f(b): - return emd2_c(a,b,M, numItermax)[0] + return emd_c(a,b,M, numItermax)[1] res= parmap(f, [b[:,i] for i in range(nb)],processes) return np.array(res) diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 9bea154..5618dfc 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -86,71 +86,4 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod elif resultSolver == MAX_ITER_REACHED: warnings.warn("numItermax reached before optimality. Try to increase numItermax.") - return G, alpha, beta - -@cython.boundscheck(False) -@cython.wraparound(False) -def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int numItermax): - """ - Solves the Earth Movers distance problem and returns the optimal transport loss - - gamm=emd(a,b,M) - - .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F - - s.t. \gamma 1 = a - - \gamma^T 1= b - - \gamma\geq 0 - where : - - - M is the metric cost matrix - - a and b are the sample weights - - Parameters - ---------- - a : (ns,) ndarray, float64 - source histogram - b : (nt,) ndarray, float64 - target histogram - M : (ns,nt) ndarray, float64 - loss matrix - numItermax : int - The maximum number of iterations before stopping the optimization - algorithm if it has not converged. - - - Returns - ------- - gamma: (ns x nt) ndarray - Optimal transportation matrix for the given parameters - - """ - cdef int n1= M.shape[0] - cdef int n2= M.shape[1] - - cdef double cost=0 - cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([n1, n2]) - - cdef np.ndarray[double, ndim = 1, mode = "c"] alpha = np.zeros([n1]) - cdef np.ndarray[double, ndim = 1, mode = "c"] beta = np.zeros([n2]) - - if not len(a): - a=np.ones((n1,))/n1 - - if not len(b): - b=np.ones((n2,))/n2 - # calling the function - cdef int resultSolver = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, numItermax) - if resultSolver != OPTIMAL: - if resultSolver == INFEASIBLE: - warnings.warn("Problem infeasible. Check that a and b are in the simplex") - elif resultSolver == UNBOUNDED: - warnings.warn("Problem unbounded") - elif resultSolver == MAX_ITER_REACHED: - warnings.warn("numItermax reached before optimality. Try to increase numItermax.") - - return cost, alpha, beta - + return G, cost, alpha, beta diff --git a/test/test_ot.py b/test/test_ot.py index 8a19cf6..78f64ab 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -124,27 +124,26 @@ def test_warnings(): # %% print('Computing {} EMD '.format(1)) - G, alpha, beta = ot.emd(a, b, M, dual_variables=True) with warnings.catch_warnings(record=True) as w: # Cause all warnings to always be triggered. warnings.simplefilter("always") # Trigger a warning. print('Computing {} EMD '.format(1)) - G, alpha, beta = ot.emd(a, b, M, dual_variables=True, numItermax=1) + G = ot.emd(a, b, M, numItermax=1) # Verify some things assert "numItermax" in str(w[-1].message) assert len(w) == 1 # Trigger a warning. a[0]=100 print('Computing {} EMD '.format(2)) - G, alpha, beta = ot.emd(a, b, M, dual_variables=True) + G = ot.emd(a, b, M) # Verify some things assert "infeasible" in str(w[-1].message) assert len(w) == 2 # Trigger a warning. a[0]=-1 print('Computing {} EMD '.format(2)) - G, alpha, beta = ot.emd(a, b, M, dual_variables=True) + G = ot.emd(a, b, M) # Verify some things assert "infeasible" in str(w[-1].message) assert len(w) == 3 @@ -176,16 +175,11 @@ def test_dual_variables(): # emd loss 1 proc ot.tic() - G, alpha, beta = ot.emd(a, b, M, dual_variables=True) + G, log = ot.emd(a, b, M, log=True) ot.toc('1 proc : {} s') cost1 = (G * M).sum() - cost_dual = np.vdot(a, alpha) + np.vdot(b, beta) - - # emd loss 1 proc - ot.tic() - cost_emd2 = ot.emd2(a, b, M) - ot.toc('1 proc : {} s') + cost_dual = np.vdot(a, log['u']) + np.vdot(b, log['v']) ot.tic() G2 = ot.emd(b, a, np.ascontiguousarray(M.T)) @@ -194,7 +188,7 @@ def test_dual_variables(): cost2 = (G2 * M.T).sum() # Check that both cost computations are equivalent - np.testing.assert_almost_equal(cost1, cost_emd2) + np.testing.assert_almost_equal(cost1, log['cost']) # Check that dual and primal cost are equal np.testing.assert_almost_equal(cost1, cost_dual) # Check symmetry @@ -205,5 +199,5 @@ def test_dual_variables(): [ind1, ind2] = np.nonzero(G) # Check that reduced cost is zero on transport arcs - np.testing.assert_array_almost_equal((M - alpha.reshape(-1, 1) - beta.reshape(1, -1))[ind1, ind2], + np.testing.assert_array_almost_equal((M - log['u'].reshape(-1, 1) - log['v'].reshape(1, -1))[ind1, ind2], np.zeros(ind1.size)) -- cgit v1.2.3 From ab65f86304b03a967054eeeaf73b8c8277618d65 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Thu, 7 Sep 2017 14:35:35 +0900 Subject: Added log option to muliprocess emd --- ot/lp/__init__.py | 39 ++++++++++++++++++++++++------------- test/test_ot.py | 57 ++++++++++++++++++++++++++++++------------------------- 2 files changed, 57 insertions(+), 39 deletions(-) (limited to 'ot/lp/__init__.py') diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index c15e6b9..8edd8ec 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -7,11 +7,13 @@ Solvers for the original linear program OT problem # # License: MIT License +import multiprocessing + import numpy as np + # import compiled emd from .emd_wrap import emd_c from ..utils import parmap -import multiprocessing def emd(a, b, M, numItermax=100000, log=False): @@ -88,9 +90,9 @@ def emd(a, b, M, numItermax=100000, log=False): # if empty array given then use unifor distributions if len(a) == 0: - a = np.ones((M.shape[0], ), dtype=np.float64)/M.shape[0] + a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0] if len(b) == 0: - b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] + b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] G, cost, u, v = emd_c(a, b, M, numItermax) if log: @@ -101,7 +103,8 @@ def emd(a, b, M, numItermax=100000, log=False): return G, log return G -def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000): + +def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000, log=False): """Solves the Earth Movers distance problem and returns the loss .. math:: @@ -168,16 +171,26 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000): # if empty array given then use unifor distributions if len(a) == 0: - a = np.ones((M.shape[0], ), dtype=np.float64)/M.shape[0] + a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0] if len(b) == 0: - b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] - - if len(b.shape)==1: - return emd_c(a, b, M, numItermax)[1] + b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] + + if log: + def f(b): + G, cost, u, v = emd_c(a, b, M, numItermax) + log = {} + log['G'] = G + log['u'] = u + log['v'] = v + return [cost, log] + else: + def f(b): + return emd_c(a, b, M, numItermax)[1] + + if len(b.shape) == 1: + return f(b) nb = b.shape[1] # res = [emd2_c(a, b[:, i].copy(), M, numItermax) for i in range(nb)] - def f(b): - return emd_c(a,b,M, numItermax)[1] - res= parmap(f, [b[:,i] for i in range(nb)],processes) - return np.array(res) + res = parmap(f, [b[:, i] for i in range(nb)], processes) + return res diff --git a/test/test_ot.py b/test/test_ot.py index 78f64ab..feadef4 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -4,11 +4,12 @@ # # License: MIT License +import warnings + import numpy as np import ot from ot.datasets import get_1D_gauss as gauss -import warnings def test_doctest(): @@ -100,6 +101,21 @@ def test_emd2_multi(): np.testing.assert_allclose(emd1, emdn) + # emd loss multipro proc with log + ot.tic() + emdn = ot.emd2(a, b, M, log=True) + ot.toc('multi proc : {} s') + + for i in range(len(emdn)): + emd = emdn[i] + log = emd[1] + cost = emd[0] + check_duality_gap(a, b[:, i], M, log['G'], log['u'], log['v'], cost) + emdn[i] = cost + + emdn = np.array(emdn) + np.testing.assert_allclose(emd1, emdn) + def test_warnings(): n = 100 # nb bins @@ -119,32 +135,22 @@ def test_warnings(): # loss matrix M = ot.dist(x.reshape((-1, 1)), y.reshape((-1, 1))) ** (1. / 2) - # M/=M.max() - - # %% print('Computing {} EMD '.format(1)) with warnings.catch_warnings(record=True) as w: - # Cause all warnings to always be triggered. warnings.simplefilter("always") - # Trigger a warning. print('Computing {} EMD '.format(1)) G = ot.emd(a, b, M, numItermax=1) - # Verify some things assert "numItermax" in str(w[-1].message) assert len(w) == 1 - # Trigger a warning. - a[0]=100 + a[0] = 100 print('Computing {} EMD '.format(2)) G = ot.emd(a, b, M) - # Verify some things assert "infeasible" in str(w[-1].message) assert len(w) == 2 - # Trigger a warning. - a[0]=-1 + a[0] = -1 print('Computing {} EMD '.format(2)) G = ot.emd(a, b, M) - # Verify some things assert "infeasible" in str(w[-1].message) assert len(w) == 3 @@ -167,9 +173,6 @@ def test_dual_variables(): # loss matrix M = ot.dist(x.reshape((-1, 1)), y.reshape((-1, 1))) ** (1. / 2) - # M/=M.max() - - # %% print('Computing {} EMD '.format(1)) @@ -178,26 +181,28 @@ def test_dual_variables(): G, log = ot.emd(a, b, M, log=True) ot.toc('1 proc : {} s') - cost1 = (G * M).sum() - cost_dual = np.vdot(a, log['u']) + np.vdot(b, log['v']) - ot.tic() G2 = ot.emd(b, a, np.ascontiguousarray(M.T)) ot.toc('1 proc : {} s') - cost2 = (G2 * M.T).sum() + cost1 = (G * M).sum() + # Check symmetry + np.testing.assert_array_almost_equal(cost1, (M * G2.T).sum()) + # Check with closed-form solution for gaussians + np.testing.assert_almost_equal(cost1, np.abs(mean1 - mean2)) # Check that both cost computations are equivalent np.testing.assert_almost_equal(cost1, log['cost']) + check_duality_gap(a, b, M, G, log['u'], log['v'], log['cost']) + + +def check_duality_gap(a, b, M, G, u, v, cost): + cost_dual = np.vdot(a, u) + np.vdot(b, v) # Check that dual and primal cost are equal - np.testing.assert_almost_equal(cost1, cost_dual) - # Check symmetry - np.testing.assert_almost_equal(cost1, cost2) - # Check with closed-form solution for gaussians - np.testing.assert_almost_equal(cost1, np.abs(mean1 - mean2)) + np.testing.assert_almost_equal(cost_dual, cost) [ind1, ind2] = np.nonzero(G) # Check that reduced cost is zero on transport arcs - np.testing.assert_array_almost_equal((M - log['u'].reshape(-1, 1) - log['v'].reshape(1, -1))[ind1, ind2], + np.testing.assert_array_almost_equal((M - u.reshape(-1, 1) - v.reshape(1, -1))[ind1, ind2], np.zeros(ind1.size)) -- cgit v1.2.3 From e58cd780ccf87736265e4e1a39afa3a167325ccc Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Sat, 9 Sep 2017 12:37:56 +0900 Subject: Added convergence status to the log --- ot/lp/__init__.py | 16 ++++++++++++---- ot/lp/emd_wrap.pyx | 28 +++++++++++++++++----------- 2 files changed, 29 insertions(+), 15 deletions(-) (limited to 'ot/lp/__init__.py') diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 8edd8ec..0f40c19 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -12,7 +12,7 @@ import multiprocessing import numpy as np # import compiled emd -from .emd_wrap import emd_c +from .emd_wrap import emd_c, checkResult from ..utils import parmap @@ -94,12 +94,15 @@ def emd(a, b, M, numItermax=100000, log=False): if len(b) == 0: b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] - G, cost, u, v = emd_c(a, b, M, numItermax) + G, cost, u, v, resultCode = emd_c(a, b, M, numItermax) + resultCodeString = checkResult(resultCode) if log: log = {} log['cost'] = cost log['u'] = u log['v'] = v + log['warning'] = resultCodeString + log['resultCode'] = resultCode return G, log return G @@ -177,15 +180,20 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000, log= if log: def f(b): - G, cost, u, v = emd_c(a, b, M, numItermax) + G, cost, u, v, resultCode = emd_c(a, b, M, numItermax) + resultCodeString = checkResult(resultCode) log = {} log['G'] = G log['u'] = u log['v'] = v + log['warning'] = resultCodeString + log['resultCode'] = resultCode return [cost, log] else: def f(b): - return emd_c(a, b, M, numItermax)[1] + G, cost, u, v, resultCode = emd_c(a, b, M, numItermax) + checkResult(resultCode) + return cost if len(b.shape) == 1: return f(b) diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 5618dfc..19bcdd8 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -7,12 +7,12 @@ Cython linker with C solver # # License: MIT License -import warnings import numpy as np cimport numpy as np cimport cython +import warnings cdef extern from "EMD.h": @@ -20,6 +20,19 @@ cdef extern from "EMD.h": cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED +def checkResult(resultCode): + if resultCode == OPTIMAL: + return None + + if resultCode == INFEASIBLE: + message = "Problem infeasible. Check that a and b are in the simplex" + elif resultCode == UNBOUNDED: + message = "Problem unbounded" + elif resultCode == MAX_ITER_REACHED: + message = "numItermax reached before optimality. Try to increase numItermax." + warnings.warn(message) + return message + @cython.boundscheck(False) @cython.wraparound(False) @@ -77,13 +90,6 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod b=np.ones((n2,))/n2 # calling the function - cdef int resultSolver = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, numItermax) - if resultSolver != OPTIMAL: - if resultSolver == INFEASIBLE: - warnings.warn("Problem infeasible. Check that a and b are in the simplex") - elif resultSolver == UNBOUNDED: - warnings.warn("Problem unbounded") - elif resultSolver == MAX_ITER_REACHED: - warnings.warn("numItermax reached before optimality. Try to increase numItermax.") - - return G, cost, alpha, beta + cdef int resultCode = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, numItermax) + + return G, cost, alpha, beta, resultCode -- cgit v1.2.3 From 85c56d96f609c4ad458f0963a068386cc910c66c Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Sat, 9 Sep 2017 17:28:38 +0900 Subject: Renamed variables --- ot/da.py | 2 +- ot/lp/__init__.py | 31 +++++++++++++++++-------------- ot/lp/emd_wrap.pyx | 18 +++++++++--------- test/test_ot.py | 2 +- 4 files changed, 28 insertions(+), 25 deletions(-) (limited to 'ot/lp/__init__.py') diff --git a/ot/da.py b/ot/da.py index 1d3d0ba..eb70305 100644 --- a/ot/da.py +++ b/ot/da.py @@ -1370,7 +1370,7 @@ class EMDTransport(BaseTransport): # coupling estimation self.coupling_ = emd( - a=self.mu_s, b=self.mu_t, M=self.cost_, numItermax=self.max_iter + a=self.mu_s, b=self.mu_t, M=self.cost_, num_iter_max=self.max_iter ) return self diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 0f40c19..ab7cb97 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -12,11 +12,11 @@ import multiprocessing import numpy as np # import compiled emd -from .emd_wrap import emd_c, checkResult +from .emd_wrap import emd_c, check_result from ..utils import parmap -def emd(a, b, M, numItermax=100000, log=False): +def emd(a, b, M, num_iter_max=100000, log=False): """Solves the Earth Movers distance problem and returns the OT matrix @@ -41,7 +41,7 @@ def emd(a, b, M, numItermax=100000, log=False): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - numItermax : int, optional (default=100000) + num_iter_max : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. log: boolean, optional (default=False) @@ -54,7 +54,7 @@ def emd(a, b, M, numItermax=100000, log=False): Optimal transportation matrix for the given parameters log: dict If input log is true, a dictionary containing the cost and dual - variables + variables and exit status Examples @@ -94,20 +94,20 @@ def emd(a, b, M, numItermax=100000, log=False): if len(b) == 0: b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] - G, cost, u, v, resultCode = emd_c(a, b, M, numItermax) - resultCodeString = checkResult(resultCode) + G, cost, u, v, result_code = emd_c(a, b, M, num_iter_max) + resultCodeString = check_result(result_code) if log: log = {} log['cost'] = cost log['u'] = u log['v'] = v log['warning'] = resultCodeString - log['resultCode'] = resultCode + log['result_code'] = result_code return G, log return G -def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000, log=False): +def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, log=False): """Solves the Earth Movers distance problem and returns the loss .. math:: @@ -131,7 +131,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000, log= Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - numItermax : int, optional (default=100000) + num_iter_max : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. @@ -139,6 +139,9 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000, log= ------- gamma: (ns x nt) ndarray Optimal transportation matrix for the given parameters + log: dict + If input log is true, a dictionary containing the cost and dual + variables and exit status Examples @@ -180,19 +183,19 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000, log= if log: def f(b): - G, cost, u, v, resultCode = emd_c(a, b, M, numItermax) - resultCodeString = checkResult(resultCode) + G, cost, u, v, resultCode = emd_c(a, b, M, num_iter_max) + resultCodeString = check_result(resultCode) log = {} log['G'] = G log['u'] = u log['v'] = v log['warning'] = resultCodeString - log['resultCode'] = resultCode + log['result_code'] = resultCode return [cost, log] else: def f(b): - G, cost, u, v, resultCode = emd_c(a, b, M, numItermax) - checkResult(resultCode) + G, cost, u, v, result_code = emd_c(a, b, M, num_iter_max) + check_result(result_code) return cost if len(b.shape) == 1: diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 19bcdd8..7ebdd2a 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -20,15 +20,15 @@ cdef extern from "EMD.h": cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED -def checkResult(resultCode): - if resultCode == OPTIMAL: +def check_result(result_code): + if result_code == OPTIMAL: return None - if resultCode == INFEASIBLE: + if result_code == INFEASIBLE: message = "Problem infeasible. Check that a and b are in the simplex" - elif resultCode == UNBOUNDED: + elif result_code == UNBOUNDED: message = "Problem unbounded" - elif resultCode == MAX_ITER_REACHED: + elif result_code == MAX_ITER_REACHED: message = "numItermax reached before optimality. Try to increase numItermax." warnings.warn(message) return message @@ -36,7 +36,7 @@ def checkResult(resultCode): @cython.boundscheck(False) @cython.wraparound(False) -def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int numItermax): +def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int num_iter_max): """ Solves the Earth Movers distance problem and returns the optimal transport matrix @@ -63,7 +63,7 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod target histogram M : (ns,nt) ndarray, float64 loss matrix - numItermax : int + num_iter_max : int The maximum number of iterations before stopping the optimization algorithm if it has not converged. @@ -90,6 +90,6 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod b=np.ones((n2,))/n2 # calling the function - cdef int resultCode = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, numItermax) + cdef int result_code = EMD_wrap(n1, n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, num_iter_max) - return G, cost, alpha, beta, resultCode + return G, cost, alpha, beta, result_code diff --git a/test/test_ot.py b/test/test_ot.py index cf5839e..c9b5154 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -140,7 +140,7 @@ def test_warnings(): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") print('Computing {} EMD '.format(1)) - ot.emd(a, b, M, numItermax=1) + ot.emd(a, b, M, num_iter_max=1) assert "numItermax" in str(w[-1].message) assert len(w) == 1 a[0] = 100 -- cgit v1.2.3 From 1ba2c837d54ce963ad63ddf8df2e47230800b747 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Sat, 9 Sep 2017 17:30:23 +0900 Subject: Renamed variables --- ot/lp/__init__.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'ot/lp/__init__.py') diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index ab7cb97..1238cdb 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -95,13 +95,13 @@ def emd(a, b, M, num_iter_max=100000, log=False): b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] G, cost, u, v, result_code = emd_c(a, b, M, num_iter_max) - resultCodeString = check_result(result_code) + result_code_string = check_result(result_code) if log: log = {} log['cost'] = cost log['u'] = u log['v'] = v - log['warning'] = resultCodeString + log['warning'] = result_code_string log['result_code'] = result_code return G, log return G @@ -184,12 +184,12 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, lo if log: def f(b): G, cost, u, v, resultCode = emd_c(a, b, M, num_iter_max) - resultCodeString = check_result(resultCode) + result_code_string = check_result(resultCode) log = {} log['G'] = G log['u'] = u log['v'] = v - log['warning'] = resultCodeString + log['warning'] = result_code_string log['result_code'] = resultCode return [cost, log] else: -- cgit v1.2.3 From cd8c04246b6d1f15b68d6433741e8c808fd517d8 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Sat, 9 Sep 2017 17:38:31 +0900 Subject: Renamed variable --- ot/da.py | 2 +- ot/lp/EMD.h | 2 +- ot/lp/EMD_wrapper.cpp | 4 ++-- ot/lp/__init__.py | 14 +++++++------- ot/lp/emd_wrap.pyx | 8 ++++---- test/test_ot.py | 2 +- 6 files changed, 16 insertions(+), 16 deletions(-) (limited to 'ot/lp/__init__.py') diff --git a/ot/da.py b/ot/da.py index eb70305..f3e7433 100644 --- a/ot/da.py +++ b/ot/da.py @@ -1370,7 +1370,7 @@ class EMDTransport(BaseTransport): # coupling estimation self.coupling_ = emd( - a=self.mu_s, b=self.mu_t, M=self.cost_, num_iter_max=self.max_iter + a=self.mu_s, b=self.mu_t, M=self.cost_, max_iter=self.max_iter ) return self diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h index bb486de..f42e222 100644 --- a/ot/lp/EMD.h +++ b/ot/lp/EMD.h @@ -30,6 +30,6 @@ enum ProblemType { MAX_ITER_REACHED }; -int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int max_iter); +int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter); #endif diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp index 92663dc..fc7ca63 100644 --- a/ot/lp/EMD_wrapper.cpp +++ b/ot/lp/EMD_wrapper.cpp @@ -16,7 +16,7 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, - double* alpha, double* beta, double *cost, int max_iter) { + double* alpha, double* beta, double *cost, int maxIter) { // beware M and C anre strored in row major C style!!! int n, m, i, cur; @@ -48,7 +48,7 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, std::vector indI(n), indJ(m); std::vector weights1(n), weights2(m); Digraph di(n, m); - NetworkSimplexSimple net(di, true, n+m, n*m, max_iter); + NetworkSimplexSimple net(di, true, n+m, n*m, maxIter); // Set supply and demand, don't account for 0 values (faster) diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 1238cdb..9a0cb1c 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -16,7 +16,7 @@ from .emd_wrap import emd_c, check_result from ..utils import parmap -def emd(a, b, M, num_iter_max=100000, log=False): +def emd(a, b, M, max_iter=100000, log=False): """Solves the Earth Movers distance problem and returns the OT matrix @@ -41,7 +41,7 @@ def emd(a, b, M, num_iter_max=100000, log=False): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - num_iter_max : int, optional (default=100000) + max_iter : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. log: boolean, optional (default=False) @@ -94,7 +94,7 @@ def emd(a, b, M, num_iter_max=100000, log=False): if len(b) == 0: b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] - G, cost, u, v, result_code = emd_c(a, b, M, num_iter_max) + G, cost, u, v, result_code = emd_c(a, b, M, max_iter) result_code_string = check_result(result_code) if log: log = {} @@ -107,7 +107,7 @@ def emd(a, b, M, num_iter_max=100000, log=False): return G -def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, log=False): +def emd2(a, b, M, processes=multiprocessing.cpu_count(), max_iter=100000, log=False): """Solves the Earth Movers distance problem and returns the loss .. math:: @@ -131,7 +131,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, lo Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - num_iter_max : int, optional (default=100000) + max_iter : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. @@ -183,7 +183,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, lo if log: def f(b): - G, cost, u, v, resultCode = emd_c(a, b, M, num_iter_max) + G, cost, u, v, resultCode = emd_c(a, b, M, max_iter) result_code_string = check_result(resultCode) log = {} log['G'] = G @@ -194,7 +194,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, lo return [cost, log] else: def f(b): - G, cost, u, v, result_code = emd_c(a, b, M, num_iter_max) + G, cost, u, v, result_code = emd_c(a, b, M, max_iter) check_result(result_code) return cost diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 7ebdd2a..83ee6aa 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -16,7 +16,7 @@ import warnings cdef extern from "EMD.h": - int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int numItermax) + int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter) cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED @@ -36,7 +36,7 @@ def check_result(result_code): @cython.boundscheck(False) @cython.wraparound(False) -def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int num_iter_max): +def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int max_iter): """ Solves the Earth Movers distance problem and returns the optimal transport matrix @@ -63,7 +63,7 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod target histogram M : (ns,nt) ndarray, float64 loss matrix - num_iter_max : int + max_iter : int The maximum number of iterations before stopping the optimization algorithm if it has not converged. @@ -90,6 +90,6 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod b=np.ones((n2,))/n2 # calling the function - cdef int result_code = EMD_wrap(n1, n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, num_iter_max) + cdef int result_code = EMD_wrap(n1, n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, max_iter) return G, cost, alpha, beta, result_code diff --git a/test/test_ot.py b/test/test_ot.py index c9b5154..ca921c5 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -140,7 +140,7 @@ def test_warnings(): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") print('Computing {} EMD '.format(1)) - ot.emd(a, b, M, num_iter_max=1) + ot.emd(a, b, M, max_iter=1) assert "numItermax" in str(w[-1].message) assert len(w) == 1 a[0] = 100 -- cgit v1.2.3 From 8cc04ef5ae8806c81811b2081b1880b46ca063a3 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Sat, 9 Sep 2017 18:05:12 +0900 Subject: Renamed variable in string --- ot/lp/__init__.py | 1 - ot/lp/emd_wrap.pyx | 2 +- test/test_ot.py | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) (limited to 'ot/lp/__init__.py') diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 9a0cb1c..ae5b08c 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -201,7 +201,6 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), max_iter=100000, log=Fa if len(b.shape) == 1: return f(b) nb = b.shape[1] - # res = [emd2_c(a, b[:, i].copy(), M, numItermax) for i in range(nb)] res = parmap(f, [b[:, i] for i in range(nb)], processes) return res diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 83ee6aa..2fcc0e4 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -29,7 +29,7 @@ def check_result(result_code): elif result_code == UNBOUNDED: message = "Problem unbounded" elif result_code == MAX_ITER_REACHED: - message = "numItermax reached before optimality. Try to increase numItermax." + message = "max_iter reached before optimality. Try to increase max_iter." warnings.warn(message) return message diff --git a/test/test_ot.py b/test/test_ot.py index ca921c5..46fc634 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -141,7 +141,7 @@ def test_warnings(): warnings.simplefilter("always") print('Computing {} EMD '.format(1)) ot.emd(a, b, M, max_iter=1) - assert "numItermax" in str(w[-1].message) + assert "max_iter" in str(w[-1].message) assert len(w) == 1 a[0] = 100 print('Computing {} EMD '.format(2)) -- cgit v1.2.3 From 06429e5a34790ec51eb1c921293b24c37b81b952 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Sat, 9 Sep 2017 18:23:05 +0900 Subject: Returned to old variable name to follow repo convention --- ot/da.py | 2 +- ot/lp/__init__.py | 12 ++++++------ ot/lp/emd_wrap.pyx | 2 +- test/test_ot.py | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'ot/lp/__init__.py') diff --git a/ot/da.py b/ot/da.py index f3e7433..eb70305 100644 --- a/ot/da.py +++ b/ot/da.py @@ -1370,7 +1370,7 @@ class EMDTransport(BaseTransport): # coupling estimation self.coupling_ = emd( - a=self.mu_s, b=self.mu_t, M=self.cost_, max_iter=self.max_iter + a=self.mu_s, b=self.mu_t, M=self.cost_, num_iter_max=self.max_iter ) return self diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index ae5b08c..17f5bb4 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -16,7 +16,7 @@ from .emd_wrap import emd_c, check_result from ..utils import parmap -def emd(a, b, M, max_iter=100000, log=False): +def emd(a, b, M, num_iter_max=100000, log=False): """Solves the Earth Movers distance problem and returns the OT matrix @@ -41,7 +41,7 @@ def emd(a, b, M, max_iter=100000, log=False): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - max_iter : int, optional (default=100000) + num_iter_max : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. log: boolean, optional (default=False) @@ -94,7 +94,7 @@ def emd(a, b, M, max_iter=100000, log=False): if len(b) == 0: b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] - G, cost, u, v, result_code = emd_c(a, b, M, max_iter) + G, cost, u, v, result_code = emd_c(a, b, M, num_iter_max) result_code_string = check_result(result_code) if log: log = {} @@ -107,7 +107,7 @@ def emd(a, b, M, max_iter=100000, log=False): return G -def emd2(a, b, M, processes=multiprocessing.cpu_count(), max_iter=100000, log=False): +def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, log=False): """Solves the Earth Movers distance problem and returns the loss .. math:: @@ -183,7 +183,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), max_iter=100000, log=Fa if log: def f(b): - G, cost, u, v, resultCode = emd_c(a, b, M, max_iter) + G, cost, u, v, resultCode = emd_c(a, b, M, num_iter_max) result_code_string = check_result(resultCode) log = {} log['G'] = G @@ -194,7 +194,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), max_iter=100000, log=Fa return [cost, log] else: def f(b): - G, cost, u, v, result_code = emd_c(a, b, M, max_iter) + G, cost, u, v, result_code = emd_c(a, b, M, num_iter_max) check_result(result_code) return cost diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 2fcc0e4..45fc988 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -29,7 +29,7 @@ def check_result(result_code): elif result_code == UNBOUNDED: message = "Problem unbounded" elif result_code == MAX_ITER_REACHED: - message = "max_iter reached before optimality. Try to increase max_iter." + message = "num_iter_max reached before optimality. Try to increase num_iter_max." warnings.warn(message) return message diff --git a/test/test_ot.py b/test/test_ot.py index 46fc634..e05e8aa 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -140,8 +140,8 @@ def test_warnings(): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") print('Computing {} EMD '.format(1)) - ot.emd(a, b, M, max_iter=1) - assert "max_iter" in str(w[-1].message) + ot.emd(a, b, M, num_iter_max=1) + assert "num_iter_max" in str(w[-1].message) assert len(w) == 1 a[0] = 100 print('Computing {} EMD '.format(2)) -- cgit v1.2.3 From 7c6169222979a7e82a83c118bc7117684258d0de Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Sat, 9 Sep 2017 18:29:32 +0900 Subject: Updated variable name in docstring --- ot/lp/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'ot/lp/__init__.py') diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 17f5bb4..f2eaa2b 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -131,7 +131,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, lo Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - max_iter : int, optional (default=100000) + num_iter_max : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. -- cgit v1.2.3 From dd6f8260d01ce173ef3fe0c900112f0ed5288950 Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Tue, 12 Sep 2017 19:58:46 +0900 Subject: Made the return of the matrix optional in emd2 --- ot/lp/__init__.py | 12 +++++++++--- test/test_ot.py | 6 +++--- 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'ot/lp/__init__.py') diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index f2eaa2b..d0f682b 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -107,7 +107,7 @@ def emd(a, b, M, num_iter_max=100000, log=False): return G -def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, log=False): +def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, log=False, return_matrix=False): """Solves the Earth Movers distance problem and returns the loss .. math:: @@ -134,6 +134,11 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, lo num_iter_max : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. + log: boolean, optional (default=False) + If True, returns a dictionary containing the cost and dual + variables. Otherwise returns only the optimal transportation cost. + return_matrix: boolean, optional (default=False) + If True, returns the optimal transportation matrix in the log. Returns ------- @@ -181,12 +186,13 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, lo if len(b) == 0: b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] - if log: + if log or return_matrix: def f(b): G, cost, u, v, resultCode = emd_c(a, b, M, num_iter_max) result_code_string = check_result(resultCode) log = {} - log['G'] = G + if return_matrix: + log['G'] = G log['u'] = u log['v'] = v log['warning'] = result_code_string diff --git a/test/test_ot.py b/test/test_ot.py index e05e8aa..ea6d9dc 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -103,7 +103,7 @@ def test_emd2_multi(): # emd loss multipro proc with log ot.tic() - emdn = ot.emd2(a, b, M, log=True) + emdn = ot.emd2(a, b, M, log=True, return_matrix=True) ot.toc('multi proc : {} s') for i in range(len(emdn)): @@ -140,8 +140,8 @@ def test_warnings(): with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") print('Computing {} EMD '.format(1)) - ot.emd(a, b, M, num_iter_max=1) - assert "num_iter_max" in str(w[-1].message) + ot.emd(a, b, M, numItermax=1) + assert "numItermax" in str(w[-1].message) assert len(w) == 1 a[0] = 100 print('Computing {} EMD '.format(2)) -- cgit v1.2.3 From e52b6eb41228a7f8e381cf73c06e0dffba5773be Mon Sep 17 00:00:00 2001 From: Antoine Rolet Date: Tue, 12 Sep 2017 20:00:14 +0900 Subject: Renaming --- ot/da.py | 2 +- ot/lp/__init__.py | 14 +++++++------- ot/lp/emd_wrap.pyx | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'ot/lp/__init__.py') diff --git a/ot/da.py b/ot/da.py index eb70305..1d3d0ba 100644 --- a/ot/da.py +++ b/ot/da.py @@ -1370,7 +1370,7 @@ class EMDTransport(BaseTransport): # coupling estimation self.coupling_ = emd( - a=self.mu_s, b=self.mu_t, M=self.cost_, num_iter_max=self.max_iter + a=self.mu_s, b=self.mu_t, M=self.cost_, numItermax=self.max_iter ) return self diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index d0f682b..5c09da2 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -16,7 +16,7 @@ from .emd_wrap import emd_c, check_result from ..utils import parmap -def emd(a, b, M, num_iter_max=100000, log=False): +def emd(a, b, M, numItermax=100000, log=False): """Solves the Earth Movers distance problem and returns the OT matrix @@ -41,7 +41,7 @@ def emd(a, b, M, num_iter_max=100000, log=False): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - num_iter_max : int, optional (default=100000) + numItermax : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. log: boolean, optional (default=False) @@ -94,7 +94,7 @@ def emd(a, b, M, num_iter_max=100000, log=False): if len(b) == 0: b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1] - G, cost, u, v, result_code = emd_c(a, b, M, num_iter_max) + G, cost, u, v, result_code = emd_c(a, b, M, numItermax) result_code_string = check_result(result_code) if log: log = {} @@ -107,7 +107,7 @@ def emd(a, b, M, num_iter_max=100000, log=False): return G -def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, log=False, return_matrix=False): +def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000, log=False, return_matrix=False): """Solves the Earth Movers distance problem and returns the loss .. math:: @@ -131,7 +131,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, lo Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - num_iter_max : int, optional (default=100000) + numItermax : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. log: boolean, optional (default=False) @@ -188,7 +188,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, lo if log or return_matrix: def f(b): - G, cost, u, v, resultCode = emd_c(a, b, M, num_iter_max) + G, cost, u, v, resultCode = emd_c(a, b, M, numItermax) result_code_string = check_result(resultCode) log = {} if return_matrix: @@ -200,7 +200,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), num_iter_max=100000, lo return [cost, log] else: def f(b): - G, cost, u, v, result_code = emd_c(a, b, M, num_iter_max) + G, cost, u, v, result_code = emd_c(a, b, M, numItermax) check_result(result_code) return cost diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 45fc988..83ee6aa 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -29,7 +29,7 @@ def check_result(result_code): elif result_code == UNBOUNDED: message = "Problem unbounded" elif result_code == MAX_ITER_REACHED: - message = "num_iter_max reached before optimality. Try to increase num_iter_max." + message = "numItermax reached before optimality. Try to increase numItermax." warnings.warn(message) return message -- cgit v1.2.3