From 00970175c0f8ba9a99b61a182b32e329f219d382 Mon Sep 17 00:00:00 2001 From: RĂ©mi Flamary Date: Wed, 26 Jul 2017 12:15:40 +0200 Subject: add license and authors on all modules --- ot/lp/__init__.py | 4 ++++ ot/lp/emd_wrap.pyx | 9 ++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'ot/lp') diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index db3da78..6e0bdb8 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -3,6 +3,10 @@ Solvers for the original linear program OT problem """ +# Author: Remi Flamary +# +# License: MIT License + import numpy as np # import compiled emd from .emd_wrap import emd_c, emd2_c diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 46794ab..46c96c1 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -1,9 +1,12 @@ # -*- coding: utf-8 -*- """ -Created on Thu Sep 11 08:42:08 2014 - -@author: rflamary +Cython linker with C solver """ + +# Author: Remi Flamary +# +# License: MIT License + import numpy as np cimport numpy as np -- cgit v1.2.3 From 3cecc18f53453e79ccc00484e6cbfaa5643f269c Mon Sep 17 00:00:00 2001 From: aje Date: Tue, 29 Aug 2017 15:38:11 +0200 Subject: Changes to LP solver: - Allow to modify the maximal number of iterations - Display an error message in the python console if the solver encountered an issue --- ot/da.py | 4 ++-- ot/lp/EMD.h | 7 ++++++- ot/lp/EMD_wrapper.cpp | 7 +++---- ot/lp/__init__.py | 16 ++++++++++------ ot/lp/emd_wrap.pyx | 25 ++++++++++++++++++++----- 5 files changed, 41 insertions(+), 18 deletions(-) (limited to 'ot/lp') diff --git a/ot/da.py b/ot/da.py index 78dc150..0dfd02f 100644 --- a/ot/da.py +++ b/ot/da.py @@ -658,7 +658,7 @@ class OTDA(object): self.metric = metric self.computed = False - def fit(self, xs, xt, ws=None, wt=None, norm=None): + def fit(self, xs, xt, ws=None, wt=None, norm=None, numItermax=10000): """Fit domain adaptation between samples is xs and xt (with optional weights)""" self.xs = xs @@ -674,7 +674,7 @@ class OTDA(object): self.M = dist(xs, xt, metric=self.metric) self.normalizeM(norm) - self.G = emd(ws, wt, self.M) + self.G = emd(ws, wt, self.M, numItermax) self.computed = True def interp(self, direction=1): diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h index 40d7192..956830c 100644 --- a/ot/lp/EMD.h +++ b/ot/lp/EMD.h @@ -23,7 +23,12 @@ using namespace lemon; typedef unsigned int node_id_type; +enum ProblemType { + INFEASIBLE, + OPTIMAL, + UNBOUNDED +}; -void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost); +int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int numItermax); #endif diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp index cad4750..83ed56c 100644 --- a/ot/lp/EMD_wrapper.cpp +++ b/ot/lp/EMD_wrapper.cpp @@ -15,11 +15,10 @@ #include "EMD.h" -void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost) { +int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int numItermax) { // beware M and C anre strored in row major C style!!! int n, m, i,cur; double max; - int max_iter=10000; typedef FullBipartiteDigraph Digraph; DIGRAPH_TYPEDEFS(FullBipartiteDigraph); @@ -46,7 +45,7 @@ void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double * std::vector indI(n), indJ(m); std::vector weights1(n), weights2(m); Digraph di(n, m); - NetworkSimplexSimple net(di, true, n+m, n*m,max_iter); + NetworkSimplexSimple net(di, true, n+m, n*m, numItermax); // Set supply and demand, don't account for 0 values (faster) @@ -116,5 +115,5 @@ void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double * }; - + return ret; } diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 6e0bdb8..62afe76 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -15,7 +15,7 @@ import multiprocessing -def emd(a, b, M): +def emd(a, b, M, numItermax=10000): """Solves the Earth Movers distance problem and returns the OT matrix @@ -40,6 +40,8 @@ def emd(a, b, M): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix + numItermax : int + Maximum number of iterations made by the LP solver. Returns ------- @@ -84,9 +86,9 @@ def emd(a, b, M): if len(b) == 0: b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] - return emd_c(a, b, M) + return emd_c(a, b, M, numItermax) -def emd2(a, b, M,processes=multiprocessing.cpu_count()): +def emd2(a, b, M, numItermax=10000, processes=multiprocessing.cpu_count()): """Solves the Earth Movers distance problem and returns the loss .. math:: @@ -110,6 +112,8 @@ def emd2(a, b, M,processes=multiprocessing.cpu_count()): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix + numItermax : int + Maximum number of iterations made by the LP solver. Returns ------- @@ -155,12 +159,12 @@ def emd2(a, b, M,processes=multiprocessing.cpu_count()): b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] if len(b.shape)==1: - return emd2_c(a, b, M) + return emd2_c(a, b, M, numItermax) else: nb=b.shape[1] - #res=[emd2_c(a,b[:,i].copy(),M) for i in range(nb)] + #res=[emd2_c(a,b[:,i].copy(),M, numItermax) for i in range(nb)] def f(b): - return emd2_c(a,b,M) + return emd2_c(a,b,M, numItermax) res= parmap(f, [b[:,i] for i in range(nb)],processes) return np.array(res) diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 46c96c1..ed8c416 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -15,13 +15,14 @@ cimport cython cdef extern from "EMD.h": - void EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost) + int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int numItermax) + cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED @cython.boundscheck(False) @cython.wraparound(False) -def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M): +def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int numItermax): """ Solves the Earth Movers distance problem and returns the optimal transport matrix @@ -48,6 +49,8 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod target histogram M : (ns,nt) ndarray, float64 loss matrix + numItermax : int + Maximum number of iterations made by the LP solver. Returns @@ -69,13 +72,18 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod b=np.ones((n2,))/n2 # calling the function - EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, &cost) + cdef int resultSolver = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, &cost, numItermax) + if resultSolver != OPTIMAL: + if resultSolver == INFEASIBLE: + print("Problem infeasible. Try to inscrease numItermax.") + elif resultSolver == UNBOUNDED: + print("Problem unbounded") return G @cython.boundscheck(False) @cython.wraparound(False) -def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M): +def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int numItermax): """ Solves the Earth Movers distance problem and returns the optimal transport loss @@ -102,6 +110,8 @@ def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mo target histogram M : (ns,nt) ndarray, float64 loss matrix + numItermax : int + Maximum number of iterations made by the LP solver. Returns @@ -123,7 +133,12 @@ def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mo b=np.ones((n2,))/n2 # calling the function - EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, &cost) + cdef int resultSolver = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, &cost, numItermax) + if resultSolver != OPTIMAL: + if resultSolver == INFEASIBLE: + print("Problem infeasible. Try to inscrease numItermax.") + elif resultSolver == UNBOUNDED: + print("Problem unbounded") cost=0 for i in range(n1): -- cgit v1.2.3 From 99c0a24c6467631b326838667998ba4f219d8a24 Mon Sep 17 00:00:00 2001 From: aje Date: Tue, 29 Aug 2017 16:53:06 +0200 Subject: Fix param order --- ot/lp/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'ot/lp') diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 62afe76..5143a70 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -88,7 +88,7 @@ def emd(a, b, M, numItermax=10000): return emd_c(a, b, M, numItermax) -def emd2(a, b, M, numItermax=10000, processes=multiprocessing.cpu_count()): +def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=10000): """Solves the Earth Movers distance problem and returns the loss .. math:: -- cgit v1.2.3 From 308ce24b705dfad9d058138d058da8b18002e081 Mon Sep 17 00:00:00 2001 From: aje Date: Tue, 29 Aug 2017 17:01:17 +0200 Subject: Type print --- ot/lp/emd_wrap.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'ot/lp') diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index ed8c416..6039e1f 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -75,7 +75,7 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod cdef int resultSolver = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, &cost, numItermax) if resultSolver != OPTIMAL: if resultSolver == INFEASIBLE: - print("Problem infeasible. Try to inscrease numItermax.") + print("Problem infeasible. Try to increase numItermax.") elif resultSolver == UNBOUNDED: print("Problem unbounded") -- cgit v1.2.3 From 982f36cb0a5f3a6a14454238a26053de7251b0f0 Mon Sep 17 00:00:00 2001 From: aje Date: Wed, 30 Aug 2017 09:56:37 +0200 Subject: Changes: - Rename numItermax to max_iter - Default value to 100000 instead of 10000 - Add max_iter to class SinkhornTransport(BaseTransport) - Add norm to all BaseTransport --- ot/da.py | 64 ++++++++++++++++++++++++++++++++++------ ot/lp/EMD.h | 2 +- ot/lp/EMD_wrapper.cpp | 4 +-- ot/lp/__init__.py | 46 ++++++++++++++--------------- ot/lp/emd_wrap.pyx | 82 ++++++++++++++++++++++++++------------------------- 5 files changed, 123 insertions(+), 75 deletions(-) (limited to 'ot/lp') diff --git a/ot/da.py b/ot/da.py index 0dfd02f..5871aba 100644 --- a/ot/da.py +++ b/ot/da.py @@ -658,7 +658,7 @@ class OTDA(object): self.metric = metric self.computed = False - def fit(self, xs, xt, ws=None, wt=None, norm=None, numItermax=10000): + def fit(self, xs, xt, ws=None, wt=None, norm=None, max_iter=100000): """Fit domain adaptation between samples is xs and xt (with optional weights)""" self.xs = xs @@ -674,7 +674,7 @@ class OTDA(object): self.M = dist(xs, xt, metric=self.metric) self.normalizeM(norm) - self.G = emd(ws, wt, self.M, numItermax) + self.G = emd(ws, wt, self.M, max_iter) self.computed = True def interp(self, direction=1): @@ -1001,6 +1001,7 @@ class BaseTransport(BaseEstimator): # pairwise distance self.cost_ = dist(Xs, Xt, metric=self.metric) + self.normalizeCost_(self.norm) if (ys is not None) and (yt is not None): @@ -1182,6 +1183,26 @@ class BaseTransport(BaseEstimator): return transp_Xt + def normalizeCost_(self, norm): + """ Apply normalization to the loss matrix + + + Parameters + ---------- + norm : str + type of normalization from 'median','max','log','loglog' + + """ + + if norm == "median": + self.cost_ /= float(np.median(self.cost_)) + elif norm == "max": + self.cost_ /= float(np.max(self.cost_)) + elif norm == "log": + self.cost_ = np.log(1 + self.cost_) + elif norm == "loglog": + self.cost_ = np.log(1 + np.log(1 + self.cost_)) + class SinkhornTransport(BaseTransport): """Domain Adapatation OT method based on Sinkhorn Algorithm @@ -1202,6 +1223,9 @@ class SinkhornTransport(BaseTransport): be transported from a domain to another one. metric : string, optional (default="sqeuclidean") The ground metric for the Wasserstein problem + norm : string, optional (default=None) + If given, normalize the ground metric to avoid numerical errors that + can occur with large metric values. distribution : string, optional (default="uniform") The kind of distribution estimation to employ verbose : int, optional (default=0) @@ -1231,7 +1255,7 @@ class SinkhornTransport(BaseTransport): def __init__(self, reg_e=1., max_iter=1000, tol=10e-9, verbose=False, log=False, - metric="sqeuclidean", + metric="sqeuclidean", norm=None, distribution_estimation=distribution_estimation_uniform, out_of_sample_map='ferradans', limit_max=np.infty): @@ -1241,6 +1265,7 @@ class SinkhornTransport(BaseTransport): self.verbose = verbose self.log = log self.metric = metric + self.norm = norm self.limit_max = limit_max self.distribution_estimation = distribution_estimation self.out_of_sample_map = out_of_sample_map @@ -1296,6 +1321,9 @@ class EMDTransport(BaseTransport): be transported from a domain to another one. metric : string, optional (default="sqeuclidean") The ground metric for the Wasserstein problem + norm : string, optional (default=None) + If given, normalize the ground metric to avoid numerical errors that + can occur with large metric values. distribution : string, optional (default="uniform") The kind of distribution estimation to employ verbose : int, optional (default=0) @@ -1306,6 +1334,9 @@ class EMDTransport(BaseTransport): Controls the semi supervised mode. Transport between labeled source and target samples of different classes will exhibit an infinite cost (10 times the maximum value of the cost matrix) + max_iter : int, optional (default=100000) + The maximum number of iterations before stopping the optimization + algorithm if it has not converged. Attributes ---------- @@ -1319,14 +1350,17 @@ class EMDTransport(BaseTransport): on Pattern Analysis and Machine Intelligence , vol.PP, no.99, pp.1-1 """ - def __init__(self, metric="sqeuclidean", + def __init__(self, metric="sqeuclidean", norm=None, distribution_estimation=distribution_estimation_uniform, - out_of_sample_map='ferradans', limit_max=10): + out_of_sample_map='ferradans', limit_max=10, + max_iter=100000): self.metric = metric + self.norm = norm self.limit_max = limit_max self.distribution_estimation = distribution_estimation self.out_of_sample_map = out_of_sample_map + self.max_iter = max_iter def fit(self, Xs, ys=None, Xt=None, yt=None): """Build a coupling matrix from source and target sets of samples @@ -1353,7 +1387,7 @@ class EMDTransport(BaseTransport): # coupling estimation self.coupling_ = emd( - a=self.mu_s, b=self.mu_t, M=self.cost_, + a=self.mu_s, b=self.mu_t, M=self.cost_, max_iter=self.max_iter ) return self @@ -1376,6 +1410,9 @@ class SinkhornLpl1Transport(BaseTransport): be transported from a domain to another one. metric : string, optional (default="sqeuclidean") The ground metric for the Wasserstein problem + norm : string, optional (default=None) + If given, normalize the ground metric to avoid numerical errors that + can occur with large metric values. distribution : string, optional (default="uniform") The kind of distribution estimation to employ max_iter : int, float, optional (default=10) @@ -1410,7 +1447,7 @@ class SinkhornLpl1Transport(BaseTransport): def __init__(self, reg_e=1., reg_cl=0.1, max_iter=10, max_inner_iter=200, tol=10e-9, verbose=False, - metric="sqeuclidean", + metric="sqeuclidean", norm=None, distribution_estimation=distribution_estimation_uniform, out_of_sample_map='ferradans', limit_max=np.infty): @@ -1421,6 +1458,7 @@ class SinkhornLpl1Transport(BaseTransport): self.tol = tol self.verbose = verbose self.metric = metric + self.norm = norm self.distribution_estimation = distribution_estimation self.out_of_sample_map = out_of_sample_map self.limit_max = limit_max @@ -1477,6 +1515,9 @@ class SinkhornL1l2Transport(BaseTransport): be transported from a domain to another one. metric : string, optional (default="sqeuclidean") The ground metric for the Wasserstein problem + norm : string, optional (default=None) + If given, normalize the ground metric to avoid numerical errors that + can occur with large metric values. distribution : string, optional (default="uniform") The kind of distribution estimation to employ max_iter : int, float, optional (default=10) @@ -1516,7 +1557,7 @@ class SinkhornL1l2Transport(BaseTransport): def __init__(self, reg_e=1., reg_cl=0.1, max_iter=10, max_inner_iter=200, tol=10e-9, verbose=False, log=False, - metric="sqeuclidean", + metric="sqeuclidean", norm=None, distribution_estimation=distribution_estimation_uniform, out_of_sample_map='ferradans', limit_max=10): @@ -1528,6 +1569,7 @@ class SinkhornL1l2Transport(BaseTransport): self.verbose = verbose self.log = log self.metric = metric + self.norm = norm self.distribution_estimation = distribution_estimation self.out_of_sample_map = out_of_sample_map self.limit_max = limit_max @@ -1588,6 +1630,9 @@ class MappingTransport(BaseEstimator): Estimate linear mapping with constant bias metric : string, optional (default="sqeuclidean") The ground metric for the Wasserstein problem + norm : string, optional (default=None) + If given, normalize the ground metric to avoid numerical errors that + can occur with large metric values. kernel : string, optional (default="linear") The kernel to use either linear or gaussian sigma : float, optional (default=1) @@ -1627,11 +1672,12 @@ class MappingTransport(BaseEstimator): """ def __init__(self, mu=1, eta=0.001, bias=False, metric="sqeuclidean", - kernel="linear", sigma=1, max_iter=100, tol=1e-5, + norm=None, kernel="linear", sigma=1, max_iter=100, tol=1e-5, max_inner_iter=10, inner_tol=1e-6, log=False, verbose=False, verbose2=False): self.metric = metric + self.norm = norm self.mu = mu self.eta = eta self.bias = bias diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h index 956830c..aa92441 100644 --- a/ot/lp/EMD.h +++ b/ot/lp/EMD.h @@ -29,6 +29,6 @@ enum ProblemType { UNBOUNDED }; -int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int numItermax); +int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int max_iter); #endif diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp index 83ed56c..c8c2eb3 100644 --- a/ot/lp/EMD_wrapper.cpp +++ b/ot/lp/EMD_wrapper.cpp @@ -15,7 +15,7 @@ #include "EMD.h" -int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int numItermax) { +int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int max_iter) { // beware M and C anre strored in row major C style!!! int n, m, i,cur; double max; @@ -45,7 +45,7 @@ int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *c std::vector indI(n), indJ(m); std::vector weights1(n), weights2(m); Digraph di(n, m); - NetworkSimplexSimple net(di, true, n+m, n*m, numItermax); + NetworkSimplexSimple net(di, true, n+m, n*m, max_iter); // Set supply and demand, don't account for 0 values (faster) diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 5143a70..7bef648 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -14,8 +14,7 @@ from ..utils import parmap import multiprocessing - -def emd(a, b, M, numItermax=10000): +def emd(a, b, M, max_iter=100000): """Solves the Earth Movers distance problem and returns the OT matrix @@ -40,8 +39,9 @@ def emd(a, b, M, numItermax=10000): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - numItermax : int - Maximum number of iterations made by the LP solver. + max_iter : int, optional (default=100000) + The maximum number of iterations before stopping the optimization + algorithm if it has not converged. Returns ------- @@ -54,7 +54,7 @@ def emd(a, b, M, numItermax=10000): Simple example with obvious solution. The function emd accepts lists and perform automatic conversion to numpy arrays - + >>> import ot >>> a=[.5,.5] >>> b=[.5,.5] @@ -86,10 +86,11 @@ def emd(a, b, M, numItermax=10000): if len(b) == 0: b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] - return emd_c(a, b, M, numItermax) + return emd_c(a, b, M, max_iter) + -def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=10000): - """Solves the Earth Movers distance problem and returns the loss +def emd2(a, b, M, processes=multiprocessing.cpu_count(), max_iter=100000): + """Solves the Earth Movers distance problem and returns the loss .. math:: \gamma = arg\min_\gamma <\gamma,M>_F @@ -112,8 +113,9 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=10000): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - numItermax : int - Maximum number of iterations made by the LP solver. + max_iter : int, optional (default=100000) + The maximum number of iterations before stopping the optimization + algorithm if it has not converged. Returns ------- @@ -126,15 +128,15 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=10000): Simple example with obvious solution. The function emd accepts lists and perform automatic conversion to numpy arrays - - + + >>> import ot >>> a=[.5,.5] >>> b=[.5,.5] >>> M=[[0.,1.],[1.,0.]] >>> ot.emd2(a,b,M) 0.0 - + References ---------- @@ -157,16 +159,14 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=10000): a = np.ones((M.shape[0], ), dtype=np.float64)/M.shape[0] if len(b) == 0: b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] - - if len(b.shape)==1: - return emd2_c(a, b, M, numItermax) + + if len(b.shape) == 1: + return emd2_c(a, b, M, max_iter) else: - nb=b.shape[1] - #res=[emd2_c(a,b[:,i].copy(),M, numItermax) for i in range(nb)] + nb = b.shape[1] + # res = [emd2_c(a, b[:, i].copy(), M, max_iter) for i in range(nb)] + def f(b): - return emd2_c(a,b,M, numItermax) - res= parmap(f, [b[:,i] for i in range(nb)],processes) + return emd2_c(a, b, M, max_iter) + res = parmap(f, [b[:, i] for i in range(nb)], processes) return np.array(res) - - - \ No newline at end of file diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 6039e1f..26d3330 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -15,56 +15,57 @@ cimport cython cdef extern from "EMD.h": - int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int numItermax) + int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double *cost, int max_iter) cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED @cython.boundscheck(False) @cython.wraparound(False) -def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int numItermax): +def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int max_iter): """ Solves the Earth Movers distance problem and returns the optimal transport matrix - + gamm=emd(a,b,M) - + .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F - + \gamma = arg\min_\gamma <\gamma,M>_F + s.t. \gamma 1 = a - - \gamma^T 1= b - + + \gamma^T 1= b + \gamma\geq 0 where : - + - M is the metric cost matrix - a and b are the sample weights - + Parameters ---------- a : (ns,) ndarray, float64 - source histogram + source histogram b : (nt,) ndarray, float64 target histogram M : (ns,nt) ndarray, float64 - loss matrix - numItermax : int - Maximum number of iterations made by the LP solver. - - + loss matrix + max_iter : int + The maximum number of iterations before stopping the optimization + algorithm if it has not converged. + + Returns ------- gamma: (ns x nt) ndarray Optimal transportation matrix for the given parameters - + """ cdef int n1= M.shape[0] cdef int n2= M.shape[1] cdef float cost=0 cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([n1, n2]) - + if not len(a): a=np.ones((n1,))/n1 @@ -72,7 +73,7 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod b=np.ones((n2,))/n2 # calling the function - cdef int resultSolver = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, &cost, numItermax) + cdef int resultSolver = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, &cost, max_iter) if resultSolver != OPTIMAL: if resultSolver == INFEASIBLE: print("Problem infeasible. Try to increase numItermax.") @@ -83,49 +84,50 @@ def emd_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mod @cython.boundscheck(False) @cython.wraparound(False) -def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int numItermax): +def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mode="c"] b,np.ndarray[double, ndim=2, mode="c"] M, int max_iter): """ Solves the Earth Movers distance problem and returns the optimal transport loss - + gamm=emd(a,b,M) - + .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F - + \gamma = arg\min_\gamma <\gamma,M>_F + s.t. \gamma 1 = a - - \gamma^T 1= b - + + \gamma^T 1= b + \gamma\geq 0 where : - + - M is the metric cost matrix - a and b are the sample weights - + Parameters ---------- a : (ns,) ndarray, float64 - source histogram + source histogram b : (nt,) ndarray, float64 target histogram M : (ns,nt) ndarray, float64 - loss matrix - numItermax : int - Maximum number of iterations made by the LP solver. - - + loss matrix + max_iter : int + The maximum number of iterations before stopping the optimization + algorithm if it has not converged. + + Returns ------- gamma: (ns x nt) ndarray Optimal transportation matrix for the given parameters - + """ cdef int n1= M.shape[0] cdef int n2= M.shape[1] cdef float cost=0 cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([n1, n2]) - + if not len(a): a=np.ones((n1,))/n1 @@ -133,13 +135,13 @@ def emd2_c( np.ndarray[double, ndim=1, mode="c"] a,np.ndarray[double, ndim=1, mo b=np.ones((n2,))/n2 # calling the function - cdef int resultSolver = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, &cost, numItermax) + cdef int resultSolver = EMD_wrap(n1,n2, a.data, b.data, M.data, G.data, &cost, max_iter) if resultSolver != OPTIMAL: if resultSolver == INFEASIBLE: print("Problem infeasible. Try to inscrease numItermax.") elif resultSolver == UNBOUNDED: print("Problem unbounded") - + cost=0 for i in range(n1): for j in range(n2): -- cgit v1.2.3 From feef989b155c0b77a1a014e58db7db390b1ee6d4 Mon Sep 17 00:00:00 2001 From: aje Date: Wed, 30 Aug 2017 10:06:41 +0200 Subject: Rename for emd and emd2 --- ot/lp/__init__.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'ot/lp') diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 7bef648..de91e74 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -14,7 +14,7 @@ from ..utils import parmap import multiprocessing -def emd(a, b, M, max_iter=100000): +def emd(a, b, M, numItermax=100000): """Solves the Earth Movers distance problem and returns the OT matrix @@ -39,7 +39,7 @@ def emd(a, b, M, max_iter=100000): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - max_iter : int, optional (default=100000) + numItermax : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. @@ -86,10 +86,10 @@ def emd(a, b, M, max_iter=100000): if len(b) == 0: b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] - return emd_c(a, b, M, max_iter) + return emd_c(a, b, M, numItermax) -def emd2(a, b, M, processes=multiprocessing.cpu_count(), max_iter=100000): +def emd2(a, b, M, processes=multiprocessing.cpu_count(), numItermax=100000): """Solves the Earth Movers distance problem and returns the loss .. math:: @@ -113,7 +113,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), max_iter=100000): Target histogram (uniform weigth if empty list) M : (ns,nt) ndarray, float64 loss matrix - max_iter : int, optional (default=100000) + numItermax : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. @@ -161,12 +161,12 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), max_iter=100000): b = np.ones((M.shape[1], ), dtype=np.float64)/M.shape[1] if len(b.shape) == 1: - return emd2_c(a, b, M, max_iter) + return emd2_c(a, b, M, numItermax) else: nb = b.shape[1] - # res = [emd2_c(a, b[:, i].copy(), M, max_iter) for i in range(nb)] + # res = [emd2_c(a, b[:, i].copy(), M, numItermax) for i in range(nb)] def f(b): - return emd2_c(a, b, M, max_iter) + return emd2_c(a, b, M, numItermax) res = parmap(f, [b[:, i] for i in range(nb)], processes) return np.array(res) -- cgit v1.2.3