diff options
author | Gard Spreemann <gspr@nonempty.org> | 2021-11-09 17:05:13 +0100 |
---|---|---|
committer | Gard Spreemann <gspr@nonempty.org> | 2021-11-09 17:05:13 +0100 |
commit | a9fdc844907decddf54bed3ebeea8d8b2cf0fc5c (patch) | |
tree | 449a03fce8fafb78b6badd12b6e633f1e5d73a64 /ot/lp | |
parent | a16b9471d7114ec08977479b7249efe747702b97 (diff) | |
parent | f1628794d521a8dfa00af383b5e06cd6d34af619 (diff) |
Merge tag '0.8.0' into dfsg/latest
Diffstat (limited to 'ot/lp')
-rw-r--r-- | ot/lp/EMD.h | 5 | ||||
-rw-r--r-- | ot/lp/EMD_wrapper.cpp | 124 | ||||
-rw-r--r-- | ot/lp/__init__.py | 597 | ||||
-rw-r--r-- | ot/lp/cvx.py | 3 | ||||
-rw-r--r-- | ot/lp/emd_wrap.pyx | 32 | ||||
-rw-r--r-- | ot/lp/full_bipartitegraph.h | 27 | ||||
-rw-r--r-- | ot/lp/full_bipartitegraph_omp.h | 234 | ||||
-rw-r--r-- | ot/lp/network_simplex_simple.h | 212 | ||||
-rw-r--r-- | ot/lp/network_simplex_simple_omp.h | 1699 | ||||
-rw-r--r-- | ot/lp/solver_1d.py | 367 |
10 files changed, 2763 insertions, 537 deletions
diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h index c0fe7a3..8a1f9ac 100644 --- a/ot/lp/EMD.h +++ b/ot/lp/EMD.h @@ -18,19 +18,18 @@ #include <iostream> #include <vector> -#include "network_simplex_simple.h" -using namespace lemon; typedef unsigned int node_id_type; enum ProblemType { INFEASIBLE, OPTIMAL, UNBOUNDED, - MAX_ITER_REACHED + MAX_ITER_REACHED }; int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter); +int EMD_wrap_omp(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter, int numThreads); diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp index bc873ed..2bdc172 100644 --- a/ot/lp/EMD_wrapper.cpp +++ b/ot/lp/EMD_wrapper.cpp @@ -12,16 +12,22 @@ * */ + +#include "network_simplex_simple.h" +#include "network_simplex_simple_omp.h" #include "EMD.h" +#include <cstdint> int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, double* alpha, double* beta, double *cost, int maxIter) { - // beware M and C anre strored in row major C style!!! - int n, m, i, cur; + // beware M and C are stored in row major C style!!! 
+ + using namespace lemon; + int n, m, cur; typedef FullBipartiteDigraph Digraph; - DIGRAPH_TYPEDEFS(FullBipartiteDigraph); + DIGRAPH_TYPEDEFS(Digraph); // Get the number of non zero coordinates for r and c n=0; @@ -48,7 +54,7 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, std::vector<int> indI(n), indJ(m); std::vector<double> weights1(n), weights2(m); Digraph di(n, m); - NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m, maxIter); + NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, ((int64_t)n)*((int64_t)m), maxIter); // Set supply and demand, don't account for 0 values (faster) @@ -76,10 +82,12 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, net.supplyMap(&weights1[0], n, &weights2[0], m); // Set the cost of each edge + int64_t idarc = 0; for (int i=0; i<n; i++) { for (int j=0; j<m; j++) { double val=*(D+indI[i]*n2+indJ[j]); - net.setCost(di.arcFromId(i*m+j), val); + net.setCost(di.arcFromId(idarc), val); + ++idarc; } } @@ -87,12 +95,13 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, // Solve the problem with the network simplex algorithm int ret=net.run(); + int i, j; if (ret==(int)net.OPTIMAL || ret==(int)net.MAX_ITER_REACHED) { *cost = 0; Arc a; di.first(a); for (; a != INVALID; di.next(a)) { - int i = di.source(a); - int j = di.target(a); + i = di.source(a); + j = di.target(a); double flow = net.flow(a); *cost += flow * (*(D+indI[i]*n2+indJ[j-n])); *(G+indI[i]*n2+indJ[j-n]) = flow; @@ -106,3 +115,104 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, return ret; } + + + + + + +int EMD_wrap_omp(int n1, int n2, double *X, double *Y, double *D, double *G, + double* alpha, double* beta, double *cost, int maxIter, int numThreads) { + // beware M and C are stored in row major C style!!! 
+ + using namespace lemon_omp; + int n, m, cur; + + typedef FullBipartiteDigraph Digraph; + DIGRAPH_TYPEDEFS(Digraph); + + // Get the number of non zero coordinates for r and c + n=0; + for (int i=0; i<n1; i++) { + double val=*(X+i); + if (val>0) { + n++; + }else if(val<0){ + return INFEASIBLE; + } + } + m=0; + for (int i=0; i<n2; i++) { + double val=*(Y+i); + if (val>0) { + m++; + }else if(val<0){ + return INFEASIBLE; + } + } + + // Define the graph + + std::vector<int> indI(n), indJ(m); + std::vector<double> weights1(n), weights2(m); + Digraph di(n, m); + NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, ((int64_t)n)*((int64_t)m), maxIter, numThreads); + + // Set supply and demand, don't account for 0 values (faster) + + cur=0; + for (int i=0; i<n1; i++) { + double val=*(X+i); + if (val>0) { + weights1[ cur ] = val; + indI[cur++]=i; + } + } + + // Demand is actually negative supply... + + cur=0; + for (int i=0; i<n2; i++) { + double val=*(Y+i); + if (val>0) { + weights2[ cur ] = -val; + indJ[cur++]=i; + } + } + + + net.supplyMap(&weights1[0], n, &weights2[0], m); + + // Set the cost of each edge + int64_t idarc = 0; + for (int i=0; i<n; i++) { + for (int j=0; j<m; j++) { + double val=*(D+indI[i]*n2+indJ[j]); + net.setCost(di.arcFromId(idarc), val); + ++idarc; + } + } + + + // Solve the problem with the network simplex algorithm + + int ret=net.run(); + int i, j; + if (ret==(int)net.OPTIMAL || ret==(int)net.MAX_ITER_REACHED) { + *cost = 0; + Arc a; di.first(a); + for (; a != INVALID; di.next(a)) { + i = di.source(a); + j = di.target(a); + double flow = net.flow(a); + *cost += flow * (*(D+indI[i]*n2+indJ[j-n])); + *(G+indI[i]*n2+indJ[j-n]) = flow; + *(alpha + indI[i]) = -net.potential(i); + *(beta + indJ[j-n]) = net.potential(j); + } + + } + + + return ret; +} diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 514a607..5da897d 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -8,25 +8,50 @@ Solvers for the original linear 
program OT problem # # License: MIT License +import os import multiprocessing import sys import numpy as np -from scipy.sparse import coo_matrix +import warnings from . import cvx from .cvx import barycenter + # import compiled emd from .emd_wrap import emd_c, check_result, emd_1d_sorted -from ..utils import dist +from .solver_1d import emd_1d, emd2_1d, wasserstein_1d + +from ..utils import dist, list_to_array from ..utils import parmap +from ..backend import get_backend -__all__ = ['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx', +__all__ = ['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx', ' emd_1d_sorted', 'emd_1d', 'emd2_1d', 'wasserstein_1d'] +def check_number_threads(numThreads): + """Checks whether or not the requested number of threads has a valid value. + + Parameters + ---------- + numThreads : int or str + The requested number of threads, should either be a strictly positive integer or "max" or None + + Returns + ------- + numThreads : int + Corrected number of threads + """ + if (numThreads is None) or (isinstance(numThreads, str) and numThreads.lower() == 'max'): + return -1 + if (not isinstance(numThreads, int)) or numThreads < 1: + raise ValueError('numThreads should either be "max" or a strictly positive integer') + return numThreads + + def center_ot_dual(alpha0, beta0, a=None, b=None): - r"""Center dual OT potentials w.r.t. theirs weights + r"""Center dual OT potentials w.r.t. their weights The main idea of this function is to find unique dual potentials that ensure some kind of centering/fairness. The main idea is to find dual potentials that lead to the same final objective value for both source and targets (see below for more details). It will help having @@ -37,7 +62,7 @@ def center_ot_dual(alpha0, beta0, a=None, b=None): is the following: .. math:: - \alpha^T a= \beta^T b + \alpha^T \mathbf{a} = \beta^T \mathbf{b} in addition to the OT problem constraints. 
@@ -45,11 +70,11 @@ def center_ot_dual(alpha0, beta0, a=None, b=None): a constant from both :math:`\alpha_0` and :math:`\beta_0`. .. math:: - c=\frac{\beta0^T b-\alpha_0^T a}{1^Tb+1^Ta} + c &= \frac{\beta_0^T \mathbf{b} - \alpha_0^T \mathbf{a}}{\mathbf{1}^T \mathbf{b} + \mathbf{1}^T \mathbf{a}} - \alpha=\alpha_0+c + \alpha &= \alpha_0 + c - \beta=\beta0+c + \beta &= \beta_0 + c Parameters ---------- @@ -92,35 +117,35 @@ def estimate_dual_null_weights(alpha0, beta0, a, b, M): The feasible values are computed efficiently but rather coarsely. .. warning:: - This function is necessary because the C++ solver in emd_c - discards all samples in the distributions with - zeros weights. This means that while the primal variable (transport + This function is necessary because the C++ solver in `emd_c` + discards all samples in the distributions with + zeros weights. This means that while the primal variable (transport matrix) is exact, the solver only returns feasible dual potentials - on the samples with weights different from zero. + on the samples with weights different from zero. First we compute the constraints violations: .. math:: - V=\alpha+\beta^T-M + \mathbf{V} = \alpha + \beta^T - \mathbf{M} - Next we compute the max amount of violation per row (alpha) and - columns (beta) + Next we compute the max amount of violation per row (:math:`\alpha`) and + columns (:math:`beta`) .. math:: - v^a_i=\max_j V_{i,j} + \mathbf{v^a}_i = \max_j \mathbf{V}_{i,j} - v^b_j=\max_i V_{i,j} + \mathbf{v^b}_j = \max_i \mathbf{V}_{i,j} Finally we update the dual potential with 0 weights if a constraint is violated .. 
math:: - \alpha_i = \alpha_i -v^a_i \quad \text{ if } a_i=0 \text{ and } v^a_i>0 + \alpha_i = \alpha_i - \mathbf{v^a}_i \quad \text{ if } \mathbf{a}_i=0 \text{ and } \mathbf{v^a}_i>0 - \beta_j = \beta_j -v^b_j \quad \text{ if } b_j=0 \text{ and } v^b_j>0 + \beta_j = \beta_j - \mathbf{v^b}_j \quad \text{ if } \mathbf{b}_j=0 \text{ and } \mathbf{v^b}_j > 0 In the end the dual potentials are centered using function - :ref:`center_ot_dual`. + :py:func:`ot.lp.center_ot_dual`. Note that all those updates do not change the objective value of the solution but provide dual potentials that do not violate the constraints. @@ -172,54 +197,62 @@ def estimate_dual_null_weights(alpha0, beta0, a, b, M): return center_ot_dual(alpha, beta, a, b) -def emd(a, b, M, numItermax=100000, log=False, center_dual=True): +def emd(a, b, M, numItermax=100000, log=False, center_dual=True, numThreads=1): r"""Solves the Earth Movers distance problem and returns the OT matrix .. math:: - \gamma = arg\min_\gamma <\gamma,M>_F + \gamma = \mathop{\arg \min}_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F - s.t. \gamma 1 = a + s.t. \ \gamma \mathbf{1} = \mathbf{a} - \gamma^T 1= b + \gamma^T \mathbf{1} = \mathbf{b} + + \gamma \geq 0 - \gamma\geq 0 where : - - M is the metric cost matrix - - a and b are the sample weights + - :math:`\mathbf{M}` is the metric cost matrix + - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights - .. warning:: - Note that the M matrix needs to be a C-order numpy.array in float64 - format. + .. warning:: Note that the :math:`\mathbf{M}` matrix in numpy needs to be a C-order + numpy.array in float64 format. It will be converted if not in this + format + + .. note:: This function is backend-compatible and will work on arrays + from all compatible backends. - Uses the algorithm proposed in [1]_ + Uses the algorithm proposed in :ref:`[1] <references-emd>`. 
Parameters ---------- - a : (ns,) numpy.ndarray, float64 + a : (ns,) array-like, float Source histogram (uniform weight if empty list) - b : (nt,) numpy.ndarray, float64 + b : (nt,) array-like, float Target histogram (uniform weight if empty list) - M : (ns,nt) numpy.ndarray, float64 - Loss matrix (c-order array with type float64) + M : (ns,nt) array-like, float + Loss matrix (c-order array in numpy with type float64) numItermax : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. log: bool, optional (default=False) - If True, returns a dictionary containing the cost and dual - variables. Otherwise returns only the optimal transportation matrix. + If True, returns a dictionary containing the cost and dual variables. + Otherwise returns only the optimal transportation matrix. center_dual: boolean, optional (default=True) If True, centers the dual potential using function :ref:`center_ot_dual`. + numThreads: int or "max", optional (default=1, i.e. OpenMP is not used) + If compiled with OpenMP, chooses the number of threads to parallelize. + "max" selects the highest number possible. Returns ------- - gamma: (ns x nt) numpy.ndarray - Optimal transportation matrix for the given parameters - log: dict - If input log is true, a dictionary containing the cost and dual - variables and exit status + gamma: array-like, shape (ns, nt) + Optimal transportation matrix for the given + parameters + log: dict, optional + If input log is true, a dictionary containing the + cost and dual variables and exit status Examples @@ -232,26 +265,39 @@ def emd(a, b, M, numItermax=100000, log=False, center_dual=True): >>> a=[.5,.5] >>> b=[.5,.5] >>> M=[[0.,1.],[1.,0.]] - >>> ot.emd(a,b,M) + >>> ot.emd(a, b, M) array([[0.5, 0. ], [0. , 0.5]]) + + .. _references-emd: References ---------- - - .. [1] Bonneel, N., Van De Panne, M., Paris, S., & Heidrich, W. - (2011, December). 
Displacement interpolation using Lagrangian mass - transport. In ACM Transactions on Graphics (TOG) (Vol. 30, No. 6, p. - 158). ACM. + .. [1] Bonneel, N., Van De Panne, M., Paris, S., & Heidrich, W. (2011, + December). Displacement interpolation using Lagrangian mass transport. + In ACM Transactions on Graphics (TOG) (Vol. 30, No. 6, p. 158). ACM. See Also -------- ot.bregman.sinkhorn : Entropic regularized OT - ot.optim.cg : General regularized OT""" + ot.optim.cg : General regularized OT + """ + + # convert to numpy if list + a, b, M = list_to_array(a, b, M) + + a0, b0, M0 = a, b, M + nx = get_backend(M0, a0, b0) + # convert to numpy + M = nx.to_numpy(M) + a = nx.to_numpy(a) + b = nx.to_numpy(b) + + # ensure float64 a = np.asarray(a, dtype=np.float64) b = np.asarray(b, dtype=np.float64) - M = np.asarray(M, dtype=np.float64) + M = np.asarray(M, dtype=np.float64, order='C') # if empty array given then use uniform distributions if len(a) == 0: @@ -262,81 +308,91 @@ def emd(a, b, M, numItermax=100000, log=False, center_dual=True): assert (a.shape[0] == M.shape[0] and b.shape[0] == M.shape[1]), \ "Dimension mismatch, check dimensions of M with a and b" + # ensure that same mass + np.testing.assert_almost_equal(a.sum(0), + b.sum(0), err_msg='a and b vector must have the same sum') + b = b * a.sum() / b.sum() + asel = a != 0 bsel = b != 0 - G, cost, u, v, result_code = emd_c(a, b, M, numItermax) + numThreads = check_number_threads(numThreads) + + G, cost, u, v, result_code = emd_c(a, b, M, numItermax, numThreads) if center_dual: u, v = center_ot_dual(u, v, a, b) if np.any(~asel) or np.any(~bsel): u, v = estimate_dual_null_weights(u, v, a, b, M) - + result_code_string = check_result(result_code) if log: log = {} log['cost'] = cost - log['u'] = u - log['v'] = v + log['u'] = nx.from_numpy(u, type_as=a0) + log['v'] = nx.from_numpy(v, type_as=b0) log['warning'] = result_code_string log['result_code'] = result_code - return G, log - return G + return nx.from_numpy(G, 
type_as=M0), log + return nx.from_numpy(G, type_as=M0) -def emd2(a, b, M, processes=multiprocessing.cpu_count(), +def emd2(a, b, M, processes=1, numItermax=100000, log=False, return_matrix=False, - center_dual=True): + center_dual=True, numThreads=1): r"""Solves the Earth Movers distance problem and returns the loss .. math:: - \min_\gamma <\gamma,M>_F + \min_\gamma \quad \langle \gamma, \mathbf{M} \rangle_F + + s.t. \ \gamma \mathbf{1} = \mathbf{a} - s.t. \gamma 1 = a + \gamma^T \mathbf{1} = \mathbf{b} - \gamma^T 1= b + \gamma \geq 0 - \gamma\geq 0 where : - - M is the metric cost matrix - - a and b are the sample weights + - :math:`\mathbf{M}` is the metric cost matrix + - :math:`\mathbf{a}` and :math:`\mathbf{b}` are the sample weights - .. warning:: - Note that the M matrix needs to be a C-order numpy.array in float64 - format. + .. note:: This function is backend-compatible and will work on arrays + from all compatible backends. - Uses the algorithm proposed in [1]_ + Uses the algorithm proposed in :ref:`[1] <references-emd2>`. Parameters ---------- - a : (ns,) numpy.ndarray, float64 + a : (ns,) array-like, float64 Source histogram (uniform weight if empty list) - b : (nt,) numpy.ndarray, float64 + b : (nt,) array-like, float64 Target histogram (uniform weight if empty list) - M : (ns,nt) numpy.ndarray, float64 - Loss matrix (c-order array with type float64) - processes : int, optional (default=nb cpu) - Nb of processes used for multiple emd computation (not used on windows) + M : (ns,nt) array-like, float64 + Loss matrix (for numpy c-order array with type float64) + processes : int, optional (default=1) + Nb of processes used for multiple emd computation (deprecated) numItermax : int, optional (default=100000) The maximum number of iterations before stopping the optimization algorithm if it has not converged. 
log: boolean, optional (default=False) - If True, returns a dictionary containing the cost and dual + If True, returns a dictionary containing dual variables. Otherwise returns only the optimal transportation cost. return_matrix: boolean, optional (default=False) If True, returns the optimal transportation matrix in the log. center_dual: boolean, optional (default=True) If True, centers the dual potential using function :ref:`center_ot_dual`. + numThreads: int or "max", optional (default=1, i.e. OpenMP is not used) + If compiled with OpenMP, chooses the number of threads to parallelize. + "max" selects the highest number possible. Returns ------- - gamma: (ns x nt) ndarray - Optimal transportation matrix for the given parameters - log: dictnp - If input log is true, a dictionary containing the cost and dual + W: float, array-like + Optimal transportation loss for the given parameters + log: dict + If input log is true, a dictionary containing dual variables and exit status @@ -354,9 +410,10 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), >>> ot.emd2(a,b,M) 0.0 + + .. _references-emd2: References ---------- - .. [1] Bonneel, N., Van De Panne, M., Paris, S., & Heidrich, W. (2011, December). Displacement interpolation using Lagrangian mass transport. In ACM Transactions on Graphics (TOG) (Vol. 30, No. 6, p. 
@@ -365,15 +422,22 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), See Also -------- ot.bregman.sinkhorn : Entropic regularized OT - ot.optim.cg : General regularized OT""" + ot.optim.cg : General regularized OT + """ + + a, b, M = list_to_array(a, b, M) + + a0, b0, M0 = a, b, M + nx = get_backend(M0, a0, b0) + + # convert to numpy + M = nx.to_numpy(M) + a = nx.to_numpy(a) + b = nx.to_numpy(b) a = np.asarray(a, dtype=np.float64) b = np.asarray(b, dtype=np.float64) - M = np.asarray(M, dtype=np.float64) - - # problem with pikling Forks - if sys.platform.endswith('win32'): - processes = 1 + M = np.asarray(M, dtype=np.float64, order='C') # if empty array given then use uniform distributions if len(a) == 0: @@ -386,11 +450,13 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), asel = a != 0 + numThreads = check_number_threads(numThreads) + if log or return_matrix: def f(b): bsel = b != 0 - - G, cost, u, v, result_code = emd_c(a, b, M, numItermax) + + G, cost, u, v, result_code = emd_c(a, b, M, numItermax, numThreads) if center_dual: u, v = center_ot_dual(u, v, a, b) @@ -400,17 +466,20 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), result_code_string = check_result(result_code) log = {} + G = nx.from_numpy(G, type_as=M0) if return_matrix: log['G'] = G - log['u'] = u - log['v'] = v + log['u'] = nx.from_numpy(u, type_as=a0) + log['v'] = nx.from_numpy(v, type_as=b0) log['warning'] = result_code_string log['result_code'] = result_code + cost = nx.set_gradients(nx.from_numpy(cost, type_as=M0), + (a0, b0, M0), (log['u'], log['v'], G)) return [cost, log] else: def f(b): bsel = b != 0 - G, cost, u, v, result_code = emd_c(a, b, M, numItermax) + G, cost, u, v, result_code = emd_c(a, b, M, numItermax, numThreads) if center_dual: u, v = center_ot_dual(u, v, a, b) @@ -418,6 +487,11 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), if np.any(~asel) or np.any(~bsel): u, v = estimate_dual_null_weights(u, v, a, b, M) + G = nx.from_numpy(G, 
type_as=M0) + cost = nx.set_gradients(nx.from_numpy(cost, type_as=M0), + (a0, b0, M0), (nx.from_numpy(u, type_as=a0), + nx.from_numpy(v, type_as=b0), G)) + check_result(result_code) return cost @@ -426,35 +500,53 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(), nb = b.shape[1] if processes > 1: - res = parmap(f, [b[:, i] for i in range(nb)], processes) - else: - res = list(map(f, [b[:, i].copy() for i in range(nb)])) + warnings.warn( + "The 'processes' parameter has been deprecated. " + "Multiprocessing should be done outside of POT." + ) + res = list(map(f, [b[:, i].copy() for i in range(nb)])) return res def free_support_barycenter(measures_locations, measures_weights, X_init, b=None, weights=None, numItermax=100, - stopThr=1e-7, verbose=False, log=None): - """ - Solves the free support (locations of the barycenters are optimized, not the weights) Wasserstein barycenter problem (i.e. the weighted Frechet mean for the 2-Wasserstein distance) + stopThr=1e-7, verbose=False, log=None, numThreads=1): + r""" + Solves the free support (locations of the barycenters are optimized, not the weights) Wasserstein barycenter problem (i.e. the weighted Frechet mean for the 2-Wasserstein distance), formally: + + .. math:: + \min_\mathbf{X} \quad \sum_{i=1}^N w_i W_2^2(\mathbf{b}, \mathbf{X}, \mathbf{a}_i, \mathbf{X}_i) + + where : + + - :math:`w \in \mathbb{(0, 1)}^{N}`'s are the barycenter weights and sum to one + - the :math:`\mathbf{a}_i \in \mathbb{R}^{k_i}` are the empirical measures weights and sum to one for each :math:`i` + - the :math:`\mathbf{X}_i \in \mathbb{R}^{k_i, d}` are the empirical measures atoms locations + - :math:`\mathbf{b} \in \mathbb{R}^{k}` is the desired weights vector of the barycenter + + This problem is considered in :ref:`[1] <references-free-support-barycenter>` (Algorithm 2). 
+ There are two differences with the following codes: - The function solves the Wasserstein barycenter problem when the barycenter measure is constrained to be supported on k atoms. - This problem is considered in [1] (Algorithm 2). There are two differences with the following codes: - we do not optimize over the weights - - we do not do line search for the locations updates, we use i.e. theta = 1 in [1] (Algorithm 2). This can be seen as a discrete implementation of the fixed-point algorithm of [2] proposed in the continuous setting. + - we do not do line search for the locations updates, we use i.e. :math:`\theta = 1` in + :ref:`[1] <references-free-support-barycenter>` (Algorithm 2). This can be seen as a discrete + implementation of the fixed-point algorithm of + :ref:`[2] <references-free-support-barycenter>` proposed in the continuous setting. Parameters ---------- - measures_locations : list of (k_i,d) numpy.ndarray - The discrete support of a measure supported on k_i locations of a d-dimensional space (k_i can be different for each element of the list) - measures_weights : list of (k_i,) numpy.ndarray - Numpy arrays where each numpy array has k_i non-negatives values summing to one representing the weights of each discrete input measure + measures_locations : list of N (k_i,d) numpy.ndarray + The discrete support of a measure supported on :math:`k_i` locations of a `d`-dimensional space + (:math:`k_i` can be different for each element of the list) + measures_weights : list of N (k_i,) numpy.ndarray + Numpy arrays where each numpy array has :math:`k_i` non-negatives values summing to one + representing the weights of each discrete input measure X_init : (k,d) np.ndarray - Initialization of the support locations (on k atoms) of the barycenter + Initialization of the support locations (on `k` atoms) of the barycenter b : (k,) np.ndarray Initialization of the weights of the barycenter (non-negatives, sum to 1) - weights : (k,) np.ndarray + weights : (N,) 
np.ndarray Initialization of the coefficients of the barycenter (non-negatives, sum to 1) numItermax : int, optional @@ -465,15 +557,20 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None Print information along iterations log : bool, optional record log if True + numThreads: int or "max", optional (default=1, i.e. OpenMP is not used) + If compiled with OpenMP, chooses the number of threads to parallelize. + "max" selects the highest number possible. + Returns ------- X : (k,d) np.ndarray Support locations (on k atoms) of the barycenter + + .. _references-free-support-barycenter: References ---------- - .. [1] Cuturi, Marco, and Arnaud Doucet. "Fast computation of Wasserstein barycenters." International Conference on Machine Learning. 2014. .. [2] Álvarez-Esteban, Pedro C., et al. "A fixed-point approach to barycenters in Wasserstein space." Journal of Mathematical Analysis and Applications 441.2 (2016): 744-762. @@ -504,7 +601,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None for (measure_locations_i, measure_weights_i, weight_i) in zip(measures_locations, measures_weights, weights.tolist()): M_i = dist(X, measure_locations_i) - T_i = emd(b, measure_weights_i, M_i) + T_i = emd(b, measure_weights_i, M_i, numThreads=numThreads) T_sum = T_sum + weight_i * np.reshape(1. / b, (-1, 1)) * np.matmul(T_i, measure_locations_i) displacement_square_norm = np.sum(np.square(T_sum - X)) @@ -523,287 +620,3 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None return X, log_dict else: return X - - -def emd_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True, - log=False): - r"""Solves the Earth Movers distance problem between 1d measures and returns - the OT matrix - - - .. math:: - \gamma = arg\min_\gamma \sum_i \sum_j \gamma_{ij} d(x_a[i], x_b[j]) - - s.t. 
\gamma 1 = a, - \gamma^T 1= b, - \gamma\geq 0 - where : - - - d is the metric - - x_a and x_b are the samples - - a and b are the sample weights - - When 'minkowski' is used as a metric, :math:`d(x, y) = |x - y|^p`. - - Uses the algorithm detailed in [1]_ - - Parameters - ---------- - x_a : (ns,) or (ns, 1) ndarray, float64 - Source dirac locations (on the real line) - x_b : (nt,) or (ns, 1) ndarray, float64 - Target dirac locations (on the real line) - a : (ns,) ndarray, float64, optional - Source histogram (default is uniform weight) - b : (nt,) ndarray, float64, optional - Target histogram (default is uniform weight) - metric: str, optional (default='sqeuclidean') - Metric to be used. Only strings listed in :func:`ot.dist` are accepted. - Due to implementation details, this function runs faster when - `'sqeuclidean'`, `'cityblock'`, or `'euclidean'` metrics are used. - p: float, optional (default=1.0) - The p-norm to apply for if metric='minkowski' - dense: boolean, optional (default=True) - If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt). - Otherwise returns a sparse representation using scipy's `coo_matrix` - format. Due to implementation details, this function runs faster when - `'sqeuclidean'`, `'minkowski'`, `'cityblock'`, or `'euclidean'` metrics - are used. - log: boolean, optional (default=False) - If True, returns a dictionary containing the cost. - Otherwise returns only the optimal transportation matrix. - - Returns - ------- - gamma: (ns, nt) ndarray - Optimal transportation matrix for the given parameters - log: dict - If input log is True, a dictionary containing the cost - - - Examples - -------- - - Simple example with obvious solution. The function emd_1d accepts lists and - performs automatic conversion to numpy arrays - - >>> import ot - >>> a=[.5, .5] - >>> b=[.5, .5] - >>> x_a = [2., 0.] - >>> x_b = [0., 3.] - >>> ot.emd_1d(x_a, x_b, a, b) - array([[0. , 0.5], - [0.5, 0. ]]) - >>> ot.emd_1d(x_a, x_b) - array([[0. 
, 0.5], - [0.5, 0. ]]) - - References - ---------- - - .. [1] Peyré, G., & Cuturi, M. (2017). "Computational Optimal - Transport", 2018. - - See Also - -------- - ot.lp.emd : EMD for multidimensional distributions - ot.lp.emd2_1d : EMD for 1d distributions (returns cost instead of the - transportation matrix) - """ - a = np.asarray(a, dtype=np.float64) - b = np.asarray(b, dtype=np.float64) - x_a = np.asarray(x_a, dtype=np.float64) - x_b = np.asarray(x_b, dtype=np.float64) - - assert (x_a.ndim == 1 or x_a.ndim == 2 and x_a.shape[1] == 1), \ - "emd_1d should only be used with monodimensional data" - assert (x_b.ndim == 1 or x_b.ndim == 2 and x_b.shape[1] == 1), \ - "emd_1d should only be used with monodimensional data" - - # if empty array given then use uniform distributions - if a.ndim == 0 or len(a) == 0: - a = np.ones((x_a.shape[0],), dtype=np.float64) / x_a.shape[0] - if b.ndim == 0 or len(b) == 0: - b = np.ones((x_b.shape[0],), dtype=np.float64) / x_b.shape[0] - - x_a_1d = x_a.reshape((-1,)) - x_b_1d = x_b.reshape((-1,)) - perm_a = np.argsort(x_a_1d) - perm_b = np.argsort(x_b_1d) - - G_sorted, indices, cost = emd_1d_sorted(a[perm_a], b[perm_b], - x_a_1d[perm_a], x_b_1d[perm_b], - metric=metric, p=p) - G = coo_matrix((G_sorted, (perm_a[indices[:, 0]], perm_b[indices[:, 1]])), - shape=(a.shape[0], b.shape[0])) - if dense: - G = G.toarray() - if log: - log = {'cost': cost} - return G, log - return G - - -def emd2_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True, - log=False): - r"""Solves the Earth Movers distance problem between 1d measures and returns - the loss - - - .. math:: - \gamma = arg\min_\gamma \sum_i \sum_j \gamma_{ij} d(x_a[i], x_b[j]) - - s.t. \gamma 1 = a, - \gamma^T 1= b, - \gamma\geq 0 - where : - - - d is the metric - - x_a and x_b are the samples - - a and b are the sample weights - - When 'minkowski' is used as a metric, :math:`d(x, y) = |x - y|^p`. 
- - Uses the algorithm detailed in [1]_ - - Parameters - ---------- - x_a : (ns,) or (ns, 1) ndarray, float64 - Source dirac locations (on the real line) - x_b : (nt,) or (ns, 1) ndarray, float64 - Target dirac locations (on the real line) - a : (ns,) ndarray, float64, optional - Source histogram (default is uniform weight) - b : (nt,) ndarray, float64, optional - Target histogram (default is uniform weight) - metric: str, optional (default='sqeuclidean') - Metric to be used. Only strings listed in :func:`ot.dist` are accepted. - Due to implementation details, this function runs faster when - `'sqeuclidean'`, `'minkowski'`, `'cityblock'`, or `'euclidean'` metrics - are used. - p: float, optional (default=1.0) - The p-norm to apply for if metric='minkowski' - dense: boolean, optional (default=True) - If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt). - Otherwise returns a sparse representation using scipy's `coo_matrix` - format. Only used if log is set to True. Due to implementation details, - this function runs faster when dense is set to False. - log: boolean, optional (default=False) - If True, returns a dictionary containing the transportation matrix. - Otherwise returns only the loss. - - Returns - ------- - loss: float - Cost associated to the optimal transportation - log: dict - If input log is True, a dictionary containing the Optimal transportation - matrix for the given parameters - - - Examples - -------- - - Simple example with obvious solution. The function emd2_1d accepts lists and - performs automatic conversion to numpy arrays - - >>> import ot - >>> a=[.5, .5] - >>> b=[.5, .5] - >>> x_a = [2., 0.] - >>> x_b = [0., 3.] - >>> ot.emd2_1d(x_a, x_b, a, b) - 0.5 - >>> ot.emd2_1d(x_a, x_b) - 0.5 - - References - ---------- - - .. [1] Peyré, G., & Cuturi, M. (2017). "Computational Optimal - Transport", 2018. 
- - See Also - -------- - ot.lp.emd2 : EMD for multidimensional distributions - ot.lp.emd_1d : EMD for 1d distributions (returns the transportation matrix - instead of the cost) - """ - # If we do not return G (log==False), then we should not to cast it to dense - # (useless overhead) - G, log_emd = emd_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric=metric, p=p, - dense=dense and log, log=True) - cost = log_emd['cost'] - if log: - log_emd = {'G': G} - return cost, log_emd - return cost - - -def wasserstein_1d(x_a, x_b, a=None, b=None, p=1.): - r"""Solves the p-Wasserstein distance problem between 1d measures and returns - the distance - - .. math:: - \min_\gamma \left( \sum_i \sum_j \gamma_{ij} \|x_a[i] - x_b[j]\|^p \right)^{1/p} - - s.t. \gamma 1 = a, - \gamma^T 1= b, - \gamma\geq 0 - - where : - - - x_a and x_b are the samples - - a and b are the sample weights - - Uses the algorithm detailed in [1]_ - - Parameters - ---------- - x_a : (ns,) or (ns, 1) ndarray, float64 - Source dirac locations (on the real line) - x_b : (nt,) or (ns, 1) ndarray, float64 - Target dirac locations (on the real line) - a : (ns,) ndarray, float64, optional - Source histogram (default is uniform weight) - b : (nt,) ndarray, float64, optional - Target histogram (default is uniform weight) - p: float, optional (default=1.0) - The order of the p-Wasserstein distance to be computed - - Returns - ------- - dist: float - p-Wasserstein distance - - - Examples - -------- - - Simple example with obvious solution. The function wasserstein_1d accepts - lists and performs automatic conversion to numpy arrays - - >>> import ot - >>> a=[.5, .5] - >>> b=[.5, .5] - >>> x_a = [2., 0.] - >>> x_b = [0., 3.] - >>> ot.wasserstein_1d(x_a, x_b, a, b) - 0.5 - >>> ot.wasserstein_1d(x_a, x_b) - 0.5 - - References - ---------- - - .. [1] Peyré, G., & Cuturi, M. (2017). "Computational Optimal - Transport", 2018. 
- - See Also - -------- - ot.lp.emd_1d : EMD for 1d distributions - """ - cost_emd = emd2_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p, - dense=False, log=False) - return np.power(cost_emd, 1. / p) diff --git a/ot/lp/cvx.py b/ot/lp/cvx.py index 8e763be..869d450 100644 --- a/ot/lp/cvx.py +++ b/ot/lp/cvx.py @@ -27,7 +27,7 @@ def scipy_sparse_to_spmatrix(A): def barycenter(A, M, weights=None, verbose=False, log=False, solver='interior-point'): - """Compute the Wasserstein barycenter of distributions A + r"""Compute the Wasserstein barycenter of distributions A The function solves the following optimization problem [16]: @@ -76,7 +76,6 @@ def barycenter(A, M, weights=None, verbose=False, log=False, solver='interior-po .. [16] Agueh, M., & Carlier, G. (2011). Barycenters in the Wasserstein space. SIAM Journal on Mathematical Analysis, 43(2), 904-924. - """ if weights is None: diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index c167964..42e08f4 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -20,6 +20,7 @@ import warnings cdef extern from "EMD.h": int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter) nogil + int EMD_wrap_omp(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter, int numThreads) nogil cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED @@ -38,7 +39,7 @@ def check_result(result_code): @cython.boundscheck(False) @cython.wraparound(False) -def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int max_iter): +def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int max_iter, int numThreads): """ Solves the Earth Movers distance problem and returns the optimal transport matrix @@ -97,8 +98,6 @@ def emd_c(np.ndarray[double, ndim=1, 
mode="c"] a, np.ndarray[double, ndim=1, mod cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([0, 0]) cdef np.ndarray[double, ndim=1, mode="c"] Gv=np.zeros(0) - cdef np.ndarray[long, ndim=1, mode="c"] iG=np.zeros(0,dtype=np.int) - cdef np.ndarray[long, ndim=1, mode="c"] jG=np.zeros(0,dtype=np.int) if not len(a): a=np.ones((n1,))/n1 @@ -111,8 +110,10 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod # calling the function with nogil: - result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter) - + if numThreads == 1: + result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter) + else: + result_code = EMD_wrap_omp(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter, numThreads) return G, cost, alpha, beta, result_code @@ -157,22 +158,22 @@ def emd_1d_sorted(np.ndarray[double, ndim=1, mode="c"] u_weights, cost associated to the optimal transportation """ cdef double cost = 0. - cdef int n = u_weights.shape[0] - cdef int m = v_weights.shape[0] + cdef Py_ssize_t n = u_weights.shape[0] + cdef Py_ssize_t m = v_weights.shape[0] - cdef int i = 0 + cdef Py_ssize_t i = 0 cdef double w_i = u_weights[0] - cdef int j = 0 + cdef Py_ssize_t j = 0 cdef double w_j = v_weights[0] cdef double m_ij = 0. 
cdef np.ndarray[double, ndim=1, mode="c"] G = np.zeros((n + m - 1, ), dtype=np.float64) - cdef np.ndarray[long, ndim=2, mode="c"] indices = np.zeros((n + m - 1, 2), - dtype=np.int) - cdef int cur_idx = 0 - while i < n and j < m: + cdef np.ndarray[long long, ndim=2, mode="c"] indices = np.zeros((n + m - 1, 2), + dtype=np.int64) + cdef Py_ssize_t cur_idx = 0 + while True: if metric == 'sqeuclidean': m_ij = (u[i] - v[j]) * (u[i] - v[j]) elif metric == 'cityblock' or metric == 'euclidean': @@ -188,6 +189,8 @@ def emd_1d_sorted(np.ndarray[double, ndim=1, mode="c"] u_weights, indices[cur_idx, 0] = i indices[cur_idx, 1] = j i += 1 + if i == n: + break w_j -= w_i w_i = u_weights[i] else: @@ -196,7 +199,10 @@ def emd_1d_sorted(np.ndarray[double, ndim=1, mode="c"] u_weights, indices[cur_idx, 0] = i indices[cur_idx, 1] = j j += 1 + if j == m: + break w_i -= w_j w_j = v_weights[j] cur_idx += 1 + cur_idx += 1 return G[:cur_idx], indices[:cur_idx], cost diff --git a/ot/lp/full_bipartitegraph.h b/ot/lp/full_bipartitegraph.h index 87a1bec..713ccb5 100644 --- a/ot/lp/full_bipartitegraph.h +++ b/ot/lp/full_bipartitegraph.h @@ -23,10 +23,10 @@ * */ -#ifndef LEMON_FULL_BIPARTITE_GRAPH_H -#define LEMON_FULL_BIPARTITE_GRAPH_H +#pragma once #include "core.h" +#include <cstdint> ///\ingroup graphs ///\file @@ -44,16 +44,16 @@ namespace lemon { //class Node; typedef int Node; //class Arc; - typedef long long Arc; + typedef int64_t Arc; protected: int _node_num; - long long _arc_num; + int64_t _arc_num; FullBipartiteDigraphBase() {} - void construct(int n1, int n2) { _node_num = n1+n2; _arc_num = n1 * n2; _n1=n1; _n2=n2;} + void construct(int n1, int n2) { _node_num = n1+n2; _arc_num = (int64_t)n1 * (int64_t)n2; _n1=n1; _n2=n2;} public: @@ -65,25 +65,25 @@ namespace lemon { Arc arc(const Node& s, const Node& t) const { if (s<_n1 && t>=_n1) - return Arc(s * _n2 + (t-_n1) ); + return Arc((int64_t)s * (int64_t)_n2 + (int64_t)(t-_n1) ); else return Arc(-1); } int nodeNum() const { return 
_node_num; } - long long arcNum() const { return _arc_num; } + int64_t arcNum() const { return _arc_num; } int maxNodeId() const { return _node_num - 1; } - long long maxArcId() const { return _arc_num - 1; } + int64_t maxArcId() const { return _arc_num - 1; } Node source(Arc arc) const { return arc / _n2; } Node target(Arc arc) const { return (arc % _n2) + _n1; } static int id(Node node) { return node; } - static long long id(Arc arc) { return arc; } + static int64_t id(Arc arc) { return arc; } static Node nodeFromId(int id) { return Node(id);} - static Arc arcFromId(int id) { return Arc(id);} + static Arc arcFromId(int64_t id) { return Arc(id);} Arc findArc(Node s, Node t, Arc prev = -1) const { @@ -136,7 +136,7 @@ namespace lemon { /// /// \brief A directed full graph class. /// - /// FullBipartiteDigraph is a simple and fast implmenetation of directed full + /// FullBipartiteDigraph is a simple and fast implementation of directed full /// (complete) graphs. It contains an arc from each node to each node /// (including a loop for each node), therefore the number of arcs /// is the square of the number of nodes. @@ -203,13 +203,10 @@ namespace lemon { /// \brief Number of nodes. int nodeNum() const { return Parent::nodeNum(); } /// \brief Number of arcs. - long long arcNum() const { return Parent::arcNum(); } + int64_t arcNum() const { return Parent::arcNum(); } }; } //namespace lemon - - -#endif //LEMON_FULL_GRAPH_H diff --git a/ot/lp/full_bipartitegraph_omp.h b/ot/lp/full_bipartitegraph_omp.h new file mode 100644 index 0000000..8cbed0b --- /dev/null +++ b/ot/lp/full_bipartitegraph_omp.h @@ -0,0 +1,234 @@ +/* -*- mode: C++; indent-tabs-mode: nil; -*- + * + * This file has been adapted by Nicolas Bonneel (2013), + * from full_graph.h from LEMON, a generic C++ optimization library, + * to implement a lightweight fully connected bipartite graph. 
A previous + * version of this file is used as part of the Displacement Interpolation + * project, + * Web: http://www.cs.ubc.ca/labs/imager/tr/2011/DisplacementInterpolation/ + * + * + **** Original file Copyright Notice : + * Copyright (C) 2003-2010 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#pragma once + +#include <cstdint> + +///\ingroup graphs +///\file +///\brief FullBipartiteDigraph and FullBipartiteGraph classes. + + +namespace lemon_omp { + + ///This \c \#define creates convenient type definitions for the following + ///types of \c Digraph: \c Node, \c NodeIt, \c Arc, \c ArcIt, \c InArcIt, + ///\c OutArcIt, \c BoolNodeMap, \c IntNodeMap, \c DoubleNodeMap, + ///\c BoolArcMap, \c IntArcMap, \c DoubleArcMap. + /// + ///\note If the graph type is a dependent type, ie. the graph type depend + ///on a template parameter, then use \c TEMPLATE_DIGRAPH_TYPEDEFS() + ///macro. +#define DIGRAPH_TYPEDEFS(Digraph) \ + typedef Digraph::Node Node; \ + typedef Digraph::Arc Arc; \ + + + ///Create convenience typedefs for the digraph types and iterators + + ///\see DIGRAPH_TYPEDEFS + /// + ///\note Use this macro, if the graph type is a dependent type, + ///ie. the graph type depend on a template parameter. 
+#define TEMPLATE_DIGRAPH_TYPEDEFS(Digraph) \ + typedef typename Digraph::Node Node; \ + typedef typename Digraph::Arc Arc; \ + + + class FullBipartiteDigraphBase { + public: + + typedef FullBipartiteDigraphBase Digraph; + + //class Node; + typedef int Node; + //class Arc; + typedef int64_t Arc; + + protected: + + int _node_num; + int64_t _arc_num; + + FullBipartiteDigraphBase() {} + + void construct(int n1, int n2) { _node_num = n1+n2; _arc_num = (int64_t)n1 * (int64_t)n2; _n1=n1; _n2=n2;} + + public: + + int _n1, _n2; + + + Node operator()(int ix) const { return Node(ix); } + static int index(const Node& node) { return node; } + + Arc arc(const Node& s, const Node& t) const { + if (s<_n1 && t>=_n1) + return Arc((int64_t)s * (int64_t)_n2 + (int64_t)(t-_n1) ); + else + return Arc(-1); + } + + int nodeNum() const { return _node_num; } + int64_t arcNum() const { return _arc_num; } + + int maxNodeId() const { return _node_num - 1; } + int64_t maxArcId() const { return _arc_num - 1; } + + Node source(Arc arc) const { return arc / _n2; } + Node target(Arc arc) const { return (arc % _n2) + _n1; } + + static int id(Node node) { return node; } + static int64_t id(Arc arc) { return arc; } + + static Node nodeFromId(int id) { return Node(id);} + static Arc arcFromId(int64_t id) { return Arc(id);} + + + Arc findArc(Node s, Node t, Arc prev = -1) const { + return prev == -1 ? 
arc(s, t) : -1; + } + + void first(Node& node) const { + node = _node_num - 1; + } + + static void next(Node& node) { + --node; + } + + void first(Arc& arc) const { + arc = _arc_num - 1; + } + + static void next(Arc& arc) { + --arc; + } + + void firstOut(Arc& arc, const Node& node) const { + if (node>=_n1) + arc = -1; + else + arc = (node + 1) * _n2 - 1; + } + + void nextOut(Arc& arc) const { + if (arc % _n2 == 0) arc = 0; + --arc; + } + + void firstIn(Arc& arc, const Node& node) const { + if (node<_n1) + arc = -1; + else + arc = _arc_num + node - _node_num; + } + + void nextIn(Arc& arc) const { + arc -= _n2; + if (arc < 0) arc = -1; + } + + }; + + /// \ingroup graphs + /// + /// \brief A directed full graph class. + /// + /// FullBipartiteDigraph is a simple and fast implmenetation of directed full + /// (complete) graphs. It contains an arc from each node to each node + /// (including a loop for each node), therefore the number of arcs + /// is the square of the number of nodes. + /// This class is completely static and it needs constant memory space. + /// Thus you can neither add nor delete nodes or arcs, however + /// the structure can be resized using resize(). + /// + /// This type fully conforms to the \ref concepts::Digraph "Digraph concept". + /// Most of its member functions and nested classes are documented + /// only in the concept class. + /// + /// This class provides constant time counting for nodes and arcs. + /// + /// \note FullBipartiteDigraph and FullBipartiteGraph classes are very similar, + /// but there are two differences. While this class conforms only + /// to the \ref concepts::Digraph "Digraph" concept, FullBipartiteGraph + /// conforms to the \ref concepts::Graph "Graph" concept, + /// moreover FullBipartiteGraph does not contain a loop for each + /// node as this class does. 
+ /// + /// \sa FullBipartiteGraph + class FullBipartiteDigraph : public FullBipartiteDigraphBase { + typedef FullBipartiteDigraphBase Parent; + + public: + + /// \brief Default constructor. + /// + /// Default constructor. The number of nodes and arcs will be zero. + FullBipartiteDigraph() { construct(0,0); } + + /// \brief Constructor + /// + /// Constructor. + /// \param n The number of the nodes. + FullBipartiteDigraph(int n1, int n2) { construct(n1, n2); } + + + /// \brief Returns the node with the given index. + /// + /// Returns the node with the given index. Since this structure is + /// completely static, the nodes can be indexed with integers from + /// the range <tt>[0..nodeNum()-1]</tt>. + /// The index of a node is the same as its ID. + /// \sa index() + Node operator()(int ix) const { return Parent::operator()(ix); } + + /// \brief Returns the index of the given node. + /// + /// Returns the index of the given node. Since this structure is + /// completely static, the nodes can be indexed with integers from + /// the range <tt>[0..nodeNum()-1]</tt>. + /// The index of a node is the same as its ID. + /// \sa operator()() + static int index(const Node& node) { return Parent::index(node); } + + /// \brief Returns the arc connecting the given nodes. + /// + /// Returns the arc connecting the given nodes. + /*Arc arc(Node u, Node v) const { + return Parent::arc(u, v); + }*/ + + /// \brief Number of nodes. + int nodeNum() const { return Parent::nodeNum(); } + /// \brief Number of arcs. 
+ int64_t arcNum() const { return Parent::arcNum(); } + }; + + + + +} //namespace lemon_omp diff --git a/ot/lp/network_simplex_simple.h b/ot/lp/network_simplex_simple.h index 5d93040..3b46b9b 100644 --- a/ot/lp/network_simplex_simple.h +++ b/ot/lp/network_simplex_simple.h @@ -25,15 +25,17 @@ * */ -#ifndef LEMON_NETWORK_SIMPLEX_SIMPLE_H -#define LEMON_NETWORK_SIMPLEX_SIMPLE_H +#pragma once +#undef DEBUG_LVL #define DEBUG_LVL 0 #if DEBUG_LVL>0 #include <iomanip> #endif - +#undef EPSILON +#undef _EPSILON +#undef MAX_DEBUG_ITER #define EPSILON 2.2204460492503131e-15 #define _EPSILON 1e-8 #define MAX_DEBUG_ITER 100000 @@ -50,6 +52,7 @@ #include <vector> #include <limits> #include <algorithm> +#include <iostream> #include <cstdio> #ifdef HASHMAP #include <hash_map> @@ -63,6 +66,8 @@ //#include "sparse_array_n.h" #include "full_bipartitegraph.h" +#undef INVALIDNODE +#undef INVALID #define INVALIDNODE -1 #define INVALID (-1) @@ -76,16 +81,16 @@ namespace lemon { class SparseValueVector { public: - SparseValueVector(int n=0) + SparseValueVector(size_t n=0) { } - void resize(int n=0){}; - T operator[](const int id) const + void resize(size_t n=0){}; + T operator[](const size_t id) const { #ifdef HASHMAP - typename stdext::hash_map<int,T>::const_iterator it = data.find(id); + typename stdext::hash_map<size_t,T>::const_iterator it = data.find(id); #else - typename std::map<int,T>::const_iterator it = data.find(id); + typename std::map<size_t,T>::const_iterator it = data.find(id); #endif if (it==data.end()) return 0; @@ -93,16 +98,16 @@ namespace lemon { return it->second; } - ProxyObject<T> operator[](const int id) + ProxyObject<T> operator[](const size_t id) { return ProxyObject<T>( this, id ); } //private: #ifdef HASHMAP - stdext::hash_map<int,T> data; + stdext::hash_map<size_t,T> data; #else - std::map<int,T> data; + std::map<size_t,T> data; #endif }; @@ -110,7 +115,7 @@ namespace lemon { template <typename T> class ProxyObject { public: - ProxyObject( SparseValueVector<T> 
*v, int idx ){_v=v; _idx=idx;}; + ProxyObject( SparseValueVector<T> *v, size_t idx ){_v=v; _idx=idx;}; ProxyObject<T> & operator=( const T &v ) { // If we get here, we know that operator[] was called to perform a write access, // so we can insert an item in the vector if needed @@ -123,9 +128,9 @@ namespace lemon { // If we get here, we know that operator[] was called to perform a read access, // so we can simply return the existing object #ifdef HASHMAP - typename stdext::hash_map<int,T>::iterator it = _v->data.find(_idx); + typename stdext::hash_map<size_t,T>::iterator it = _v->data.find(_idx); #else - typename std::map<int,T>::iterator it = _v->data.find(_idx); + typename std::map<size_t,T>::iterator it = _v->data.find(_idx); #endif if (it==_v->data.end()) return 0; @@ -137,9 +142,9 @@ namespace lemon { { if (val==0) return; #ifdef HASHMAP - typename stdext::hash_map<int,T>::iterator it = _v->data.find(_idx); + typename stdext::hash_map<size_t,T>::iterator it = _v->data.find(_idx); #else - typename std::map<int,T>::iterator it = _v->data.find(_idx); + typename std::map<size_t,T>::iterator it = _v->data.find(_idx); #endif if (it==_v->data.end()) _v->data[_idx] = val; @@ -156,9 +161,9 @@ namespace lemon { { if (val==0) return; #ifdef HASHMAP - typename stdext::hash_map<int,T>::iterator it = _v->data.find(_idx); + typename stdext::hash_map<size_t,T>::iterator it = _v->data.find(_idx); #else - typename std::map<int,T>::iterator it = _v->data.find(_idx); + typename std::map<size_t,T>::iterator it = _v->data.find(_idx); #endif if (it==_v->data.end()) _v->data[_idx] = -val; @@ -173,7 +178,7 @@ namespace lemon { } SparseValueVector<T> *_v; - int _idx; + size_t _idx; }; @@ -204,7 +209,7 @@ namespace lemon { /// /// \tparam GR The digraph type the algorithm runs on. /// \tparam V The number type used for flow amounts, capacity bounds - /// and supply values in the algorithm. By default, it is \c int. + /// and supply values in the algorithm. By default, it is \c int64_t. 
/// \tparam C The number type used for costs and potentials in the /// algorithm. By default, it is the same as \c V. /// @@ -214,7 +219,7 @@ namespace lemon { /// \note %NetworkSimplexSimple provides five different pivot rule /// implementations, from which the most efficient one is used /// by default. For more information, see \ref PivotRule. - template <typename GR, typename V = int, typename C = V, typename NodesType = unsigned short int> + template <typename GR, typename V = int, typename C = V, typename NodesType = unsigned short int, typename ArcsType = int64_t> class NetworkSimplexSimple { public: @@ -228,7 +233,7 @@ namespace lemon { /// mixed order in the internal data structure. /// In special cases, it could lead to better overall performance, /// but it is usually slower. Therefore it is disabled by default. - NetworkSimplexSimple(const GR& graph, bool arc_mixing, int nbnodes, long long nb_arcs,int maxiters) : + NetworkSimplexSimple(const GR& graph, bool arc_mixing, int nbnodes, ArcsType nb_arcs, size_t maxiters) : _graph(graph), //_arc_id(graph), _arc_mixing(arc_mixing), _init_nb_nodes(nbnodes), _init_nb_arcs(nb_arcs), MAX(std::numeric_limits<Value>::max()), @@ -288,11 +293,11 @@ namespace lemon { private: - int max_iter; + size_t max_iter; TEMPLATE_DIGRAPH_TYPEDEFS(GR); typedef std::vector<int> IntVector; - typedef std::vector<NodesType> UHalfIntVector; + typedef std::vector<ArcsType> ArcVector; typedef std::vector<Value> ValueVector; typedef std::vector<Cost> CostVector; // typedef SparseValueVector<Cost> CostVector; @@ -315,9 +320,9 @@ namespace lemon { // Data related to the underlying digraph const GR &_graph; int _node_num; - int _arc_num; - int _all_arc_num; - int _search_arc_num; + ArcsType _arc_num; + ArcsType _all_arc_num; + ArcsType _search_arc_num; // Parameters of the problem SupplyType _stype; @@ -325,9 +330,9 @@ namespace lemon { inline int _node_id(int n) const {return _node_num-n-1;} ; - //IntArcMap _arc_id; - UHalfIntVector _source; 
- UHalfIntVector _target; +// IntArcMap _arc_id; + IntVector _source; // keep nodes as integers + IntVector _target; bool _arc_mixing; public: // Node and arc data @@ -341,7 +346,7 @@ namespace lemon { private: // Data for storing the spanning tree structure IntVector _parent; - IntVector _pred; + ArcVector _pred; IntVector _thread; IntVector _rev_thread; IntVector _succ_num; @@ -349,17 +354,17 @@ namespace lemon { IntVector _dirty_revs; BoolVector _forward; StateVector _state; - int _root; + ArcsType _root; // Temporary data used in the current pivot iteration - int in_arc, join, u_in, v_in, u_out, v_out; - int first, second, right, last; - int stem, par_stem, new_stem; + ArcsType in_arc, join, u_in, v_in, u_out, v_out; + ArcsType first, second, right, last; + ArcsType stem, par_stem, new_stem; Value delta; const Value MAX; - int mixingCoeff; + ArcsType mixingCoeff; public: @@ -373,27 +378,27 @@ namespace lemon { private: // thank you to DVK and MizardX from StackOverflow for this function! 
- inline int sequence(int k) const { - int smallv = (k > num_total_big_subsequence_numbers) & 1; + inline ArcsType sequence(ArcsType k) const { + ArcsType smallv = (k > num_total_big_subsequence_numbers) & 1; k -= num_total_big_subsequence_numbers * smallv; - int subsequence_length2 = subsequence_length- smallv; - int subsequence_num = (k / subsequence_length2) + num_big_subseqiences * smallv; - int subsequence_offset = (k % subsequence_length2) * mixingCoeff; + ArcsType subsequence_length2 = subsequence_length- smallv; + ArcsType subsequence_num = (k / subsequence_length2) + num_big_subseqiences * smallv; + ArcsType subsequence_offset = (k % subsequence_length2) * mixingCoeff; return subsequence_offset + subsequence_num; } - int subsequence_length; - int num_big_subseqiences; - int num_total_big_subsequence_numbers; + ArcsType subsequence_length; + ArcsType num_big_subseqiences; + ArcsType num_total_big_subsequence_numbers; - inline int getArcID(const Arc &arc) const + inline ArcsType getArcID(const Arc &arc) const { //int n = _arc_num-arc._id-1; - int n = _arc_num-GR::id(arc)-1; + ArcsType n = _arc_num-GR::id(arc)-1; - //int a = mixingCoeff*(n%mixingCoeff) + n/mixingCoeff; - //int b = _arc_id[arc]; + //ArcsType a = mixingCoeff*(n%mixingCoeff) + n/mixingCoeff; + //ArcsType b = _arc_id[arc]; if (_arc_mixing) return sequence(n); else @@ -401,16 +406,16 @@ namespace lemon { } // finally unused because too slow - inline int getSource(const int arc) const + inline ArcsType getSource(const ArcsType arc) const { - //int a = _source[arc]; + //ArcsType a = _source[arc]; //return a; - int n = _arc_num-arc-1; + ArcsType n = _arc_num-arc-1; if (_arc_mixing) n = mixingCoeff*(n%mixingCoeff) + n/mixingCoeff; - int b; + ArcsType b; if (n>=0) b = _node_id(_graph.source(GR::arcFromId( n ) )); else @@ -436,17 +441,17 @@ namespace lemon { private: // References to the NetworkSimplexSimple class - const UHalfIntVector &_source; - const UHalfIntVector &_target; + const IntVector 
&_source; + const IntVector &_target; const CostVector &_cost; const StateVector &_state; const CostVector &_pi; - int &_in_arc; - int _search_arc_num; + ArcsType &_in_arc; + ArcsType _search_arc_num; // Pivot rule data - int _block_size; - int _next_arc; + ArcsType _block_size; + ArcsType _next_arc; NetworkSimplexSimple &_ns; public: @@ -460,17 +465,16 @@ namespace lemon { { // The main parameters of the pivot rule const double BLOCK_SIZE_FACTOR = 1.0; - const int MIN_BLOCK_SIZE = 10; + const ArcsType MIN_BLOCK_SIZE = 10; - _block_size = std::max( int(BLOCK_SIZE_FACTOR * - std::sqrt(double(_search_arc_num))), - MIN_BLOCK_SIZE ); + _block_size = std::max(ArcsType(BLOCK_SIZE_FACTOR * std::sqrt(double(_search_arc_num))), MIN_BLOCK_SIZE); } + // Find next entering arc bool findEnteringArc() { Cost c, min = 0; - int e; - int cnt = _block_size; + ArcsType e; + ArcsType cnt = _block_size; double a; for (e = _next_arc; e != _search_arc_num; ++e) { c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]); @@ -516,7 +520,7 @@ namespace lemon { int _init_nb_nodes; - long long _init_nb_arcs; + ArcsType _init_nb_arcs; /// \name Parameters /// The parameters of the algorithm can be specified using these @@ -736,7 +740,7 @@ namespace lemon { for (int i = 0; i != _node_num; ++i) { _supply[i] = 0; } - for (int i = 0; i != _arc_num; ++i) { + for (ArcsType i = 0; i != _arc_num; ++i) { _cost[i] = 1; } _stype = GEQ; @@ -745,7 +749,7 @@ namespace lemon { - int divid (int x, int y) + int64_t divid (int64_t x, int64_t y) { return (x-x%y)/y; } @@ -775,7 +779,7 @@ namespace lemon { _node_num = _init_nb_nodes; _arc_num = _init_nb_arcs; int all_node_num = _node_num + 1; - int max_arc_num = _arc_num + 2 * _node_num; + ArcsType max_arc_num = _arc_num + 2 * _node_num; _source.resize(max_arc_num); _target.resize(max_arc_num); @@ -798,13 +802,13 @@ namespace lemon { //_arc_mixing=false; if (_arc_mixing) { // Store the arcs in a mixed order - int k = 
std::max(int(std::sqrt(double(_arc_num))), 10); + const ArcsType k = std::max(ArcsType(std::sqrt(double(_arc_num))), ArcsType(10)); mixingCoeff = k; subsequence_length = _arc_num / mixingCoeff + 1; num_big_subseqiences = _arc_num % mixingCoeff; num_total_big_subsequence_numbers = subsequence_length * num_big_subseqiences; - int i = 0, j = 0; + ArcsType i = 0, j = 0; Arc a; _graph.first(a); for (; a != INVALID; _graph.next(a)) { _source[i] = _node_id(_graph.source(a)); @@ -814,7 +818,7 @@ namespace lemon { } } else { // Store the arcs in the original order - int i = 0; + ArcsType i = 0; Arc a; _graph.first(a); for (; a != INVALID; _graph.next(a), ++i) { _source[i] = _node_id(_graph.source(a)); @@ -856,7 +860,7 @@ namespace lemon { Number totalCost() const { Number c = 0; for (ArcIt a(_graph); a != INVALID; ++a) { - int i = getArcID(a); + int64_t i = getArcID(a); c += Number(_flow[i]) * Number(_cost[i]); } return c; @@ -867,15 +871,15 @@ namespace lemon { Number c = 0; /*#ifdef HASHMAP - typename stdext::hash_map<int, Value>::const_iterator it; + typename stdext::hash_map<int64_t, Value>::const_iterator it; #else - typename std::map<int, Value>::const_iterator it; + typename std::map<int64_t, Value>::const_iterator it; #endif for (it = _flow.data.begin(); it!=_flow.data.end(); ++it) c += Number(it->second) * Number(_cost[it->first]); return c;*/ - for (unsigned long i=0; i<_flow.size(); i++) + for (ArcsType i=0; i<_flow.size(); i++) c += _flow[i] * Number(_cost[i]); return c; @@ -944,14 +948,14 @@ namespace lemon { // Initialize internal data structures bool init() { if (_node_num == 0) return false; - + // Check the sum of supply values _sum_supply = 0; for (int i = 0; i != _node_num; ++i) { _sum_supply += _supply[i]; } if ( fabs(_sum_supply) > _EPSILON ) return false; - + _sum_supply = 0; // Initialize artifical cost @@ -960,14 +964,14 @@ namespace lemon { ART_COST = std::numeric_limits<Cost>::max() / 2 + 1; } else { ART_COST = 0; - for (int i = 0; i != _arc_num; 
++i) { + for (ArcsType i = 0; i != _arc_num; ++i) { if (_cost[i] > ART_COST) ART_COST = _cost[i]; } ART_COST = (ART_COST + 1) * _node_num; } // Initialize arc maps - for (int i = 0; i != _arc_num; ++i) { + for (ArcsType i = 0; i != _arc_num; ++i) { //_flow[i] = 0; //by default, the sparse matrix is empty _state[i] = STATE_LOWER; } @@ -988,7 +992,7 @@ namespace lemon { // EQ supply constraints _search_arc_num = _arc_num; _all_arc_num = _arc_num + _node_num; - for (int u = 0, e = _arc_num; u != _node_num; ++u, ++e) { + for (ArcsType u = 0, e = _arc_num; u != _node_num; ++u, ++e) { _parent[u] = _root; _pred[u] = e; _thread[u] = u + 1; @@ -1016,8 +1020,8 @@ namespace lemon { else if (_sum_supply > 0) { // LEQ supply constraints _search_arc_num = _arc_num + _node_num; - int f = _arc_num + _node_num; - for (int u = 0, e = _arc_num; u != _node_num; ++u, ++e) { + ArcsType f = _arc_num + _node_num; + for (ArcsType u = 0, e = _arc_num; u != _node_num; ++u, ++e) { _parent[u] = _root; _thread[u] = u + 1; _rev_thread[u + 1] = u; @@ -1054,8 +1058,8 @@ namespace lemon { else { // GEQ supply constraints _search_arc_num = _arc_num + _node_num; - int f = _arc_num + _node_num; - for (int u = 0, e = _arc_num; u != _node_num; ++u, ++e) { + ArcsType f = _arc_num + _node_num; + for (ArcsType u = 0, e = _arc_num; u != _node_num; ++u, ++e) { _parent[u] = _root; _thread[u] = u + 1; _rev_thread[u + 1] = u; @@ -1120,9 +1124,9 @@ namespace lemon { second = _source[in_arc]; } delta = INF; - int result = 0; + char result = 0; Value d; - int e; + ArcsType e; // Search the cycle along the path form the first node to the root for (int u = first; u != join; u = _parent[u]) { @@ -1239,7 +1243,7 @@ namespace lemon { // Update _rev_thread using the new _thread values for (int i = 0; i != int(_dirty_revs.size()); ++i) { - u = _dirty_revs[i]; + int u = _dirty_revs[i]; _rev_thread[_thread[u]] = u; } @@ -1257,7 +1261,7 @@ namespace lemon { u = w; } _pred[u_in] = in_arc; - _forward[u_in] = ((unsigned 
int)u_in == _source[in_arc]); + _forward[u_in] = (u_in == _source[in_arc]); _succ_num[u_in] = old_succ_num; // Set limits for updating _last_succ form v_in and v_out @@ -1328,7 +1332,7 @@ namespace lemon { if (_sum_supply > 0) total -= _sum_supply; if (total <= 0) return true; - IntVector arc_vector; + ArcVector arc_vector; if (_sum_supply >= 0) { if (supply_nodes.size() == 1 && demand_nodes.size() == 1) { // Perform a reverse graph search from the sink to the source @@ -1345,7 +1349,7 @@ namespace lemon { Arc a; _graph.firstIn(a, v); for (; a != INVALID; _graph.nextIn(a)) { if (reached[u = _graph.source(a)]) continue; - int j = getArcID(a); + ArcsType j = getArcID(a); if (INF >= total) { arc_vector.push_back(j); reached[u] = true; @@ -1355,7 +1359,7 @@ namespace lemon { } } else { // Find the min. cost incomming arc for each demand node - for (int i = 0; i != int(demand_nodes.size()); ++i) { + for (int i = 0; i != demand_nodes.size(); ++i) { Node v = demand_nodes[i]; Cost c, min_cost = std::numeric_limits<Cost>::max(); Arc min_arc = INVALID; @@ -1393,7 +1397,7 @@ namespace lemon { } // Perform heuristic initial pivots - for (int i = 0; i != int(arc_vector.size()); ++i) { + for (ArcsType i = 0; i != arc_vector.size(); ++i) { in_arc = arc_vector[i]; // l'erreur est probablement ici... if (_state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] - @@ -1423,7 +1427,7 @@ namespace lemon { // Perform heuristic initial pivots if (!initialPivots()) return UNBOUNDED; - int iter_number=0; + size_t iter_number=0; //pivot.setDantzig(true); // Execute the Network Simplex algorithm while (pivot.findEnteringArc()) { @@ -1443,7 +1447,7 @@ namespace lemon { double a; a= (fabs(_pi[_source[in_arc]])>=fabs(_pi[_target[in_arc]])) ? 
fabs(_pi[_source[in_arc]]) : fabs(_pi[_target[in_arc]]); a=a>=fabs(_cost[in_arc])?a:fabs(_cost[in_arc]); - for (int i=0; i<_flow.size(); i++) { + for (int64_t i=0; i<_flow.size(); i++) { sumFlow+=_state[i]*_flow[i]; } std::cout << "Sum of the flow " << std::setprecision(20) << sumFlow << "\n" << iter_number << " iterations, current cost=" << curCost << "\nReduced cost=" << _state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -_pi[_target[in_arc]]) << "\nPrecision = "<< -EPSILON*(a) << "\n"; @@ -1482,12 +1486,12 @@ namespace lemon { double a; a= (fabs(_pi[_source[in_arc]])>=fabs(_pi[_target[in_arc]])) ? fabs(_pi[_source[in_arc]]) : fabs(_pi[_target[in_arc]]); a=a>=fabs(_cost[in_arc])?a:fabs(_cost[in_arc]); - for (int i=0; i<_flow.size(); i++) { + for (int64_t i=0; i<_flow.size(); i++) { sumFlow+=_state[i]*_flow[i]; } - + std::cout << "Sum of the flow " << std::setprecision(20) << sumFlow << "\n" << niter << " iterations, current cost=" << curCost << "\nReduced cost=" << _state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -_pi[_target[in_arc]]) << "\nPrecision = "<< -EPSILON*(a) << "\n"; - + std::cout << "Arc in = (" << _node_id(_source[in_arc]) << ", " << _node_id(_target[in_arc]) <<")\n"; std::cout << "Supplies = (" << _supply[_source[in_arc]] << ", " << _supply[_target[in_arc]] << ")\n"; @@ -1505,9 +1509,9 @@ namespace lemon { #endif // Check feasibility if( retVal == OPTIMAL){ - for (int e = _search_arc_num; e != _all_arc_num; ++e) { + for (ArcsType e = _search_arc_num; e != _all_arc_num; ++e) { if (_flow[e] != 0){ - if (abs(_flow[e]) > EPSILON) + if (fabs(_flow[e]) > _EPSILON) // change of the original code following issue #126 return INFEASIBLE; else _flow[e]=0; @@ -1521,20 +1525,20 @@ namespace lemon { if (_sum_supply == 0) { if (_stype == GEQ) { Cost max_pot = -std::numeric_limits<Cost>::max(); - for (int i = 0; i != _node_num; ++i) { + for (ArcsType i = 0; i != _node_num; ++i) { if (_pi[i] > max_pot) max_pot = _pi[i]; } if (max_pot > 0) { - for (int i = 
0; i != _node_num; ++i) + for (ArcsType i = 0; i != _node_num; ++i) _pi[i] -= max_pot; } } else { Cost min_pot = std::numeric_limits<Cost>::max(); - for (int i = 0; i != _node_num; ++i) { + for (ArcsType i = 0; i != _node_num; ++i) { if (_pi[i] < min_pot) min_pot = _pi[i]; } if (min_pot < 0) { - for (int i = 0; i != _node_num; ++i) + for (ArcsType i = 0; i != _node_num; ++i) _pi[i] -= min_pot; } } @@ -1548,5 +1552,3 @@ namespace lemon { ///@} } //namespace lemon - -#endif //LEMON_NETWORK_SIMPLEX_H diff --git a/ot/lp/network_simplex_simple_omp.h b/ot/lp/network_simplex_simple_omp.h new file mode 100644 index 0000000..87e4c05 --- /dev/null +++ b/ot/lp/network_simplex_simple_omp.h @@ -0,0 +1,1699 @@ +/* -*- mode: C++; indent-tabs-mode: nil; -*- +* +* +* This file has been adapted by Nicolas Bonneel (2013), +* from network_simplex.h from LEMON, a generic C++ optimization library, +* to implement a lightweight network simplex for mass transport, more +* memory efficient than the original file. A previous version of this file +* is used as part of the Displacement Interpolation project, +* Web: http://www.cs.ubc.ca/labs/imager/tr/2011/DisplacementInterpolation/ +* +* Revisions: +* March 2015: added OpenMP parallelization +* March 2017: included Antoine Rolet's trick to make it more robust +* April 2018: IMPORTANT bug fix + uses 64bit integers (slightly slower but less risks of overflows), updated to a newer version of the algo by LEMON, sparse flow by default + minor edits. +* +* +**** Original file Copyright Notice : +* +* Copyright (C) 2003-2010 +* Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport +* (Egervary Research Group on Combinatorial Optimization, EGRES). +* +* Permission to use, modify and distribute this software is granted +* provided that this copyright notice appears in all copies. For +* precise terms see the accompanying LICENSE file. 
+* +* This software is provided "AS IS" with no warranty of any kind, +* express or implied, and with no claim as to its suitability for any +* purpose. +* +*/ + +#pragma once +#undef DEBUG_LVL +#define DEBUG_LVL 0 + +#if DEBUG_LVL>0 +#include <iomanip> +#endif + +#undef EPSILON +#undef _EPSILON +#undef MAX_DEBUG_ITER +#define EPSILON std::numeric_limits<Cost>::epsilon()*10 +#define _EPSILON 1e-8 +#define MAX_DEBUG_ITER 100000 + +/// \ingroup min_cost_flow_algs +/// +/// \file +/// \brief Network Simplex algorithm for finding a minimum cost flow. + +// if your compiler has troubles with unorderedmaps, just comment the following line to use a slower std::map instead +#define HASHMAP // now handled with unorderedmaps instead of stdext::hash_map. Should be better supported. + +#define SPARSE_FLOW // a sparse flow vector will be 10-15% slower for small problems but uses less memory and becomes faster for large problems (40k total nodes) + +#include <vector> +#include <limits> +#include <algorithm> +#include <iostream> +#ifdef HASHMAP +#include <unordered_map> +#else +#include <map> +#endif +//#include "core.h" +//#include "lmath.h" + +#ifdef OMP +#include <omp.h> +#endif +#include <cmath> + + +//#include "sparse_array_n.h" +#include "full_bipartitegraph_omp.h" + +#undef INVALIDNODE +#undef INVALID +#define INVALIDNODE -1 +#define INVALID (-1) + +namespace lemon_omp { + + int64_t max_threads = -1; + + template <typename T> + class ProxyObject; + + template<typename T> + class SparseValueVector + { + public: + SparseValueVector(size_t n = 0) // parameter n for compatibility with standard vectors + { + } + void resize(size_t n = 0) {}; + T operator[](const size_t id) const + { +#ifdef HASHMAP + typename std::unordered_map<size_t, T>::const_iterator it = data.find(id); +#else + typename std::map<size_t, T>::const_iterator it = data.find(id); +#endif + if (it == data.end()) + return 0; + else + return it->second; + } + + ProxyObject<T> operator[](const size_t id) + { + 
return ProxyObject<T>(this, id); + } + + //private: +#ifdef HASHMAP + std::unordered_map<size_t, T> data; +#else + std::map<size_t, T> data; +#endif + + }; + + template <typename T> + class ProxyObject { + public: + ProxyObject(SparseValueVector<T> *v, size_t idx) { _v = v; _idx = idx; }; + ProxyObject<T> & operator=(const T &v) { + // If we get here, we know that operator[] was called to perform a write access, + // so we can insert an item in the vector if needed + if (v != 0) + _v->data[_idx] = v; + return *this; + } + + operator T() { + // If we get here, we know that operator[] was called to perform a read access, + // so we can simply return the existing object +#ifdef HASHMAP + typename std::unordered_map<size_t, T>::iterator it = _v->data.find(_idx); +#else + typename std::map<size_t, T>::iterator it = _v->data.find(_idx); +#endif + if (it == _v->data.end()) + return 0; + else + return it->second; + } + + void operator+=(T val) + { + if (val == 0) return; +#ifdef HASHMAP + typename std::unordered_map<size_t, T>::iterator it = _v->data.find(_idx); +#else + typename std::map<size_t, T>::iterator it = _v->data.find(_idx); +#endif + if (it == _v->data.end()) + _v->data[_idx] = val; + else + { + T sum = it->second + val; + if (sum == 0) + _v->data.erase(it); + else + it->second = sum; + } + } + void operator-=(T val) + { + if (val == 0) return; +#ifdef HASHMAP + typename std::unordered_map<size_t, T>::iterator it = _v->data.find(_idx); +#else + typename std::map<size_t, T>::iterator it = _v->data.find(_idx); +#endif + if (it == _v->data.end()) + _v->data[_idx] = -val; + else + { + T sum = it->second - val; + if (sum == 0) + _v->data.erase(it); + else + it->second = sum; + } + } + + SparseValueVector<T> *_v; + size_t _idx; + }; + + + + /// \addtogroup min_cost_flow_algs + /// @{ + + /// \brief Implementation of the primal Network Simplex algorithm + /// for finding a \ref min_cost_flow "minimum cost flow". 
///
/// \ref NetworkSimplexSimple implements the primal Network Simplex algorithm
/// for finding a \ref min_cost_flow "minimum cost flow"
/// \ref amo93networkflows, \ref dantzig63linearprog,
/// \ref kellyoneill91netsimplex.
/// This algorithm is a highly efficient specialized version of the
/// linear programming simplex method directly for the minimum cost
/// flow problem.
///
/// In general, %NetworkSimplexSimple is the fastest implementation available
/// in LEMON for this problem.
/// Moreover, it supports both directions of the supply/demand inequality
/// constraints. For more information, see \ref SupplyType.
///
/// Most of the parameters of the problem (except for the digraph)
/// can be given using separate functions, and the algorithm can be
/// executed using the \ref run() function. If some parameters are not
/// specified, then default values will be used.
///
/// \tparam GR The digraph type the algorithm runs on.
/// \tparam V The number type used for flow amounts, capacity bounds
/// and supply values in the algorithm. By default, it is \c int.
/// \tparam C The number type used for costs and potentials in the
/// algorithm. By default, it is the same as \c V.
/// \tparam ArcsType The integer type used for arc indices and arc counts.
/// By default, it is \c int64_t.
///
/// \warning Both number types must be signed and all input data must
/// be integer.
///
/// \note %NetworkSimplexSimple provides five different pivot rule
/// implementations, from which the most efficient one is used
/// by default. For more information, see \ref PivotRule.
// NOTE(review): the pivot-rule note above is inherited from LEMON; confirm it
// still applies to this simplified class.
template <typename GR, typename V = int, typename C = V, typename ArcsType = int64_t>
class NetworkSimplexSimple
{
public:

    /// \brief Constructor.
    ///
    /// The constructor of the class.
    ///
    /// \param graph The digraph the algorithm runs on.
    /// \param arc_mixing Indicate if the arcs have to be stored in a
    /// mixed order in the internal data structure.
    /// In special cases, it could lead to better overall performance,
    /// but it is usually slower. Therefore it is disabled by default.
    /// \param nbnodes Number of nodes; \ref reset() uses it to size the
    /// per-node arrays.
    /// \param nb_arcs Number of arcs; \ref reset() uses it to size the
    /// per-arc arrays.
    /// \param maxiters Stored in \c max_iter (presumably an iteration cap for
    /// the solver loop; the loop is not defined in this part of the file).
    /// \param numThreads Requested number of OpenMP threads. A value in
    /// [1, max_threads] is used as given; -1 or a value above \c max_threads
    /// selects \c max_threads; any other value selects 1. Without \c OMP the
    /// solver always uses one thread.
    NetworkSimplexSimple(const GR& graph, bool arc_mixing, int nbnodes, ArcsType nb_arcs, size_t maxiters = 0, int numThreads=-1) :
        _graph(graph), //_arc_id(graph),
        _arc_mixing(arc_mixing), _init_nb_nodes(nbnodes), _init_nb_arcs(nb_arcs),
        MAX(std::numeric_limits<Value>::max()),
        INF(std::numeric_limits<Value>::has_infinity ?
            std::numeric_limits<Value>::infinity() : MAX)
    {
        // Reset data structures
        reset();
        max_iter = maxiters;
#ifdef OMP
        // Query the OpenMP thread limit once and cache it in the namespace-level variable
        if (max_threads < 0) {
            max_threads = omp_get_max_threads();
        }
        if (numThreads > 0 && numThreads<=max_threads){
            num_threads = numThreads;
        } else if (numThreads == -1 || numThreads>max_threads) {
            num_threads = max_threads;
        } else {
            num_threads = 1;
        }
        // Note: this changes the OpenMP thread count for the calling code as well
        omp_set_num_threads(num_threads);
#else
        num_threads = 1;
#endif
    }

    /// The type of the flow amounts, capacity bounds and supply values
    typedef V Value;
    /// The type of the arc costs
    typedef C Cost;

public:
    /// \brief Problem type constants for the \c run() function.
    ///
    /// Enum type containing the problem type constants that can be
    /// returned by the \ref run() function of the algorithm.
    enum ProblemType {
        /// The problem has no feasible solution (flow).
        INFEASIBLE,
        /// The problem has optimal solution (i.e. it is feasible and
        /// bounded), and the algorithm has found optimal flow and node
        /// potentials (primal and dual solutions).
        OPTIMAL,
        /// The objective function of the problem is unbounded, i.e.
        /// there is a directed cycle having negative total cost and
        /// infinite upper bound.
        UNBOUNDED,
        /// The maximum number of iterations has been reached
        MAX_ITER_REACHED
    };

    /// \brief Constants for selecting the type of the supply constraints.
    ///
    /// Enum type containing constants for selecting the supply type,
    /// i.e. the direction of the inequalities in the supply/demand
    /// constraints of the \ref min_cost_flow "minimum cost flow problem".
    ///
    /// The default supply type is \c GEQ, the \c LEQ type can be
    /// selected using \ref supplyType().
    /// The equality form is a special case of both supply types.
    enum SupplyType {
        /// This option means that there are <em>"greater or equal"</em>
        /// supply/demand constraints in the definition of the problem.
        GEQ,
        /// This option means that there are <em>"less or equal"</em>
        /// supply/demand constraints in the definition of the problem.
        LEQ
    };



private:
    size_t max_iter;   // set from the constructor's maxiters argument
    int num_threads;   // thread count chosen in the constructor
    TEMPLATE_DIGRAPH_TYPEDEFS(GR);

    typedef std::vector<int> IntVector;
    typedef std::vector<ArcsType> ArcVector;
    typedef std::vector<Value> ValueVector;
    typedef std::vector<Cost> CostVector;
    // typedef SparseValueVector<Cost> CostVector;
    typedef std::vector<char> BoolVector;
    // Note: vector<char> is used instead of vector<bool> for efficiency reasons

    // State constants for arcs
    enum ArcState {
        STATE_UPPER = -1,
        STATE_TREE = 0,
        STATE_LOWER = 1
    };

    typedef std::vector<signed char> StateVector;
    // Note: vector<signed char> is used instead of vector<ArcState> for
    // efficiency reasons

private:

    // Data related to the underlying digraph
    const GR &_graph;
    int _node_num;
    ArcsType _arc_num;
    ArcsType _all_arc_num;
    ArcsType _search_arc_num;

    // Parameters of the problem
    SupplyType _stype;
    Value _sum_supply;

    // Maps a graph node id to the internal node index (the order is reversed)
    inline int _node_id(int n) const { return _node_num - n - 1; };

    //IntArcMap _arc_id;
    IntVector _source; // keep nodes as integers
    IntVector _target;
    bool _arc_mixing;

    // Node and arc data
    CostVector _cost;
    ValueVector _supply;
#ifdef SPARSE_FLOW
    SparseValueVector<Value> _flow;
#else
    ValueVector _flow;
#endif

    CostVector _pi;

    // Data for storing the spanning tree structure
    IntVector _parent;
    ArcVector _pred;
    IntVector _thread;
IntVector _rev_thread; + IntVector _succ_num; + IntVector _last_succ; + IntVector _dirty_revs; + BoolVector _forward; + StateVector _state; + ArcsType _root; + + // Temporary data used in the current pivot iteration + ArcsType in_arc, join, u_in, v_in, u_out, v_out; + ArcsType first, second, right, last; + ArcsType stem, par_stem, new_stem; + Value delta; + + const Value MAX; + + ArcsType mixingCoeff; + + public: + + /// \brief Constant for infinite upper bounds (capacities). + /// + /// Constant for infinite upper bounds (capacities). + /// It is \c std::numeric_limits<Value>::infinity() if available, + /// \c std::numeric_limits<Value>::max() otherwise. + const Value INF; + + private: + + // thank you to DVK and MizardX from StackOverflow for this function! + inline ArcsType sequence(ArcsType k) const { + ArcsType smallv = (k > num_total_big_subsequence_numbers) & 1; + + k -= num_total_big_subsequence_numbers * smallv; + ArcsType subsequence_length2 = subsequence_length - smallv; + ArcsType subsequence_num = (k / subsequence_length2) + num_big_subsequences * smallv; + ArcsType subsequence_offset = (k % subsequence_length2) * mixingCoeff; + + return subsequence_offset + subsequence_num; + } + ArcsType subsequence_length; + ArcsType num_big_subsequences; + ArcsType num_total_big_subsequence_numbers; + + inline ArcsType getArcID(const Arc &arc) const + { + //int n = _arc_num-arc._id-1; + ArcsType n = _arc_num - GR::id(arc) - 1; + + //ArcsType a = mixingCoeff*(n%mixingCoeff) + n/mixingCoeff; + //ArcsType b = _arc_id[arc]; + if (_arc_mixing) + return sequence(n); + else + return n; + } + + // finally unused because too slow + inline ArcsType getSource(const ArcsType arc) const + { + //ArcsType a = _source[arc]; + //return a; + + ArcsType n = _arc_num - arc - 1; + if (_arc_mixing) + n = mixingCoeff*(n%mixingCoeff) + n / mixingCoeff; + + ArcsType b; + if (n >= 0) + b = _node_id(_graph.source(GR::arcFromId(n))); + else + { + n = arc + 1 - _arc_num; + if (n <= _node_num) 
+ b = _node_num; + else + if (n >= _graph._n1) + b = _graph._n1; + else + b = _graph._n1 - n; + } + + return b; + } + + + + // Implementation of the Block Search pivot rule + class BlockSearchPivotRule + { + private: + + // References to the NetworkSimplexSimple class + const IntVector &_source; + const IntVector &_target; + const CostVector &_cost; + const StateVector &_state; + const CostVector &_pi; + ArcsType &_in_arc; + ArcsType _search_arc_num; + + // Pivot rule data + ArcsType _block_size; + ArcsType _next_arc; + NetworkSimplexSimple &_ns; + + public: + + // Constructor + BlockSearchPivotRule(NetworkSimplexSimple &ns) : + _source(ns._source), _target(ns._target), + _cost(ns._cost), _state(ns._state), _pi(ns._pi), + _in_arc(ns.in_arc), _search_arc_num(ns._search_arc_num), + _next_arc(0), _ns(ns) + { + // The main parameters of the pivot rule + const double BLOCK_SIZE_FACTOR = 1; + const ArcsType MIN_BLOCK_SIZE = 10; + + _block_size = std::max(ArcsType(BLOCK_SIZE_FACTOR * std::sqrt(double(_search_arc_num))), MIN_BLOCK_SIZE); + } + + // Find next entering arc + bool findEnteringArc() { + Cost min_val = 0; + + ArcsType N = _ns.num_threads; + + std::vector<Cost> minArray(N, 0); + std::vector<ArcsType> arcId(N); + ArcsType bs = (ArcsType)ceil(_block_size / (double)N); + + for (ArcsType i = 0; i < _search_arc_num; i += _block_size) { + + ArcsType e; + int j; +#pragma omp parallel + { +#ifdef OMP + int t = omp_get_thread_num(); +#else + int t = 0; +#endif + +#pragma omp for schedule(static, bs) lastprivate(e) + for (j = 0; j < std::min(i + _block_size, _search_arc_num) - i; j++) { + e = (_next_arc + i + j); if (e >= _search_arc_num) e -= _search_arc_num; + Cost c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]); + if (c < minArray[t]) { + minArray[t] = c; + arcId[t] = e; + } + } + } + for (int j = 0; j < N; j++) { + if (minArray[j] < min_val) { + min_val = minArray[j]; + _in_arc = arcId[j]; + } + } + Cost a = std::abs(_pi[_source[_in_arc]]) > 
std::abs(_pi[_target[_in_arc]]) ? std::abs(_pi[_source[_in_arc]]) : std::abs(_pi[_target[_in_arc]]); + a = a > std::abs(_cost[_in_arc]) ? a : std::abs(_cost[_in_arc]); + if (min_val < -EPSILON*a) { + _next_arc = e; + return true; + } + } + + Cost a = fabs(_pi[_source[_in_arc]]) > fabs(_pi[_target[_in_arc]]) ? fabs(_pi[_source[_in_arc]]) : fabs(_pi[_target[_in_arc]]); + a = a > fabs(_cost[_in_arc]) ? a : fabs(_cost[_in_arc]); + if (min_val >= -EPSILON*a) return false; + + return true; + } + + + // Find next entering arc + /*bool findEnteringArc() { + Cost min_val = 0; + int N = omp_get_max_threads(); + std::vector<Cost> minArray(N); + std::vector<ArcsType> arcId(N); + + ArcsType bs = (ArcsType)ceil(_block_size / (double)N); + for (ArcsType i = 0; i < _search_arc_num; i += _block_size) { + + ArcsType maxJ = std::min(i + _block_size, _search_arc_num) - i; + ArcsType j; +#pragma omp parallel + { + int t = omp_get_thread_num(); + Cost minV = 0; + ArcsType arcStart = _next_arc + i; + ArcsType arc = -1; +#pragma omp for schedule(static, bs) + for (j = 0; j < maxJ; j++) { + ArcsType e = arcStart + j; if (e >= _search_arc_num) e -= _search_arc_num; + Cost c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]); + if (c < minV) { + minV = c; + arc = e; + } + } + + minArray[t] = minV; + arcId[t] = arc; + } + for (int j = 0; j < N; j++) { + if (minArray[j] < min_val) { + min_val = minArray[j]; + _in_arc = arcId[j]; + } + } + + //FIX by Antoine Rolet to avoid precision issues + Cost a = std::max(std::abs(_cost[_in_arc]), std::max(std::abs(_pi[_source[_in_arc]]), std::abs(_pi[_target[_in_arc]]))); + if (min_val <-std::numeric_limits<Cost>::epsilon()*a) { + _next_arc = _next_arc + i + maxJ - 1; + if (_next_arc >= _search_arc_num) _next_arc -= _search_arc_num; + return true; + } + } + + if (min_val >= 0) { + return false; + } + + return true; + }*/ + + + /*bool findEnteringArc() { + Cost c, min = 0; + int cnt = _block_size; + int e, min_arc = _next_arc; + for (e = 
_next_arc; e < _search_arc_num; ++e) { + c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]); + if (c < min) { + min = c; + min_arc = e; + + } + if (--cnt == 0) { + if (min < 0) break; + cnt = _block_size; + + } + + } + if (min == 0 || cnt > 0) { + for (e = 0; e < _next_arc; ++e) { + c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]); + if (c < min) { + min = c; + min_arc = e; + + } + if (--cnt == 0) { + if (min < 0) break; + cnt = _block_size; + + } + + } + + } + if (min >= 0) return false; + _in_arc = min_arc; + _next_arc = e; + return true; + }*/ + + + + }; //class BlockSearchPivotRule + + + + public: + + + + int _init_nb_nodes; + ArcsType _init_nb_arcs; + + /// \name Parameters + /// The parameters of the algorithm can be specified using these + /// functions. + + /// @{ + + + /// \brief Set the costs of the arcs. + /// + /// This function sets the costs of the arcs. + /// If it is not used before calling \ref run(), the costs + /// will be set to \c 1 on all arcs. + /// + /// \param map An arc map storing the costs. + /// Its \c Value type must be convertible to the \c Cost type + /// of the algorithm. + /// + /// \return <tt>(*this)</tt> + template<typename CostMap> + NetworkSimplexSimple& costMap(const CostMap& map) { + Arc a; _graph.first(a); + for (; a != INVALID; _graph.next(a)) { + _cost[getArcID(a)] = map[a]; + } + return *this; + } + + + /// \brief Set the costs of one arc. + /// + /// This function sets the costs of one arcs. + /// Done for memory reasons + /// + /// \param arc An arc. + /// \param arc A cost + /// + /// \return <tt>(*this)</tt> + template<typename Value> + NetworkSimplexSimple& setCost(const Arc& arc, const Value cost) { + _cost[getArcID(arc)] = cost; + return *this; + } + + + /// \brief Set the supply values of the nodes. + /// + /// This function sets the supply values of the nodes. 
+ /// If neither this function nor \ref stSupply() is used before + /// calling \ref run(), the supply of each node will be set to zero. + /// + /// \param map A node map storing the supply values. + /// Its \c Value type must be convertible to the \c Value type + /// of the algorithm. + /// + /// \return <tt>(*this)</tt> + template<typename SupplyMap> + NetworkSimplexSimple& supplyMap(const SupplyMap& map) { + Node n; _graph.first(n); + for (; n != INVALIDNODE; _graph.next(n)) { + _supply[_node_id(n)] = map[n]; + } + return *this; + } + template<typename SupplyMap> + NetworkSimplexSimple& supplyMap(const SupplyMap* map1, int n1, const SupplyMap* map2, int n2) { + Node n; _graph.first(n); + for (; n != INVALIDNODE; _graph.next(n)) { + if (n<n1) + _supply[_node_id(n)] = map1[n]; + else + _supply[_node_id(n)] = map2[n - n1]; + } + return *this; + } + template<typename SupplyMap> + NetworkSimplexSimple& supplyMapAll(SupplyMap val1, int n1, SupplyMap val2, int n2) { + Node n; _graph.first(n); + for (; n != INVALIDNODE; _graph.next(n)) { + if (n<n1) + _supply[_node_id(n)] = val1; + else + _supply[_node_id(n)] = val2; + } + return *this; + } + + /// \brief Set single source and target nodes and a supply value. + /// + /// This function sets a single source node and a single target node + /// and the required flow value. + /// If neither this function nor \ref supplyMap() is used before + /// calling \ref run(), the supply of each node will be set to zero. + /// + /// Using this function has the same effect as using \ref supplyMap() + /// with such a map in which \c k is assigned to \c s, \c -k is + /// assigned to \c t and all other nodes have zero supply value. + /// + /// \param s The source node. + /// \param t The target node. + /// \param k The required amount of flow from node \c s to node \c t + /// (i.e. the supply of \c s and the demand of \c t). 
+ /// + /// \return <tt>(*this)</tt> + NetworkSimplexSimple& stSupply(const Node& s, const Node& t, Value k) { + for (int i = 0; i != _node_num; ++i) { + _supply[i] = 0; + } + _supply[_node_id(s)] = k; + _supply[_node_id(t)] = -k; + return *this; + } + + /// \brief Set the type of the supply constraints. + /// + /// This function sets the type of the supply/demand constraints. + /// If it is not used before calling \ref run(), the \ref GEQ supply + /// type will be used. + /// + /// For more information, see \ref SupplyType. + /// + /// \return <tt>(*this)</tt> + NetworkSimplexSimple& supplyType(SupplyType supply_type) { + _stype = supply_type; + return *this; + } + + /// @} + + /// \name Execution Control + /// The algorithm can be executed using \ref run(). + + /// @{ + + /// \brief Run the algorithm. + /// + /// This function runs the algorithm. + /// The paramters can be specified using functions \ref lowerMap(), + /// \ref upperMap(), \ref costMap(), \ref supplyMap(), \ref stSupply(), + /// \ref supplyType(). + /// For example, + /// \code + /// NetworkSimplexSimple<ListDigraph> ns(graph); + /// ns.lowerMap(lower).upperMap(upper).costMap(cost) + /// .supplyMap(sup).run(); + /// \endcode + /// + /// This function can be called more than once. All the given parameters + /// are kept for the next call, unless \ref resetParams() or \ref reset() + /// is used, thus only the modified parameters have to be set again. + /// If the underlying digraph was also modified after the construction + /// of the class (or the last \ref reset() call), then the \ref reset() + /// function must be called. + /// + /// \param pivot_rule The pivot rule that will be used during the + /// algorithm. For more information, see \ref PivotRule. + /// + /// \return \c INFEASIBLE if no feasible flow exists, + /// \n \c OPTIMAL if the problem has optimal solution + /// (i.e. 
it is feasible and bounded), and the algorithm has found + /// optimal flow and node potentials (primal and dual solutions), + /// \n \c UNBOUNDED if the objective function of the problem is + /// unbounded, i.e. there is a directed cycle having negative total + /// cost and infinite upper bound. + /// + /// \see ProblemType, PivotRule + /// \see resetParams(), reset() + ProblemType run() { +#if DEBUG_LVL>0 + std::cout << "OPTIMAL = " << OPTIMAL << "\nINFEASIBLE = " << INFEASIBLE << "\nUNBOUNDED = " << UNBOUNDED << "\nMAX_ITER_REACHED" << MAX_ITER_REACHED << "\n" ; +#endif + if (!init()) return INFEASIBLE; +#if DEBUG_LVL>0 + std::cout << "Init done, starting iterations\n"; +#endif + + return start(); + } + + /// \brief Reset all the parameters that have been given before. + /// + /// This function resets all the paramaters that have been given + /// before using functions \ref lowerMap(), \ref upperMap(), + /// \ref costMap(), \ref supplyMap(), \ref stSupply(), \ref supplyType(). + /// + /// It is useful for multiple \ref run() calls. Basically, all the given + /// parameters are kept for the next \ref run() call, unless + /// \ref resetParams() or \ref reset() is used. + /// If the underlying digraph was also modified after the construction + /// of the class or the last \ref reset() call, then the \ref reset() + /// function must be used, otherwise \ref resetParams() is sufficient. 
+ /// + /// For example, + /// \code + /// NetworkSimplexSimple<ListDigraph> ns(graph); + /// + /// // First run + /// ns.lowerMap(lower).upperMap(upper).costMap(cost) + /// .supplyMap(sup).run(); + /// + /// // Run again with modified cost map (resetParams() is not called, + /// // so only the cost map have to be set again) + /// cost[e] += 100; + /// ns.costMap(cost).run(); + /// + /// // Run again from scratch using resetParams() + /// // (the lower bounds will be set to zero on all arcs) + /// ns.resetParams(); + /// ns.upperMap(capacity).costMap(cost) + /// .supplyMap(sup).run(); + /// \endcode + /// + /// \return <tt>(*this)</tt> + /// + /// \see reset(), run() + NetworkSimplexSimple& resetParams() { + for (int i = 0; i != _node_num; ++i) { + _supply[i] = 0; + } + for (ArcsType i = 0; i != _arc_num; ++i) { + _cost[i] = 1; + } + _stype = GEQ; + return *this; + } + + + /// \brief Reset the internal data structures and all the parameters + /// that have been given before. + /// + /// This function resets the internal data structures and all the + /// paramaters that have been given before using functions \ref lowerMap(), + /// \ref upperMap(), \ref costMap(), \ref supplyMap(), \ref stSupply(), + /// \ref supplyType(). + /// + /// It is useful for multiple \ref run() calls. Basically, all the given + /// parameters are kept for the next \ref run() call, unless + /// \ref resetParams() or \ref reset() is used. + /// If the underlying digraph was also modified after the construction + /// of the class or the last \ref reset() call, then the \ref reset() + /// function must be used, otherwise \ref resetParams() is sufficient. + /// + /// See \ref resetParams() for examples. 
///
/// \return <tt>(*this)</tt>
///
/// \see resetParams(), run()
NetworkSimplexSimple& reset() {
    // Resize vectors
    _node_num = _init_nb_nodes;
    _arc_num = _init_nb_arcs;
    // One extra node slot for the artificial root used by init()
    int all_node_num = _node_num + 1;
    // Room for the graph arcs plus up to two artificial arcs per node
    ArcsType max_arc_num = _arc_num + 2 * _node_num;

    _source.resize(max_arc_num);
    _target.resize(max_arc_num);

    _cost.resize(max_arc_num);
    _supply.resize(all_node_num);
    _flow.resize(max_arc_num);
    _pi.resize(all_node_num);

    _parent.resize(all_node_num);
    _pred.resize(all_node_num);
    _forward.resize(all_node_num);
    _thread.resize(all_node_num);
    _rev_thread.resize(all_node_num);
    _succ_num.resize(all_node_num);
    _last_succ.resize(all_node_num);
    _state.resize(max_arc_num);


    //_arc_mixing=false;
    if (_arc_mixing && _node_num > 1) {
        // Store the arcs in a mixed order
        //ArcsType k = std::max(ArcsType(std::sqrt(double(_arc_num))), ArcsType(10));
        const ArcsType k = std::max(ArcsType(_arc_num / _node_num), ArcsType(3));
        // Parameters read by sequence() to compute the mixed position of an arc
        mixingCoeff = k;
        subsequence_length = _arc_num / mixingCoeff + 1;
        num_big_subsequences = _arc_num % mixingCoeff;
        num_total_big_subsequence_numbers = subsequence_length * num_big_subsequences;

        // Each iteration writes the slot sequence(...) computes for its own arc
#pragma omp parallel for schedule(static)
        for (Arc a = 0; a <= _graph.maxArcId(); a++) { // --a <=> _graph.next(a) , -1 == INVALID
            ArcsType i = sequence(_graph.maxArcId()-a);
            _source[i] = _node_id(_graph.source(a));
            _target[i] = _node_id(_graph.target(a));
        }
    } else {
        // Store the arcs in the original order
        ArcsType i = 0;
        Arc a; _graph.first(a);
        for (; a != INVALID; _graph.next(a), ++i) {
            _source[i] = _node_id(_graph.source(a));
            _target[i] = _node_id(_graph.target(a));
            //_arc_id[a] = i;
        }
    }

    // Reset parameters
    resetParams();
    return *this;
}

/// @}

/// \name Query Functions
/// The results of the algorithm can be obtained using these
/// functions.\n
/// The \ref run() function must be called before using them.
+ + /// @{ + + /// \brief Return the total cost of the found flow. + /// + /// This function returns the total cost of the found flow. + /// Its complexity is O(e). + /// + /// \note The return type of the function can be specified as a + /// template parameter. For example, + /// \code + /// ns.totalCost<double>(); + /// \endcode + /// It is useful if the total cost cannot be stored in the \c Cost + /// type of the algorithm, which is the default return type of the + /// function. + /// + /// \pre \ref run() must be called before using this function. + /*template <typename Number> + Number totalCost() const { + Number c = 0; + for (ArcIt a(_graph); a != INVALID; ++a) { + int i = getArcID(a); + c += Number(_flow[i]) * Number(_cost[i]); + } + return c; + }*/ + + template <typename Number> + Number totalCost() const { + Number c = 0; + +#ifdef SPARSE_FLOW + #ifdef HASHMAP + typename std::unordered_map<size_t, Value>::const_iterator it; + #else + typename std::map<size_t, Value>::const_iterator it; + #endif + for (it = _flow.data.begin(); it!=_flow.data.end(); ++it) + c += Number(it->second) * Number(_cost[it->first]); + return c; +#else + for (ArcsType i = 0; i<_flow.size(); i++) + c += _flow[i] * Number(_cost[i]); + return c; +#endif + } + +#ifndef DOXYGEN + Cost totalCost() const { + return totalCost<Cost>(); + } +#endif + + /// \brief Return the flow on the given arc. + /// + /// This function returns the flow on the given arc. + /// + /// \pre \ref run() must be called before using this function. + Value flow(const Arc& a) const { + return _flow[getArcID(a)]; + } + + /// \brief Return the flow map (the primal solution). + /// + /// This function copies the flow value on each arc into the given + /// map. The \c Value type of the algorithm must be convertible to + /// the \c Value type of the map. + /// + /// \pre \ref run() must be called before using this function. 
+ template <typename FlowMap> + void flowMap(FlowMap &map) const { + Arc a; _graph.first(a); + for (; a != INVALID; _graph.next(a)) { + map.set(a, _flow[getArcID(a)]); + } + } + + /// \brief Return the potential (dual value) of the given node. + /// + /// This function returns the potential (dual value) of the + /// given node. + /// + /// \pre \ref run() must be called before using this function. + Cost potential(const Node& n) const { + return _pi[_node_id(n)]; + } + + /// \brief Return the potential map (the dual solution). + /// + /// This function copies the potential (dual value) of each node + /// into the given map. + /// The \c Cost type of the algorithm must be convertible to the + /// \c Value type of the map. + /// + /// \pre \ref run() must be called before using this function. + template <typename PotentialMap> + void potentialMap(PotentialMap &map) const { + Node n; _graph.first(n); + for (; n != INVALID; _graph.next(n)) { + map.set(n, _pi[_node_id(n)]); + } + } + + /// @} + + private: + + // Initialize internal data structures + bool init() { + if (_node_num == 0) return false; + + // Check the sum of supply values + _sum_supply = 0; + for (int i = 0; i != _node_num; ++i) { + _sum_supply += _supply[i]; + } + /*if (!((_stype == GEQ && _sum_supply <= 0) || + (_stype == LEQ && _sum_supply >= 0))) return false;*/ + + + // Initialize artifical cost + Cost ART_COST; + if (std::numeric_limits<Cost>::is_exact) { + ART_COST = std::numeric_limits<Cost>::max() / 2 + 1; + } else { + ART_COST = 0; + for (ArcsType i = 0; i != _arc_num; ++i) { + if (_cost[i] > ART_COST) ART_COST = _cost[i]; + } + ART_COST = (ART_COST + 1) * _node_num; + } + + // Initialize arc maps + for (ArcsType i = 0; i != _arc_num; ++i) { +#ifndef SPARSE_FLOW + _flow[i] = 0; //by default, the sparse matrix is empty +#endif + _state[i] = STATE_LOWER; + } +#ifdef SPARSE_FLOW + _flow = SparseValueVector<Value>(); +#endif + + // Set data for the artificial root node + _root = _node_num; + 
_parent[_root] = -1; + _pred[_root] = -1; + _thread[_root] = 0; + _rev_thread[0] = _root; + _succ_num[_root] = _node_num + 1; + _last_succ[_root] = _root - 1; + _supply[_root] = -_sum_supply; + _pi[_root] = 0; + + // Add artificial arcs and initialize the spanning tree data structure + if (_sum_supply == 0) { + // EQ supply constraints + _search_arc_num = _arc_num; + _all_arc_num = _arc_num + _node_num; + for (ArcsType u = 0, e = _arc_num; u != _node_num; ++u, ++e) { + _parent[u] = _root; + _pred[u] = e; + _thread[u] = u + 1; + _rev_thread[u + 1] = u; + _succ_num[u] = 1; + _last_succ[u] = u; + _state[e] = STATE_TREE; + if (_supply[u] >= 0) { + _forward[u] = true; + _pi[u] = 0; + _source[e] = u; + _target[e] = _root; + _flow[e] = _supply[u]; + _cost[e] = 0; + } else { + _forward[u] = false; + _pi[u] = ART_COST; + _source[e] = _root; + _target[e] = u; + _flow[e] = -_supply[u]; + _cost[e] = ART_COST; + } + } + } else if (_sum_supply > 0) { + // LEQ supply constraints + _search_arc_num = _arc_num + _node_num; + ArcsType f = _arc_num + _node_num; + for (ArcsType u = 0, e = _arc_num; u != _node_num; ++u, ++e) { + _parent[u] = _root; + _thread[u] = u + 1; + _rev_thread[u + 1] = u; + _succ_num[u] = 1; + _last_succ[u] = u; + if (_supply[u] >= 0) { + _forward[u] = true; + _pi[u] = 0; + _pred[u] = e; + _source[e] = u; + _target[e] = _root; + _flow[e] = _supply[u]; + _cost[e] = 0; + _state[e] = STATE_TREE; + } else { + _forward[u] = false; + _pi[u] = ART_COST; + _pred[u] = f; + _source[f] = _root; + _target[f] = u; + _flow[f] = -_supply[u]; + _cost[f] = ART_COST; + _state[f] = STATE_TREE; + _source[e] = u; + _target[e] = _root; + //_flow[e] = 0; //by default, the sparse matrix is empty + _cost[e] = 0; + _state[e] = STATE_LOWER; + ++f; + } + } + _all_arc_num = f; + } else { + // GEQ supply constraints + _search_arc_num = _arc_num + _node_num; + ArcsType f = _arc_num + _node_num; + for (ArcsType u = 0, e = _arc_num; u != _node_num; ++u, ++e) { + _parent[u] = _root; + _thread[u] 
= u + 1; + _rev_thread[u + 1] = u; + _succ_num[u] = 1; + _last_succ[u] = u; + if (_supply[u] <= 0) { + _forward[u] = false; + _pi[u] = 0; + _pred[u] = e; + _source[e] = _root; + _target[e] = u; + _flow[e] = -_supply[u]; + _cost[e] = 0; + _state[e] = STATE_TREE; + } else { + _forward[u] = true; + _pi[u] = -ART_COST; + _pred[u] = f; + _source[f] = u; + _target[f] = _root; + _flow[f] = _supply[u]; + _state[f] = STATE_TREE; + _cost[f] = ART_COST; + _source[e] = _root; + _target[e] = u; + //_flow[e] = 0; //by default, the sparse matrix is empty + _cost[e] = 0; + _state[e] = STATE_LOWER; + ++f; + } + } + _all_arc_num = f; + } + + return true; + } + + // Find the join node + void findJoinNode() { + int u = _source[in_arc]; + int v = _target[in_arc]; + while (u != v) { + if (_succ_num[u] < _succ_num[v]) { + u = _parent[u]; + } else { + v = _parent[v]; + } + } + join = u; + } + + // Find the leaving arc of the cycle and returns true if the + // leaving arc is not the same as the entering arc + bool findLeavingArc() { + // Initialize first and second nodes according to the direction + // of the cycle + if (_state[in_arc] == STATE_LOWER) { + first = _source[in_arc]; + second = _target[in_arc]; + } else { + first = _target[in_arc]; + second = _source[in_arc]; + } + delta = INF; + char result = 0; + Value d; + ArcsType e; + + // Search the cycle along the path form the first node to the root + for (int u = first; u != join; u = _parent[u]) { + e = _pred[u]; + d = _forward[u] ? _flow[e] : INF; + if (d < delta) { + delta = d; + u_out = u; + result = 1; + } + } + // Search the cycle along the path form the second node to the root + for (int u = second; u != join; u = _parent[u]) { + e = _pred[u]; + d = _forward[u] ? 
INF : _flow[e]; + if (d <= delta) { + delta = d; + u_out = u; + result = 2; + } + } + + if (result == 1) { + u_in = first; + v_in = second; + } else { + u_in = second; + v_in = first; + } + return result != 0; + } + + // Change _flow and _state vectors + void changeFlow(bool change) { + // Augment along the cycle + if (delta > 0) { + Value val = _state[in_arc] * delta; + _flow[in_arc] += val; + for (int u = _source[in_arc]; u != join; u = _parent[u]) { + _flow[_pred[u]] += _forward[u] ? -val : val; + } + for (int u = _target[in_arc]; u != join; u = _parent[u]) { + _flow[_pred[u]] += _forward[u] ? val : -val; + } + } + // Update the state of the entering and leaving arcs + if (change) { + _state[in_arc] = STATE_TREE; + _state[_pred[u_out]] = + (_flow[_pred[u_out]] == 0) ? STATE_LOWER : STATE_UPPER; + } else { + _state[in_arc] = -_state[in_arc]; + } + } + + // Update the tree structure + void updateTreeStructure() { + int old_rev_thread = _rev_thread[u_out]; + int old_succ_num = _succ_num[u_out]; + int old_last_succ = _last_succ[u_out]; + v_out = _parent[u_out]; + + // Check if u_in and u_out coincide + if (u_in == u_out) { + // Update _parent, _pred, _pred_dir + _parent[u_in] = v_in; + _pred[u_in] = in_arc; + _forward[u_in] = (u_in == _source[in_arc]); + + // Update _thread and _rev_thread + if (_thread[v_in] != u_out) { + ArcsType after = _thread[old_last_succ]; + _thread[old_rev_thread] = after; + _rev_thread[after] = old_rev_thread; + after = _thread[v_in]; + _thread[v_in] = u_out; + _rev_thread[u_out] = v_in; + _thread[old_last_succ] = after; + _rev_thread[after] = old_last_succ; + } + } else { + // Handle the case when old_rev_thread equals to v_in + // (it also means that join and v_out coincide) + int thread_continue = old_rev_thread == v_in ? + _thread[old_last_succ] : _thread[v_in]; + + // Update _thread and _parent along the stem nodes (i.e. 
the nodes + // between u_in and u_out, whose parent have to be changed) + int stem = u_in; // the current stem node + int par_stem = v_in; // the new parent of stem + int next_stem; // the next stem node + int last = _last_succ[u_in]; // the last successor of stem + int before, after = _thread[last]; + _thread[v_in] = u_in; + _dirty_revs.clear(); + _dirty_revs.push_back(v_in); + while (stem != u_out) { + // Insert the next stem node into the thread list + next_stem = _parent[stem]; + _thread[last] = next_stem; + _dirty_revs.push_back(last); + + // Remove the subtree of stem from the thread list + before = _rev_thread[stem]; + _thread[before] = after; + _rev_thread[after] = before; + + // Change the parent node and shift stem nodes + _parent[stem] = par_stem; + par_stem = stem; + stem = next_stem; + + // Update last and after + last = _last_succ[stem] == _last_succ[par_stem] ? + _rev_thread[par_stem] : _last_succ[stem]; + after = _thread[last]; + } + _parent[u_out] = par_stem; + _thread[last] = thread_continue; + _rev_thread[thread_continue] = last; + _last_succ[u_out] = last; + + // Remove the subtree of u_out from the thread list except for + // the case when old_rev_thread equals to v_in + if (old_rev_thread != v_in) { + _thread[old_rev_thread] = after; + _rev_thread[after] = old_rev_thread; + } + + // Update _rev_thread using the new _thread values + for (int i = 0; i != int(_dirty_revs.size()); ++i) { + int u = _dirty_revs[i]; + _rev_thread[_thread[u]] = u; + } + + // Update _pred, _pred_dir, _last_succ and _succ_num for the + // stem nodes from u_out to u_in + int tmp_sc = 0, tmp_ls = _last_succ[u_out]; + for (int u = u_out, p = _parent[u]; u != u_in; u = p, p = _parent[u]) { + _pred[u] = _pred[p]; + _forward[u] = !_forward[p]; + tmp_sc += _succ_num[u] - _succ_num[p]; + _succ_num[u] = tmp_sc; + _last_succ[p] = tmp_ls; + } + _pred[u_in] = in_arc; + _forward[u_in] = (u_in == _source[in_arc]); + _succ_num[u_in] = old_succ_num; + } + + // Update _last_succ from 
v_in towards the root + int up_limit_out = _last_succ[join] == v_in ? join : -1; + int last_succ_out = _last_succ[u_out]; + for (int u = v_in; u != -1 && _last_succ[u] == v_in; u = _parent[u]) { + _last_succ[u] = last_succ_out; + } + + // Update _last_succ from v_out towards the root + if (join != old_rev_thread && v_in != old_rev_thread) { + for (int u = v_out; u != up_limit_out && _last_succ[u] == old_last_succ; + u = _parent[u]) { + _last_succ[u] = old_rev_thread; + } + } else if (last_succ_out != old_last_succ) { + for (int u = v_out; u != up_limit_out && _last_succ[u] == old_last_succ; + u = _parent[u]) { + _last_succ[u] = last_succ_out; + } + } + + // Update _succ_num from v_in to join + for (int u = v_in; u != join; u = _parent[u]) { + _succ_num[u] += old_succ_num; + } + // Update _succ_num from v_out to join + for (int u = v_out; u != join; u = _parent[u]) { + _succ_num[u] -= old_succ_num; + } + } + + void updatePotential() { + Cost sigma = _pi[v_in] - _pi[u_in] - + ((_forward[u_in])?_cost[in_arc]:(-_cost[in_arc])); + int end = _thread[_last_succ[u_in]]; + for (int u = u_in; u != end; u = _thread[u]) { + _pi[u] += sigma; + } + } + + + // Heuristic initial pivots + bool initialPivots() { + Value curr, total = 0; + std::vector<Node> supply_nodes, demand_nodes; + Node u; _graph.first(u); + for (; u != INVALIDNODE; _graph.next(u)) { + curr = _supply[_node_id(u)]; + if (curr > 0) { + total += curr; + supply_nodes.push_back(u); + } else if (curr < 0) { + demand_nodes.push_back(u); + } + } + if (_sum_supply > 0) total -= _sum_supply; + if (total <= 0) return true; + + ArcVector arc_vector; + if (_sum_supply >= 0) { + if (supply_nodes.size() == 1 && demand_nodes.size() == 1) { + // Perform a reverse graph search from the sink to the source + //typename GR::template NodeMap<bool> reached(_graph, false); + BoolVector reached(_node_num, false); + Node s = supply_nodes[0], t = demand_nodes[0]; + std::vector<Node> stack; + reached[t] = true; + stack.push_back(t); + 
while (!stack.empty()) { + Node u, v = stack.back(); + stack.pop_back(); + if (v == s) break; + Arc a; _graph.firstIn(a, v); + for (; a != INVALID; _graph.nextIn(a)) { + if (reached[u = _graph.source(a)]) continue; + ArcsType j = getArcID(a); + arc_vector.push_back(j); + reached[u] = true; + stack.push_back(u); + } + } + } else { + arc_vector.resize(demand_nodes.size()); + // Find the min. cost incomming arc for each demand node +#pragma omp parallel for + for (int i = 0; i < demand_nodes.size(); ++i) { + Node v = demand_nodes[i]; + Cost min_cost = std::numeric_limits<Cost>::max(); + Arc min_arc = INVALID; + Arc a; _graph.firstIn(a, v); + for (; a != INVALID; _graph.nextIn(a)) { + Cost c = _cost[getArcID(a)]; + if (c < min_cost) { + min_cost = c; + min_arc = a; + } + } + arc_vector[i] = getArcID(min_arc); + } + arc_vector.erase(std::remove(arc_vector.begin(), arc_vector.end(), INVALID), arc_vector.end()); + } + } else { + arc_vector.resize(supply_nodes.size()); + // Find the min. cost outgoing arc for each supply node +#pragma omp parallel for + for (int i = 0; i < int(supply_nodes.size()); ++i) { + Node u = supply_nodes[i]; + Cost min_cost = std::numeric_limits<Cost>::max(); + Arc min_arc = INVALID; + Arc a; _graph.firstOut(a, u); + for (; a != INVALID; _graph.nextOut(a)) { + Cost c = _cost[getArcID(a)]; + if (c < min_cost) { + min_cost = c; + min_arc = a; + } + } + arc_vector[i] = getArcID(min_arc); + } + arc_vector.erase(std::remove(arc_vector.begin(), arc_vector.end(), INVALID), arc_vector.end()); + } + + // Perform heuristic initial pivots + for (ArcsType i = 0; i != ArcsType(arc_vector.size()); ++i) { + in_arc = arc_vector[i]; + if (_state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] - + _pi[_target[in_arc]]) >= 0) continue; + findJoinNode(); + bool change = findLeavingArc(); + if (delta >= MAX) return false; + changeFlow(change); + if (change) { + updateTreeStructure(); + updatePotential(); + } + } + return true; + } + + // Execute the algorithm + 
ProblemType start() { + return start<BlockSearchPivotRule>(); + } + + template <typename PivotRuleImpl> + ProblemType start() { + PivotRuleImpl pivot(*this); + ProblemType retVal = OPTIMAL; + + // Perform heuristic initial pivots + if (!initialPivots()) return UNBOUNDED; + + size_t iter_number = 0; + // Execute the Network Simplex algorithm + while (pivot.findEnteringArc()) { + if ((++iter_number <= max_iter&&max_iter > 0) || max_iter<=0) { +#if DEBUG_LVL>0 + if(iter_number>MAX_DEBUG_ITER) + break; + if(iter_number%1000==0||iter_number%1000==1){ + Cost curCost=totalCost(); + Value sumFlow=0; + Cost a; + a= (fabs(_pi[_source[in_arc]])>=fabs(_pi[_target[in_arc]])) ? fabs(_pi[_source[in_arc]]) : fabs(_pi[_target[in_arc]]); + a=a>=fabs(_cost[in_arc])?a:fabs(_cost[in_arc]); + for (int i=0; i<_flow.size(); i++) { + sumFlow+=_state[i]*_flow[i]; + } + std::cout << "Sum of the flow " << std::setprecision(20) << sumFlow << "\n" << iter_number << " iterations, current cost=" << curCost << "\nReduced cost=" << _state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -_pi[_target[in_arc]]) << "\nPrecision = "<< -EPSILON*(a) << "\n"; + std::cout << "Arc in = (" << _node_id(_source[in_arc]) << ", " << _node_id(_target[in_arc]) <<")\n"; + std::cout << "Supplies = (" << _supply[_source[in_arc]] << ", " << _supply[_target[in_arc]] << ")\n"; + std::cout << _cost[in_arc] << "\n"; + std::cout << _pi[_source[in_arc]] << "\n"; + std::cout << _pi[_target[in_arc]] << "\n"; + std::cout << a << "\n"; + } +#endif + + findJoinNode(); + bool change = findLeavingArc(); + if (delta >= MAX) return UNBOUNDED; + changeFlow(change); + if (change) { + updateTreeStructure(); + updatePotential(); + } + +#if DEBUG_LVL>0 + else{ + std::cout << "No change\n"; + } +#endif + +#if DEBUG_LVL>1 + std::cout << "Arc in = (" << _source[in_arc] << ", " << _target[in_arc] << ")\n"; +#endif + + + } else { + char errMess[1000]; + sprintf( errMess, "RESULT MIGHT BE INACURATE\nMax number of iteration reached, currently 
\%d. Sometimes iterations go on in cycle even though the solution has been reached, to check if it's the case here have a look at the minimal reduced cost. If it is very close to machine precision, you might actually have the correct solution, if not try setting the maximum number of iterations a bit higher\n",iter_number ); + std::cerr << errMess; + retVal = MAX_ITER_REACHED; + break; + } + + } + + + +#if DEBUG_LVL>0 + Cost curCost=totalCost(); + Value sumFlow=0; + Cost a; + a= (fabs(_pi[_source[in_arc]])>=fabs(_pi[_target[in_arc]])) ? fabs(_pi[_source[in_arc]]) : fabs(_pi[_target[in_arc]]); + a=a>=fabs(_cost[in_arc])?a:fabs(_cost[in_arc]); + for (int i=0; i<_flow.size(); i++) { + sumFlow+=_state[i]*_flow[i]; + } + + std::cout << "Sum of the flow " << std::setprecision(20) << sumFlow << "\n" << niter << " iterations, current cost=" << curCost << "\nReduced cost=" << _state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -_pi[_target[in_arc]]) << "\nPrecision = "<< -EPSILON*(a) << "\n"; + + std::cout << "Arc in = (" << _node_id(_source[in_arc]) << ", " << _node_id(_target[in_arc]) <<")\n"; + std::cout << "Supplies = (" << _supply[_source[in_arc]] << ", " << _supply[_target[in_arc]] << ")\n"; + +#endif + + + +#if DEBUG_LVL>1 + sumFlow=0; + for (int i=0; i<_flow.size(); i++) { + sumFlow+=_state[i]*_flow[i]; + if (_state[i]==STATE_TREE) { + std::cout << "Non zero value at (" << _node_num+1-_source[i] << ", " << _node_num+1-_target[i] << ")\n"; + } + } + std::cout << "Sum of the flow " << sumFlow << "\n"<< niter <<" iterations, current cost=" << totalCost() << "\n"; +#endif + + + + //Check feasibility + if(retVal == OPTIMAL){ + for (ArcsType e = _search_arc_num; e != _all_arc_num; ++e) { + if (_flow[e] != 0){ + if (fabs(_flow[e]) > _EPSILON) // change of the original code following issue #126 + return INFEASIBLE; + else + _flow[e]=0; + } + } + } + + // Shift potentials to meet the requirements of the GEQ/LEQ type + // optimality conditions + if (_sum_supply == 0) { + 
if (_stype == GEQ) { + Cost max_pot = -std::numeric_limits<Cost>::max(); + for (ArcsType i = 0; i != _node_num; ++i) { + if (_pi[i] > max_pot) max_pot = _pi[i]; + } + if (max_pot > 0) { + for (ArcsType i = 0; i != _node_num; ++i) + _pi[i] -= max_pot; + } + } else { + Cost min_pot = std::numeric_limits<Cost>::max(); + for (ArcsType i = 0; i != _node_num; ++i) { + if (_pi[i] < min_pot) min_pot = _pi[i]; + } + if (min_pot < 0) { + for (ArcsType i = 0; i != _node_num; ++i) + _pi[i] -= min_pot; + } + } + } + + return retVal; + } + + }; //class NetworkSimplexSimple + + ///@} + +} //namespace lemon_omp diff --git a/ot/lp/solver_1d.py b/ot/lp/solver_1d.py new file mode 100644 index 0000000..8b4d0c3 --- /dev/null +++ b/ot/lp/solver_1d.py @@ -0,0 +1,367 @@ +# -*- coding: utf-8 -*- +""" +Exact solvers for the 1D Wasserstein distance using cvxopt +""" + +# Author: Remi Flamary <remi.flamary@unice.fr> +# Author: Nicolas Courty <ncourty@irisa.fr> +# +# License: MIT License + +import numpy as np +import warnings + +from .emd_wrap import emd_1d_sorted +from ..backend import get_backend +from ..utils import list_to_array + + +def quantile_function(qs, cws, xs): + r""" Computes the quantile function of an empirical distribution + + Parameters + ---------- + qs: array-like, shape (n,) + Quantiles at which the quantile function is evaluated + cws: array-like, shape (m, ...) + cumulative weights of the 1D empirical distribution, if batched, must be similar to xs + xs: array-like, shape (n, ...) 
+ locations of the 1D empirical distribution, batched against the `xs.ndim - 1` first dimensions + + Returns + ------- + q: array-like, shape (..., n) + The quantiles of the distribution + """ + nx = get_backend(qs, cws) + n = xs.shape[0] + if nx.__name__ == 'torch': + # this is to ensure the best performance for torch searchsorted + # and avoid a warninng related to non-contiguous arrays + cws = cws.T.contiguous() + qs = qs.T.contiguous() + else: + cws = cws.T + qs = qs.T + idx = nx.searchsorted(cws, qs).T + return nx.take_along_axis(xs, nx.clip(idx, 0, n - 1), axis=0) + + +def wasserstein_1d(u_values, v_values, u_weights=None, v_weights=None, p=1, require_sort=True): + r""" + Computes the 1 dimensional OT loss [15] between two (batched) empirical + distributions + + .. math: + OT_{loss} = \int_0^1 |cdf_u^{-1}(q) cdf_v^{-1}(q)|^p dq + + It is formally the p-Wasserstein distance raised to the power p. + We do so in a vectorized way by first building the individual quantile functions then integrating them. + + This function should be preferred to `emd_1d` whenever the backend is + different to numpy, and when gradients over + either sample positions or weights are required. + + Parameters + ---------- + u_values: array-like, shape (n, ...) + locations of the first empirical distribution + v_values: array-like, shape (m, ...) + locations of the second empirical distribution + u_weights: array-like, shape (n, ...), optional + weights of the first empirical distribution, if None then uniform weights are used + v_weights: array-like, shape (m, ...), optional + weights of the second empirical distribution, if None then uniform weights are used + p: int, optional + order of the ground metric used, should be at least 1 (see [2, Chap. 
2], default is 1 + require_sort: bool, optional + sort the distributions atoms locations, if False we will consider they have been sorted prior to being passed to + the function, default is True + + Returns + ------- + cost: float/array-like, shape (...) + the batched EMD + + References + ---------- + .. [15] Peyré, G., & Cuturi, M. (2018). Computational Optimal Transport. + + """ + + assert p >= 1, "The OT loss is only valid for p>=1, {p} was given".format(p=p) + + if u_weights is not None and v_weights is not None: + nx = get_backend(u_values, v_values, u_weights, v_weights) + else: + nx = get_backend(u_values, v_values) + + n = u_values.shape[0] + m = v_values.shape[0] + + if u_weights is None: + u_weights = nx.full(u_values.shape, 1. / n) + elif u_weights.ndim != u_values.ndim: + u_weights = nx.repeat(u_weights[..., None], u_values.shape[-1], -1) + if v_weights is None: + v_weights = nx.full(v_values.shape, 1. / m) + elif v_weights.ndim != v_values.ndim: + v_weights = nx.repeat(v_weights[..., None], v_values.shape[-1], -1) + + if require_sort: + u_sorter = nx.argsort(u_values, 0) + u_values = nx.take_along_axis(u_values, u_sorter, 0) + + v_sorter = nx.argsort(v_values, 0) + v_values = nx.take_along_axis(v_values, v_sorter, 0) + + u_weights = nx.take_along_axis(u_weights, u_sorter, 0) + v_weights = nx.take_along_axis(v_weights, v_sorter, 0) + + u_cumweights = nx.cumsum(u_weights, 0) + v_cumweights = nx.cumsum(v_weights, 0) + + qs = nx.sort(nx.concatenate((u_cumweights, v_cumweights), 0), 0) + u_quantiles = quantile_function(qs, u_cumweights, u_values) + v_quantiles = quantile_function(qs, v_cumweights, v_values) + qs = nx.zero_pad(qs, pad_width=[(1, 0)] + (qs.ndim - 1) * [(0, 0)]) + delta = qs[1:, ...] - qs[:-1, ...] 
+ diff_quantiles = nx.abs(u_quantiles - v_quantiles) + + if p == 1: + return nx.sum(delta * nx.abs(diff_quantiles), axis=0) + return nx.sum(delta * nx.power(diff_quantiles, p), axis=0) + + +def emd_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True, + log=False): + r"""Solves the Earth Movers distance problem between 1d measures and returns + the OT matrix + + + .. math:: + \gamma = arg\min_\gamma \sum_i \sum_j \gamma_{ij} d(x_a[i], x_b[j]) + + s.t. \gamma 1 = a, + \gamma^T 1= b, + \gamma\geq 0 + where : + + - d is the metric + - x_a and x_b are the samples + - a and b are the sample weights + + When 'minkowski' is used as a metric, :math:`d(x, y) = |x - y|^p`. + + Uses the algorithm detailed in [1]_ + + Parameters + ---------- + x_a : (ns,) or (ns, 1) ndarray, float64 + Source dirac locations (on the real line) + x_b : (nt,) or (ns, 1) ndarray, float64 + Target dirac locations (on the real line) + a : (ns,) ndarray, float64, optional + Source histogram (default is uniform weight) + b : (nt,) ndarray, float64, optional + Target histogram (default is uniform weight) + metric: str, optional (default='sqeuclidean') + Metric to be used. Only strings listed in :func:`ot.dist` are accepted. + Due to implementation details, this function runs faster when + `'sqeuclidean'`, `'cityblock'`, or `'euclidean'` metrics are used. + p: float, optional (default=1.0) + The p-norm to apply for if metric='minkowski' + dense: boolean, optional (default=True) + If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt). + Otherwise returns a sparse representation using scipy's `coo_matrix` + format. Due to implementation details, this function runs faster when + `'sqeuclidean'`, `'minkowski'`, `'cityblock'`, or `'euclidean'` metrics + are used. + log: boolean, optional (default=False) + If True, returns a dictionary containing the cost. + Otherwise returns only the optimal transportation matrix. 
+ + Returns + ------- + gamma: (ns, nt) ndarray + Optimal transportation matrix for the given parameters + log: dict + If input log is True, a dictionary containing the cost + + + Examples + -------- + + Simple example with obvious solution. The function emd_1d accepts lists and + performs automatic conversion to numpy arrays + + >>> import ot + >>> a=[.5, .5] + >>> b=[.5, .5] + >>> x_a = [2., 0.] + >>> x_b = [0., 3.] + >>> ot.emd_1d(x_a, x_b, a, b) + array([[0. , 0.5], + [0.5, 0. ]]) + >>> ot.emd_1d(x_a, x_b) + array([[0. , 0.5], + [0.5, 0. ]]) + + References + ---------- + + .. [1] Peyré, G., & Cuturi, M. (2017). "Computational Optimal + Transport", 2018. + + See Also + -------- + ot.lp.emd : EMD for multidimensional distributions + ot.lp.emd2_1d : EMD for 1d distributions (returns cost instead of the + transportation matrix) + """ + a, b, x_a, x_b = list_to_array(a, b, x_a, x_b) + nx = get_backend(x_a, x_b) + + assert (x_a.ndim == 1 or x_a.ndim == 2 and x_a.shape[1] == 1), \ + "emd_1d should only be used with monodimensional data" + assert (x_b.ndim == 1 or x_b.ndim == 2 and x_b.shape[1] == 1), \ + "emd_1d should only be used with monodimensional data" + + # if empty array given then use uniform distributions + if a is None or a.ndim == 0 or len(a) == 0: + a = nx.ones((x_a.shape[0],), type_as=x_a) / x_a.shape[0] + if b is None or b.ndim == 0 or len(b) == 0: + b = nx.ones((x_b.shape[0],), type_as=x_b) / x_b.shape[0] + + # ensure that same mass + np.testing.assert_almost_equal( + nx.to_numpy(nx.sum(a, axis=0)), + nx.to_numpy(nx.sum(b, axis=0)), + err_msg='a and b vector must have the same sum' + ) + b = b * nx.sum(a) / nx.sum(b) + + x_a_1d = nx.reshape(x_a, (-1,)) + x_b_1d = nx.reshape(x_b, (-1,)) + perm_a = nx.argsort(x_a_1d) + perm_b = nx.argsort(x_b_1d) + + G_sorted, indices, cost = emd_1d_sorted( + nx.to_numpy(a[perm_a]).astype(np.float64), + nx.to_numpy(b[perm_b]).astype(np.float64), + nx.to_numpy(x_a_1d[perm_a]).astype(np.float64), + 
nx.to_numpy(x_b_1d[perm_b]).astype(np.float64), + metric=metric, p=p + ) + + G = nx.coo_matrix( + G_sorted, + perm_a[indices[:, 0]], + perm_b[indices[:, 1]], + shape=(a.shape[0], b.shape[0]), + type_as=x_a + ) + if dense: + G = nx.todense(G) + elif str(nx) == "jax": + warnings.warn("JAX does not support sparse matrices, converting to dense") + if log: + log = {'cost': nx.from_numpy(cost, type_as=x_a)} + return G, log + return G + + +def emd2_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True, + log=False): + r"""Solves the Earth Movers distance problem between 1d measures and returns + the loss + + + .. math:: + \gamma = arg\min_\gamma \sum_i \sum_j \gamma_{ij} d(x_a[i], x_b[j]) + + s.t. \gamma 1 = a, + \gamma^T 1= b, + \gamma\geq 0 + where : + + - d is the metric + - x_a and x_b are the samples + - a and b are the sample weights + + When 'minkowski' is used as a metric, :math:`d(x, y) = |x - y|^p`. + + Uses the algorithm detailed in [1]_ + + Parameters + ---------- + x_a : (ns,) or (ns, 1) ndarray, float64 + Source dirac locations (on the real line) + x_b : (nt,) or (ns, 1) ndarray, float64 + Target dirac locations (on the real line) + a : (ns,) ndarray, float64, optional + Source histogram (default is uniform weight) + b : (nt,) ndarray, float64, optional + Target histogram (default is uniform weight) + metric: str, optional (default='sqeuclidean') + Metric to be used. Only strings listed in :func:`ot.dist` are accepted. + Due to implementation details, this function runs faster when + `'sqeuclidean'`, `'minkowski'`, `'cityblock'`, or `'euclidean'` metrics + are used. + p: float, optional (default=1.0) + The p-norm to apply for if metric='minkowski' + dense: boolean, optional (default=True) + If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt). + Otherwise returns a sparse representation using scipy's `coo_matrix` + format. Only used if log is set to True. 
Due to implementation details, + this function runs faster when dense is set to False. + log: boolean, optional (default=False) + If True, returns a dictionary containing the transportation matrix. + Otherwise returns only the loss. + + Returns + ------- + loss: float + Cost associated to the optimal transportation + log: dict + If input log is True, a dictionary containing the Optimal transportation + matrix for the given parameters + + + Examples + -------- + + Simple example with obvious solution. The function emd2_1d accepts lists and + performs automatic conversion to numpy arrays + + >>> import ot + >>> a=[.5, .5] + >>> b=[.5, .5] + >>> x_a = [2., 0.] + >>> x_b = [0., 3.] + >>> ot.emd2_1d(x_a, x_b, a, b) + 0.5 + >>> ot.emd2_1d(x_a, x_b) + 0.5 + + References + ---------- + + .. [1] Peyré, G., & Cuturi, M. (2017). "Computational Optimal + Transport", 2018. + + See Also + -------- + ot.lp.emd2 : EMD for multidimensional distributions + ot.lp.emd_1d : EMD for 1d distributions (returns the transportation matrix + instead of the cost) + """ + # If we do not return G (log==False), then we should not to cast it to dense + # (useless overhead) + G, log_emd = emd_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric=metric, p=p, + dense=dense and log, log=True) + cost = log_emd['cost'] + if log: + log_emd = {'G': G} + return cost, log_emd + return cost |