author     kguerda-idris <84066930+kguerda-idris@users.noreply.github.com>    2021-09-29 15:29:31 +0200
committer  GitHub <noreply@github.com>    2021-09-29 15:29:31 +0200
commit     1c7e7ce2da8bb362c184fb6eae71fe7e36356494 (patch)
tree       92fdc31870b6d5384c8ba83ff72d85a2d5a1eee6
parent     7dde9e8e4b6aae756e103d49198caaa4f24150e3 (diff)
[MRG] OpenMP support (#260)
* Added: OpenMP support. Restored: Epsilon and Debug mode. Replaced: parmap => multiprocessing is now replaced by multithreading
* Commit clean-up
* Number of CPUs correctly calculated on SLURM clusters
* Corrected the number of processes for SLURM clusters
* Mistake corrected
* parmap is now deprecated
* A different solver is now used depending on the requested number of threads
* Tiny mistake corrected
* Folders are now in the ot library instead of at the root
* Helpers are now correctly placed
* Attempt to make compilation work smoothly
* OS-compatible paths
* numThreads now defaults to 1
* Better flags
* Mistake corrected in case of OpenMP unavailability
* Reverted the OpenMP flag modification, which did not compile on Windows
* Test helper functions
* Helpers comments
* Documentation update
* File title corrected
* Warning no longer uses print
* Last attempts at macOS compilation
* Solved a type error
* TypeError fix for OpenMP
* Compilation finally working on Windows
* Bug solved, number of threads now correctly selected
* 64-bit solver to avoid overflows on bigger problems
* 64-bit EMD corrected

Co-authored-by: kguerda-idris <ssos023@jean-zay3.idris.fr>
Co-authored-by: ncassereau-idris <84033440+ncassereau-idris@users.noreply.github.com>
Co-authored-by: ncassereau <nathan.cassereau@idris.fr>
Co-authored-by: Rémi Flamary <remi.flamary@gmail.com>
-rw-r--r--  ot/helpers/openmp_helpers.py          85
-rw-r--r--  ot/helpers/pre_build_helpers.py       87
-rw-r--r--  ot/lp/EMD.h                            5
-rw-r--r--  ot/lp/EMD_wrapper.cpp                124
-rw-r--r--  ot/lp/__init__.py                     71
-rw-r--r--  ot/lp/emd_wrap.pyx                     9
-rw-r--r--  ot/lp/full_bipartitegraph.h           27
-rw-r--r--  ot/lp/full_bipartitegraph_omp.h      234
-rw-r--r--  ot/lp/network_simplex_simple.h       210
-rw-r--r--  ot/lp/network_simplex_simple_omp.h  1699
-rw-r--r--  ot/utils.py                           38
-rw-r--r--  setup.py                              12
-rw-r--r--  test/test_helpers.py                  26
13 files changed, 2442 insertions, 185 deletions
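
The user-facing change is the new numThreads argument on the exact linear-program solvers. A minimal usage sketch (the histograms and the cost matrix below are made-up illustration data, not part of this commit):

    import numpy as np
    import ot

    n = 100
    a = np.ones(n) / n                 # uniform source histogram
    b = np.ones(n) / n                 # uniform target histogram
    M = np.random.rand(n, n)           # arbitrary cost matrix, float64 C-order

    G = ot.emd(a, b, M, numThreads=4)           # OpenMP network simplex with 4 threads
    cost = ot.emd2(a, b, M, numThreads="max")   # "max" -> use every available thread
    G_serial = ot.emd(a, b, M)                  # default numThreads=1 keeps the serial solver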
diff --git a/ot/helpers/openmp_helpers.py b/ot/helpers/openmp_helpers.py
new file mode 100644
index 0000000..a6ad38b
--- /dev/null
+++ b/ot/helpers/openmp_helpers.py
@@ -0,0 +1,85 @@
+"""Helpers for OpenMP support during the build."""
+
+# This code is adapted for a large part from the astropy openmp helpers, which
+# can be found at: https://github.com/astropy/extension-helpers/blob/master/extension_helpers/_openmp_helpers.py # noqa
+
+
+import os
+import sys
+import textwrap
+import subprocess
+
+from distutils.errors import CompileError, LinkError
+
+from pre_build_helpers import compile_test_program
+
+
+def get_openmp_flag(compiler):
+ """Get openmp flags for a given compiler"""
+
+ if hasattr(compiler, 'compiler'):
+ compiler = compiler.compiler[0]
+ else:
+ compiler = compiler.__class__.__name__
+
+ if sys.platform == "win32" and ('icc' in compiler or 'icl' in compiler):
+ omp_flag = ['/Qopenmp']
+ elif sys.platform == "win32":
+ omp_flag = ['/openmp']
+ elif sys.platform in ("darwin", "linux") and "icc" in compiler:
+ omp_flag = ['-qopenmp']
+ elif sys.platform == "darwin" and 'openmp' in os.getenv('CPPFLAGS', ''):
+ omp_flag = []
+ else:
+ # Default flag for GCC and clang:
+ omp_flag = ['-fopenmp']
+ if sys.platform.startswith("darwin"):
+ omp_flag += ["-Xpreprocessor", "-lomp"]
+ return omp_flag
+
+
+def check_openmp_support():
+ """Check whether OpenMP test code can be compiled and run"""
+
+ code = textwrap.dedent(
+ """\
+ #include <omp.h>
+ #include <stdio.h>
+ int main(void) {
+ #pragma omp parallel
+ printf("nthreads=%d\\n", omp_get_num_threads());
+ return 0;
+ }
+ """)
+
+ extra_preargs = os.getenv('LDFLAGS', None)
+ if extra_preargs is not None:
+ extra_preargs = extra_preargs.strip().split(" ")
+ extra_preargs = [
+ flag for flag in extra_preargs
+ if flag.startswith(('-L', '-Wl,-rpath', '-l'))]
+
+ extra_postargs = get_openmp_flag
+
+ try:
+ output, compile_flags = compile_test_program(
+ code,
+ extra_preargs=extra_preargs,
+ extra_postargs=extra_postargs
+ )
+
+ if output and 'nthreads=' in output[0]:
+ nthreads = int(output[0].strip().split('=')[1])
+ openmp_supported = len(output) == nthreads
+ elif "PYTHON_CROSSENV" in os.environ:
+ # Since we can't run the test program when cross-compiling
+ # assume that openmp is supported if the program can be
+ # compiled.
+ openmp_supported = True
+ else:
+ openmp_supported = False
+
+ except (CompileError, LinkError, subprocess.CalledProcessError):
+ openmp_supported = False
+ compile_flags = []
+ return openmp_supported, compile_flags
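
check_openmp_support() returns a (supported, flags) pair, where flags is the OpenMP flag list resolved by get_openmp_flag() (empty on failure). The setup.py that consumes it is part of this commit but not shown in this excerpt, so the following is only a hypothetical sketch of how a build script could use the helper; the compile_args/link_args names are illustrative, and it assumes ot/helpers is on the import path:

    import warnings
    from openmp_helpers import check_openmp_support  # assumes ot/helpers is on sys.path

    openmp_supported, openmp_flags = check_openmp_support()

    compile_args, link_args = ["-O3"], []
    if openmp_supported:
        # e.g. ["-fopenmp"] for GCC/clang, ["/openmp"] for MSVC, per get_openmp_flag()
        compile_args += openmp_flags
        link_args += openmp_flags
    else:
        warnings.warn("OpenMP not available: only the single-threaded EMD solver will be built.")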
diff --git a/ot/helpers/pre_build_helpers.py b/ot/helpers/pre_build_helpers.py
new file mode 100644
index 0000000..93ecd6a
--- /dev/null
+++ b/ot/helpers/pre_build_helpers.py
@@ -0,0 +1,87 @@
+"""Helpers to check build environment before actual build of POT"""
+
+import os
+import sys
+import glob
+import tempfile
+import setuptools # noqa
+import subprocess
+
+from distutils.dist import Distribution
+from distutils.sysconfig import customize_compiler
+from numpy.distutils.ccompiler import new_compiler
+from numpy.distutils.command.config_compiler import config_cc
+
+
+def _get_compiler():
+ """Get a compiler equivalent to the one that will be used to build POT
+ Handles compiler specified as follows:
+ - python setup.py build_ext --compiler=<compiler>
+ - CC=<compiler> python setup.py build_ext
+ """
+ dist = Distribution({'script_name': os.path.basename(sys.argv[0]),
+ 'script_args': sys.argv[1:],
+ 'cmdclass': {'config_cc': config_cc}})
+
+ cmd_opts = dist.command_options.get('build_ext')
+ if cmd_opts is not None and 'compiler' in cmd_opts:
+ compiler = cmd_opts['compiler'][1]
+ else:
+ compiler = None
+
+ ccompiler = new_compiler(compiler=compiler)
+ customize_compiler(ccompiler)
+
+ return ccompiler
+
+
+def compile_test_program(code, extra_preargs=[], extra_postargs=[]):
+ """Check that some C code can be compiled and run"""
+ ccompiler = _get_compiler()
+
+ # extra_(pre/post)args can be a callable to make it possible to get its
+ # value from the compiler
+ if callable(extra_preargs):
+ extra_preargs = extra_preargs(ccompiler)
+ if callable(extra_postargs):
+ extra_postargs = extra_postargs(ccompiler)
+
+ start_dir = os.path.abspath('.')
+
+ with tempfile.TemporaryDirectory() as tmp_dir:
+ try:
+ os.chdir(tmp_dir)
+
+ # Write test program
+ with open('test_program.c', 'w') as f:
+ f.write(code)
+
+ os.mkdir('objects')
+
+ # Compile, test program
+ ccompiler.compile(['test_program.c'], output_dir='objects',
+ extra_postargs=extra_postargs)
+
+ # Link test program
+ objects = glob.glob(
+ os.path.join('objects', '*' + ccompiler.obj_extension))
+ ccompiler.link_executable(objects, 'test_program',
+ extra_preargs=extra_preargs,
+ extra_postargs=extra_postargs)
+
+ if "PYTHON_CROSSENV" not in os.environ:
+ # Run test program if not cross compiling
+ # will raise a CalledProcessError if return code was non-zero
+ output = subprocess.check_output('./test_program')
+ output = output.decode(
+ sys.stdout.encoding or 'utf-8').splitlines()
+ else:
+ # Return an empty output if we are cross compiling
+ # as we cannot run the test_program
+ output = []
+ except Exception:
+ raise
+ finally:
+ os.chdir(start_dir)
+
+ return output, extra_postargs
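
compile_test_program() compiles, links and, unless PYTHON_CROSSENV is set, runs an arbitrary C snippet with the same compiler configuration that will build POT, returning the program's stdout lines together with the post-args it actually used. A small sketch of a direct call, again assuming ot/helpers is importable:

    import textwrap
    from pre_build_helpers import compile_test_program  # assumes ot/helpers is on sys.path

    code = textwrap.dedent("""\
        #include <stdio.h>
        int main(void) {
            printf("hello from the build compiler\\n");
            return 0;
        }
        """)

    output, used_postargs = compile_test_program(code)
    print(output)         # ['hello from the build compiler'] when run natively
    print(used_postargs)  # [] because no extra_postargs were passed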
diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h
index c0fe7a3..8a1f9ac 100644
--- a/ot/lp/EMD.h
+++ b/ot/lp/EMD.h
@@ -18,19 +18,18 @@
#include <iostream>
#include <vector>
-#include "network_simplex_simple.h"
-using namespace lemon;
typedef unsigned int node_id_type;
enum ProblemType {
INFEASIBLE,
OPTIMAL,
UNBOUNDED,
- MAX_ITER_REACHED
+ MAX_ITER_REACHED
};
int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter);
+int EMD_wrap_omp(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter, int numThreads);
diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp
index bc873ed..2bdc172 100644
--- a/ot/lp/EMD_wrapper.cpp
+++ b/ot/lp/EMD_wrapper.cpp
@@ -12,16 +12,22 @@
*
*/
+
+#include "network_simplex_simple.h"
+#include "network_simplex_simple_omp.h"
#include "EMD.h"
+#include <cstdint>
int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G,
double* alpha, double* beta, double *cost, int maxIter) {
- // beware M and C anre strored in row major C style!!!
- int n, m, i, cur;
+ // beware M and C are stored in row major C style!!!
+
+ using namespace lemon;
+ int n, m, cur;
typedef FullBipartiteDigraph Digraph;
- DIGRAPH_TYPEDEFS(FullBipartiteDigraph);
+ DIGRAPH_TYPEDEFS(Digraph);
// Get the number of non zero coordinates for r and c
n=0;
@@ -48,7 +54,7 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G,
std::vector<int> indI(n), indJ(m);
std::vector<double> weights1(n), weights2(m);
Digraph di(n, m);
- NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m, maxIter);
+ NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, ((int64_t)n)*((int64_t)m), maxIter);
// Set supply and demand, don't account for 0 values (faster)
@@ -76,10 +82,12 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G,
net.supplyMap(&weights1[0], n, &weights2[0], m);
// Set the cost of each edge
+ int64_t idarc = 0;
for (int i=0; i<n; i++) {
for (int j=0; j<m; j++) {
double val=*(D+indI[i]*n2+indJ[j]);
- net.setCost(di.arcFromId(i*m+j), val);
+ net.setCost(di.arcFromId(idarc), val);
+ ++idarc;
}
}
@@ -87,12 +95,13 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G,
// Solve the problem with the network simplex algorithm
int ret=net.run();
+ int i, j;
if (ret==(int)net.OPTIMAL || ret==(int)net.MAX_ITER_REACHED) {
*cost = 0;
Arc a; di.first(a);
for (; a != INVALID; di.next(a)) {
- int i = di.source(a);
- int j = di.target(a);
+ i = di.source(a);
+ j = di.target(a);
double flow = net.flow(a);
*cost += flow * (*(D+indI[i]*n2+indJ[j-n]));
*(G+indI[i]*n2+indJ[j-n]) = flow;
@@ -106,3 +115,104 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G,
return ret;
}
+
+
+
+
+
+
+int EMD_wrap_omp(int n1, int n2, double *X, double *Y, double *D, double *G,
+ double* alpha, double* beta, double *cost, int maxIter, int numThreads) {
+ // beware M and C are stored in row major C style!!!
+
+ using namespace lemon_omp;
+ int n, m, cur;
+
+ typedef FullBipartiteDigraph Digraph;
+ DIGRAPH_TYPEDEFS(Digraph);
+
+ // Get the number of non zero coordinates for r and c
+ n=0;
+ for (int i=0; i<n1; i++) {
+ double val=*(X+i);
+ if (val>0) {
+ n++;
+ }else if(val<0){
+ return INFEASIBLE;
+ }
+ }
+ m=0;
+ for (int i=0; i<n2; i++) {
+ double val=*(Y+i);
+ if (val>0) {
+ m++;
+ }else if(val<0){
+ return INFEASIBLE;
+ }
+ }
+
+ // Define the graph
+
+ std::vector<int> indI(n), indJ(m);
+ std::vector<double> weights1(n), weights2(m);
+ Digraph di(n, m);
+ NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, ((int64_t)n)*((int64_t)m), maxIter, numThreads);
+
+ // Set supply and demand, don't account for 0 values (faster)
+
+ cur=0;
+ for (int i=0; i<n1; i++) {
+ double val=*(X+i);
+ if (val>0) {
+ weights1[ cur ] = val;
+ indI[cur++]=i;
+ }
+ }
+
+ // Demand is actually negative supply...
+
+ cur=0;
+ for (int i=0; i<n2; i++) {
+ double val=*(Y+i);
+ if (val>0) {
+ weights2[ cur ] = -val;
+ indJ[cur++]=i;
+ }
+ }
+
+
+ net.supplyMap(&weights1[0], n, &weights2[0], m);
+
+ // Set the cost of each edge
+ int64_t idarc = 0;
+ for (int i=0; i<n; i++) {
+ for (int j=0; j<m; j++) {
+ double val=*(D+indI[i]*n2+indJ[j]);
+ net.setCost(di.arcFromId(idarc), val);
+ ++idarc;
+ }
+ }
+
+
+ // Solve the problem with the network simplex algorithm
+
+ int ret=net.run();
+ int i, j;
+ if (ret==(int)net.OPTIMAL || ret==(int)net.MAX_ITER_REACHED) {
+ *cost = 0;
+ Arc a; di.first(a);
+ for (; a != INVALID; di.next(a)) {
+ i = di.source(a);
+ j = di.target(a);
+ double flow = net.flow(a);
+ *cost += flow * (*(D+indI[i]*n2+indJ[j-n]));
+ *(G+indI[i]*n2+indJ[j-n]) = flow;
+ *(alpha + indI[i]) = -net.potential(i);
+ *(beta + indJ[j-n]) = net.potential(j);
+ }
+
+ }
+
+
+ return ret;
+}
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index c8c9da6..b907b10 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -8,11 +8,13 @@ Solvers for the original linear program OT problem
#
# License: MIT License
+import os
import multiprocessing
import sys
import numpy as np
from scipy.sparse import coo_matrix
+import warnings
from . import cvx
from .cvx import barycenter
@@ -25,9 +27,28 @@ from ..backend import get_backend
__all__ = ['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx',
'emd_1d', 'emd2_1d', 'wasserstein_1d']
+def check_number_threads(numThreads):
+ """Checks whether or not the requested number of threads has a valid value.
+
+ Parameters
+ ----------
+ numThreads : int or str
+ The requested number of threads; should be a strictly positive integer, "max", or None
+
+ Returns
+ -------
+ numThreads : int
+ Corrected number of threads
+ """
+ if (numThreads is None) or (isinstance(numThreads, str) and numThreads.lower() == 'max'):
+ return -1
+ if (not isinstance(numThreads, int)) or numThreads < 1:
+ raise ValueError('numThreads should either be "max" or a strictly positive integer')
+ return numThreads
+
def center_ot_dual(alpha0, beta0, a=None, b=None):
- r"""Center dual OT potentials w.r.t. theirs weights
+ r"""Center dual OT potentials w.r.t. their weights
The main idea of this function is to find unique dual potentials
that ensure some kind of centering/fairness. The main idea is to find dual potentials that lead to the same final objective value for both source and targets (see below for more details). It will help having
@@ -173,7 +194,7 @@ def estimate_dual_null_weights(alpha0, beta0, a, b, M):
return center_ot_dual(alpha, beta, a, b)
-def emd(a, b, M, numItermax=100000, log=False, center_dual=True):
+def emd(a, b, M, numItermax=100000, log=False, center_dual=True, numThreads=1):
r"""Solves the Earth Movers distance problem and returns the OT matrix
@@ -215,6 +236,9 @@ def emd(a, b, M, numItermax=100000, log=False, center_dual=True):
center_dual: boolean, optional (default=True)
If True, centers the dual potential using function
:ref:`center_ot_dual`.
+ numThreads: int or "max", optional (default=1, i.e. OpenMP is not used)
+ If compiled with OpenMP, chooses the number of threads to parallelize.
+ "max" selects the highest number possible.
Returns
-------
@@ -285,7 +309,9 @@ def emd(a, b, M, numItermax=100000, log=False, center_dual=True):
asel = a != 0
bsel = b != 0
- G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
+ numThreads = check_number_threads(numThreads)
+
+ G, cost, u, v, result_code = emd_c(a, b, M, numItermax, numThreads)
if center_dual:
u, v = center_ot_dual(u, v, a, b)
@@ -305,9 +331,9 @@ def emd(a, b, M, numItermax=100000, log=False, center_dual=True):
return nx.from_numpy(G, type_as=M0)
-def emd2(a, b, M, processes=multiprocessing.cpu_count(),
+def emd2(a, b, M, processes=1,
numItermax=100000, log=False, return_matrix=False,
- center_dual=True):
+ center_dual=True, numThreads=1):
r"""Solves the Earth Movers distance problem and returns the loss
.. math::
@@ -336,8 +362,8 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
Target histogram (uniform weight if empty list)
M : (ns,nt) array-like, float64
Loss matrix (for numpy c-order array with type float64)
- processes : int, optional (default=nb cpu)
- Nb of processes used for multiple emd computation (not used on windows)
+ processes : int, optional (default=1)
+ Nb of processes used for multiple emd computation (deprecated)
numItermax : int, optional (default=100000)
The maximum number of iterations before stopping the optimization
algorithm if it has not converged.
@@ -349,6 +375,9 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
center_dual: boolean, optional (default=True)
If True, centers the dual potential using function
:ref:`center_ot_dual`.
+ numThreads: int or "max", optional (default=1, i.e. OpenMP is not used)
+ If compiled with OpenMP, chooses the number of threads to parallelize.
+ "max" selects the highest number possible.
Returns
-------
@@ -390,7 +419,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
a0, b0, M0 = a, b, M
nx = get_backend(M0, a0, b0)
-
+
# convert to numpy
M = nx.to_numpy(M)
a = nx.to_numpy(a)
@@ -400,10 +429,6 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
b = np.asarray(b, dtype=np.float64)
M = np.asarray(M, dtype=np.float64, order= 'C')
- # problem with pikling Forks
- if sys.platform.endswith('win32') or not nx.__name__ == 'numpy':
- processes = 1
-
# if empty array given then use uniform distributions
if len(a) == 0:
a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
@@ -415,11 +440,13 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
asel = a != 0
+ numThreads = check_number_threads(numThreads)
+
if log or return_matrix:
def f(b):
bsel = b != 0
- G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
+ G, cost, u, v, result_code = emd_c(a, b, M, numItermax, numThreads)
if center_dual:
u, v = center_ot_dual(u, v, a, b)
@@ -442,7 +469,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
else:
def f(b):
bsel = b != 0
- G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
+ G, cost, u, v, result_code = emd_c(a, b, M, numItermax, numThreads)
if center_dual:
u, v = center_ot_dual(u, v, a, b)
@@ -463,15 +490,17 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
nb = b.shape[1]
if processes > 1:
- res = parmap(f, [b[:, i].copy() for i in range(nb)], processes)
- else:
- res = list(map(f, [b[:, i].copy() for i in range(nb)]))
+ warnings.warn(
+ "The 'processes' parameter has been deprecated. "
+ "Multiprocessing should be done outside of POT."
+ )
+ res = list(map(f, [b[:, i].copy() for i in range(nb)]))
return res
def free_support_barycenter(measures_locations, measures_weights, X_init, b=None, weights=None, numItermax=100,
- stopThr=1e-7, verbose=False, log=None):
+ stopThr=1e-7, verbose=False, log=None, numThreads=1):
r"""
Solves the free support (locations of the barycenters are optimized, not the weights) Wasserstein barycenter problem (i.e. the weighted Frechet mean for the 2-Wasserstein distance), formally:
@@ -512,6 +541,10 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
Print information along iterations
log : bool, optional
record log if True
+ numThreads: int or "max", optional (default=1, i.e. OpenMP is not used)
+ If compiled with OpenMP, chooses the number of threads to parallelize.
+ "max" selects the highest number possible.
+
Returns
-------
@@ -551,7 +584,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
for (measure_locations_i, measure_weights_i, weight_i) in zip(measures_locations, measures_weights,
weights.tolist()):
M_i = dist(X, measure_locations_i)
- T_i = emd(b, measure_weights_i, M_i)
+ T_i = emd(b, measure_weights_i, M_i, numThreads=numThreads)
T_sum = T_sum + weight_i * np.reshape(1. / b, (-1, 1)) * np.matmul(T_i, measure_locations_i)
displacement_square_norm = np.sum(np.square(T_sum - X))
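
check_number_threads() is the glue between the documented API values and what emd_c expects: -1 tells the OpenMP solver to use every available thread. A short sketch of the mapping:

    from ot.lp import check_number_threads

    check_number_threads(1)      # -> 1, keeps the serial EMD_wrap solver
    check_number_threads(8)      # -> 8 OpenMP threads
    check_number_threads("max")  # -> -1, i.e. let the solver pick omp_get_max_threads()
    check_number_threads(None)   # -> -1 as well
    check_number_threads(0)      # raises ValueError ("max" or a strictly positive integer)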
diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
index de9a700..42e08f4 100644
--- a/ot/lp/emd_wrap.pyx
+++ b/ot/lp/emd_wrap.pyx
@@ -20,6 +20,7 @@ import warnings
cdef extern from "EMD.h":
int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter) nogil
+ int EMD_wrap_omp(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter, int numThreads) nogil
cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED
@@ -38,7 +39,7 @@ def check_result(result_code):
@cython.boundscheck(False)
@cython.wraparound(False)
-def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int max_iter):
+def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int max_iter, int numThreads):
"""
Solves the Earth Movers distance problem and returns the optimal transport matrix
@@ -109,8 +110,10 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
# calling the function
with nogil:
- result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
-
+ if numThreads == 1:
+ result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
+ else:
+ result_code = EMD_wrap_omp(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter, numThreads)
return G, cost, alpha, beta, result_code
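
For completeness, the low-level entry point can also be called directly, although ot.emd/ot.emd2 remain the supported interface. An illustrative sketch, reusing a, b, M from the usage example near the top:

    from ot.lp.emd_wrap import emd_c, check_result

    numThreads = 4   # 1 routes to EMD_wrap, anything else to EMD_wrap_omp
    G, cost, u, v, result_code = emd_c(a, b, M, 100000, numThreads)
    check_result(result_code)   # warns if the solver did not reach an optimal solution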
diff --git a/ot/lp/full_bipartitegraph.h b/ot/lp/full_bipartitegraph.h
index 87a1bec..713ccb5 100644
--- a/ot/lp/full_bipartitegraph.h
+++ b/ot/lp/full_bipartitegraph.h
@@ -23,10 +23,10 @@
*
*/
-#ifndef LEMON_FULL_BIPARTITE_GRAPH_H
-#define LEMON_FULL_BIPARTITE_GRAPH_H
+#pragma once
#include "core.h"
+#include <cstdint>
///\ingroup graphs
///\file
@@ -44,16 +44,16 @@ namespace lemon {
//class Node;
typedef int Node;
//class Arc;
- typedef long long Arc;
+ typedef int64_t Arc;
protected:
int _node_num;
- long long _arc_num;
+ int64_t _arc_num;
FullBipartiteDigraphBase() {}
- void construct(int n1, int n2) { _node_num = n1+n2; _arc_num = n1 * n2; _n1=n1; _n2=n2;}
+ void construct(int n1, int n2) { _node_num = n1+n2; _arc_num = (int64_t)n1 * (int64_t)n2; _n1=n1; _n2=n2;}
public:
@@ -65,25 +65,25 @@ namespace lemon {
Arc arc(const Node& s, const Node& t) const {
if (s<_n1 && t>=_n1)
- return Arc(s * _n2 + (t-_n1) );
+ return Arc((int64_t)s * (int64_t)_n2 + (int64_t)(t-_n1) );
else
return Arc(-1);
}
int nodeNum() const { return _node_num; }
- long long arcNum() const { return _arc_num; }
+ int64_t arcNum() const { return _arc_num; }
int maxNodeId() const { return _node_num - 1; }
- long long maxArcId() const { return _arc_num - 1; }
+ int64_t maxArcId() const { return _arc_num - 1; }
Node source(Arc arc) const { return arc / _n2; }
Node target(Arc arc) const { return (arc % _n2) + _n1; }
static int id(Node node) { return node; }
- static long long id(Arc arc) { return arc; }
+ static int64_t id(Arc arc) { return arc; }
static Node nodeFromId(int id) { return Node(id);}
- static Arc arcFromId(int id) { return Arc(id);}
+ static Arc arcFromId(int64_t id) { return Arc(id);}
Arc findArc(Node s, Node t, Arc prev = -1) const {
@@ -136,7 +136,7 @@ namespace lemon {
///
/// \brief A directed full graph class.
///
- /// FullBipartiteDigraph is a simple and fast implmenetation of directed full
+ /// FullBipartiteDigraph is a simple and fast implementation of directed full
/// (complete) graphs. It contains an arc from each node to each node
/// (including a loop for each node), therefore the number of arcs
/// is the square of the number of nodes.
@@ -203,13 +203,10 @@ namespace lemon {
/// \brief Number of nodes.
int nodeNum() const { return Parent::nodeNum(); }
/// \brief Number of arcs.
- long long arcNum() const { return Parent::arcNum(); }
+ int64_t arcNum() const { return Parent::arcNum(); }
};
} //namespace lemon
-
-
-#endif //LEMON_FULL_GRAPH_H
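
The int to int64_t switch for arc ids fixes a silent overflow: the id of arc (s, t) is s * n2 + (t - n1), which no longer fits in 32 bits once n1 * n2 exceeds about 2.1e9 (roughly a 46k x 46k problem). A quick numeric illustration:

    import numpy as np

    n1 = n2 = 70000                        # large but plausible bipartite OT problem
    print(np.int64(n1) * np.int64(n2))     # 4900000000 arcs, fits comfortably in int64
    print(np.int32(n1) * np.int32(n2))     # wraps around (with a RuntimeWarning): 605032704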
diff --git a/ot/lp/full_bipartitegraph_omp.h b/ot/lp/full_bipartitegraph_omp.h
new file mode 100644
index 0000000..8cbed0b
--- /dev/null
+++ b/ot/lp/full_bipartitegraph_omp.h
@@ -0,0 +1,234 @@
+/* -*- mode: C++; indent-tabs-mode: nil; -*-
+ *
+ * This file has been adapted by Nicolas Bonneel (2013),
+ * from full_graph.h from LEMON, a generic C++ optimization library,
+ * to implement a lightweight fully connected bipartite graph. A previous
+ * version of this file is used as part of the Displacement Interpolation
+ * project,
+ * Web: http://www.cs.ubc.ca/labs/imager/tr/2011/DisplacementInterpolation/
+ *
+ *
+ **** Original file Copyright Notice :
+ * Copyright (C) 2003-2010
+ * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport
+ * (Egervary Research Group on Combinatorial Optimization, EGRES).
+ *
+ * Permission to use, modify and distribute this software is granted
+ * provided that this copyright notice appears in all copies. For
+ * precise terms see the accompanying LICENSE file.
+ *
+ * This software is provided "AS IS" with no warranty of any kind,
+ * express or implied, and with no claim as to its suitability for any
+ * purpose.
+ *
+ */
+
+#pragma once
+
+#include <cstdint>
+
+///\ingroup graphs
+///\file
+///\brief FullBipartiteDigraph and FullBipartiteGraph classes.
+
+
+namespace lemon_omp {
+
+ ///This \c \#define creates convenient type definitions for the following
+ ///types of \c Digraph: \c Node, \c NodeIt, \c Arc, \c ArcIt, \c InArcIt,
+ ///\c OutArcIt, \c BoolNodeMap, \c IntNodeMap, \c DoubleNodeMap,
+ ///\c BoolArcMap, \c IntArcMap, \c DoubleArcMap.
+ ///
+ ///\note If the graph type is a dependent type, ie. the graph type depend
+ ///on a template parameter, then use \c TEMPLATE_DIGRAPH_TYPEDEFS()
+ ///macro.
+#define DIGRAPH_TYPEDEFS(Digraph) \
+ typedef Digraph::Node Node; \
+ typedef Digraph::Arc Arc; \
+
+
+ ///Create convenience typedefs for the digraph types and iterators
+
+ ///\see DIGRAPH_TYPEDEFS
+ ///
+ ///\note Use this macro, if the graph type is a dependent type,
+ ///ie. the graph type depend on a template parameter.
+#define TEMPLATE_DIGRAPH_TYPEDEFS(Digraph) \
+ typedef typename Digraph::Node Node; \
+ typedef typename Digraph::Arc Arc; \
+
+
+ class FullBipartiteDigraphBase {
+ public:
+
+ typedef FullBipartiteDigraphBase Digraph;
+
+ //class Node;
+ typedef int Node;
+ //class Arc;
+ typedef int64_t Arc;
+
+ protected:
+
+ int _node_num;
+ int64_t _arc_num;
+
+ FullBipartiteDigraphBase() {}
+
+ void construct(int n1, int n2) { _node_num = n1+n2; _arc_num = (int64_t)n1 * (int64_t)n2; _n1=n1; _n2=n2;}
+
+ public:
+
+ int _n1, _n2;
+
+
+ Node operator()(int ix) const { return Node(ix); }
+ static int index(const Node& node) { return node; }
+
+ Arc arc(const Node& s, const Node& t) const {
+ if (s<_n1 && t>=_n1)
+ return Arc((int64_t)s * (int64_t)_n2 + (int64_t)(t-_n1) );
+ else
+ return Arc(-1);
+ }
+
+ int nodeNum() const { return _node_num; }
+ int64_t arcNum() const { return _arc_num; }
+
+ int maxNodeId() const { return _node_num - 1; }
+ int64_t maxArcId() const { return _arc_num - 1; }
+
+ Node source(Arc arc) const { return arc / _n2; }
+ Node target(Arc arc) const { return (arc % _n2) + _n1; }
+
+ static int id(Node node) { return node; }
+ static int64_t id(Arc arc) { return arc; }
+
+ static Node nodeFromId(int id) { return Node(id);}
+ static Arc arcFromId(int64_t id) { return Arc(id);}
+
+
+ Arc findArc(Node s, Node t, Arc prev = -1) const {
+ return prev == -1 ? arc(s, t) : -1;
+ }
+
+ void first(Node& node) const {
+ node = _node_num - 1;
+ }
+
+ static void next(Node& node) {
+ --node;
+ }
+
+ void first(Arc& arc) const {
+ arc = _arc_num - 1;
+ }
+
+ static void next(Arc& arc) {
+ --arc;
+ }
+
+ void firstOut(Arc& arc, const Node& node) const {
+ if (node>=_n1)
+ arc = -1;
+ else
+ arc = (node + 1) * _n2 - 1;
+ }
+
+ void nextOut(Arc& arc) const {
+ if (arc % _n2 == 0) arc = 0;
+ --arc;
+ }
+
+ void firstIn(Arc& arc, const Node& node) const {
+ if (node<_n1)
+ arc = -1;
+ else
+ arc = _arc_num + node - _node_num;
+ }
+
+ void nextIn(Arc& arc) const {
+ arc -= _n2;
+ if (arc < 0) arc = -1;
+ }
+
+ };
+
+ /// \ingroup graphs
+ ///
+ /// \brief A directed full graph class.
+ ///
+ /// FullBipartiteDigraph is a simple and fast implementation of directed full
+ /// (complete) graphs. It contains an arc from each node to each node
+ /// (including a loop for each node), therefore the number of arcs
+ /// is the square of the number of nodes.
+ /// This class is completely static and it needs constant memory space.
+ /// Thus you can neither add nor delete nodes or arcs, however
+ /// the structure can be resized using resize().
+ ///
+ /// This type fully conforms to the \ref concepts::Digraph "Digraph concept".
+ /// Most of its member functions and nested classes are documented
+ /// only in the concept class.
+ ///
+ /// This class provides constant time counting for nodes and arcs.
+ ///
+ /// \note FullBipartiteDigraph and FullBipartiteGraph classes are very similar,
+ /// but there are two differences. While this class conforms only
+ /// to the \ref concepts::Digraph "Digraph" concept, FullBipartiteGraph
+ /// conforms to the \ref concepts::Graph "Graph" concept,
+ /// moreover FullBipartiteGraph does not contain a loop for each
+ /// node as this class does.
+ ///
+ /// \sa FullBipartiteGraph
+ class FullBipartiteDigraph : public FullBipartiteDigraphBase {
+ typedef FullBipartiteDigraphBase Parent;
+
+ public:
+
+ /// \brief Default constructor.
+ ///
+ /// Default constructor. The number of nodes and arcs will be zero.
+ FullBipartiteDigraph() { construct(0,0); }
+
+ /// \brief Constructor
+ ///
+ /// Constructor.
+ /// \param n The number of the nodes.
+ FullBipartiteDigraph(int n1, int n2) { construct(n1, n2); }
+
+
+ /// \brief Returns the node with the given index.
+ ///
+ /// Returns the node with the given index. Since this structure is
+ /// completely static, the nodes can be indexed with integers from
+ /// the range <tt>[0..nodeNum()-1]</tt>.
+ /// The index of a node is the same as its ID.
+ /// \sa index()
+ Node operator()(int ix) const { return Parent::operator()(ix); }
+
+ /// \brief Returns the index of the given node.
+ ///
+ /// Returns the index of the given node. Since this structure is
+ /// completely static, the nodes can be indexed with integers from
+ /// the range <tt>[0..nodeNum()-1]</tt>.
+ /// The index of a node is the same as its ID.
+ /// \sa operator()()
+ static int index(const Node& node) { return Parent::index(node); }
+
+ /// \brief Returns the arc connecting the given nodes.
+ ///
+ /// Returns the arc connecting the given nodes.
+ /*Arc arc(Node u, Node v) const {
+ return Parent::arc(u, v);
+ }*/
+
+ /// \brief Number of nodes.
+ int nodeNum() const { return Parent::nodeNum(); }
+ /// \brief Number of arcs.
+ int64_t arcNum() const { return Parent::arcNum(); }
+ };
+
+
+
+
+} //namespace lemon_omp
diff --git a/ot/lp/network_simplex_simple.h b/ot/lp/network_simplex_simple.h
index 630b595..3b46b9b 100644
--- a/ot/lp/network_simplex_simple.h
+++ b/ot/lp/network_simplex_simple.h
@@ -25,15 +25,17 @@
*
*/
-#ifndef LEMON_NETWORK_SIMPLEX_SIMPLE_H
-#define LEMON_NETWORK_SIMPLEX_SIMPLE_H
+#pragma once
+#undef DEBUG_LVL
#define DEBUG_LVL 0
#if DEBUG_LVL>0
#include <iomanip>
#endif
-
+#undef EPSILON
+#undef _EPSILON
+#undef MAX_DEBUG_ITER
#define EPSILON 2.2204460492503131e-15
#define _EPSILON 1e-8
#define MAX_DEBUG_ITER 100000
@@ -50,6 +52,7 @@
#include <vector>
#include <limits>
#include <algorithm>
+#include <iostream>
#include <cstdio>
#ifdef HASHMAP
#include <hash_map>
@@ -63,6 +66,8 @@
//#include "sparse_array_n.h"
#include "full_bipartitegraph.h"
+#undef INVALIDNODE
+#undef INVALID
#define INVALIDNODE -1
#define INVALID (-1)
@@ -76,16 +81,16 @@ namespace lemon {
class SparseValueVector
{
public:
- SparseValueVector(int n=0)
+ SparseValueVector(size_t n=0)
{
}
- void resize(int n=0){};
- T operator[](const int id) const
+ void resize(size_t n=0){};
+ T operator[](const size_t id) const
{
#ifdef HASHMAP
- typename stdext::hash_map<int,T>::const_iterator it = data.find(id);
+ typename stdext::hash_map<size_t,T>::const_iterator it = data.find(id);
#else
- typename std::map<int,T>::const_iterator it = data.find(id);
+ typename std::map<size_t,T>::const_iterator it = data.find(id);
#endif
if (it==data.end())
return 0;
@@ -93,16 +98,16 @@ namespace lemon {
return it->second;
}
- ProxyObject<T> operator[](const int id)
+ ProxyObject<T> operator[](const size_t id)
{
return ProxyObject<T>( this, id );
}
//private:
#ifdef HASHMAP
- stdext::hash_map<int,T> data;
+ stdext::hash_map<size_t,T> data;
#else
- std::map<int,T> data;
+ std::map<size_t,T> data;
#endif
};
@@ -110,7 +115,7 @@ namespace lemon {
template <typename T>
class ProxyObject {
public:
- ProxyObject( SparseValueVector<T> *v, int idx ){_v=v; _idx=idx;};
+ ProxyObject( SparseValueVector<T> *v, size_t idx ){_v=v; _idx=idx;};
ProxyObject<T> & operator=( const T &v ) {
// If we get here, we know that operator[] was called to perform a write access,
// so we can insert an item in the vector if needed
@@ -123,9 +128,9 @@ namespace lemon {
// If we get here, we know that operator[] was called to perform a read access,
// so we can simply return the existing object
#ifdef HASHMAP
- typename stdext::hash_map<int,T>::iterator it = _v->data.find(_idx);
+ typename stdext::hash_map<size_t,T>::iterator it = _v->data.find(_idx);
#else
- typename std::map<int,T>::iterator it = _v->data.find(_idx);
+ typename std::map<size_t,T>::iterator it = _v->data.find(_idx);
#endif
if (it==_v->data.end())
return 0;
@@ -137,9 +142,9 @@ namespace lemon {
{
if (val==0) return;
#ifdef HASHMAP
- typename stdext::hash_map<int,T>::iterator it = _v->data.find(_idx);
+ typename stdext::hash_map<size_t,T>::iterator it = _v->data.find(_idx);
#else
- typename std::map<int,T>::iterator it = _v->data.find(_idx);
+ typename std::map<size_t,T>::iterator it = _v->data.find(_idx);
#endif
if (it==_v->data.end())
_v->data[_idx] = val;
@@ -156,9 +161,9 @@ namespace lemon {
{
if (val==0) return;
#ifdef HASHMAP
- typename stdext::hash_map<int,T>::iterator it = _v->data.find(_idx);
+ typename stdext::hash_map<size_t,T>::iterator it = _v->data.find(_idx);
#else
- typename std::map<int,T>::iterator it = _v->data.find(_idx);
+ typename std::map<size_t,T>::iterator it = _v->data.find(_idx);
#endif
if (it==_v->data.end())
_v->data[_idx] = -val;
@@ -173,7 +178,7 @@ namespace lemon {
}
SparseValueVector<T> *_v;
- int _idx;
+ size_t _idx;
};
@@ -204,7 +209,7 @@ namespace lemon {
///
/// \tparam GR The digraph type the algorithm runs on.
/// \tparam V The number type used for flow amounts, capacity bounds
- /// and supply values in the algorithm. By default, it is \c int.
+ /// and supply values in the algorithm. By default, it is \c int64_t.
/// \tparam C The number type used for costs and potentials in the
/// algorithm. By default, it is the same as \c V.
///
@@ -214,7 +219,7 @@ namespace lemon {
/// \note %NetworkSimplexSimple provides five different pivot rule
/// implementations, from which the most efficient one is used
/// by default. For more information, see \ref PivotRule.
- template <typename GR, typename V = int, typename C = V, typename NodesType = unsigned short int>
+ template <typename GR, typename V = int, typename C = V, typename NodesType = unsigned short int, typename ArcsType = int64_t>
class NetworkSimplexSimple
{
public:
@@ -228,7 +233,7 @@ namespace lemon {
/// mixed order in the internal data structure.
/// In special cases, it could lead to better overall performance,
/// but it is usually slower. Therefore it is disabled by default.
- NetworkSimplexSimple(const GR& graph, bool arc_mixing, int nbnodes, long long nb_arcs,int maxiters) :
+ NetworkSimplexSimple(const GR& graph, bool arc_mixing, int nbnodes, ArcsType nb_arcs, size_t maxiters) :
_graph(graph), //_arc_id(graph),
_arc_mixing(arc_mixing), _init_nb_nodes(nbnodes), _init_nb_arcs(nb_arcs),
MAX(std::numeric_limits<Value>::max()),
@@ -288,11 +293,11 @@ namespace lemon {
private:
- int max_iter;
+ size_t max_iter;
TEMPLATE_DIGRAPH_TYPEDEFS(GR);
typedef std::vector<int> IntVector;
- typedef std::vector<NodesType> UHalfIntVector;
+ typedef std::vector<ArcsType> ArcVector;
typedef std::vector<Value> ValueVector;
typedef std::vector<Cost> CostVector;
// typedef SparseValueVector<Cost> CostVector;
@@ -315,9 +320,9 @@ namespace lemon {
// Data related to the underlying digraph
const GR &_graph;
int _node_num;
- int _arc_num;
- int _all_arc_num;
- int _search_arc_num;
+ ArcsType _arc_num;
+ ArcsType _all_arc_num;
+ ArcsType _search_arc_num;
// Parameters of the problem
SupplyType _stype;
@@ -325,9 +330,9 @@ namespace lemon {
inline int _node_id(int n) const {return _node_num-n-1;} ;
- //IntArcMap _arc_id;
- UHalfIntVector _source;
- UHalfIntVector _target;
+// IntArcMap _arc_id;
+ IntVector _source; // keep nodes as integers
+ IntVector _target;
bool _arc_mixing;
public:
// Node and arc data
@@ -341,7 +346,7 @@ namespace lemon {
private:
// Data for storing the spanning tree structure
IntVector _parent;
- IntVector _pred;
+ ArcVector _pred;
IntVector _thread;
IntVector _rev_thread;
IntVector _succ_num;
@@ -349,17 +354,17 @@ namespace lemon {
IntVector _dirty_revs;
BoolVector _forward;
StateVector _state;
- int _root;
+ ArcsType _root;
// Temporary data used in the current pivot iteration
- int in_arc, join, u_in, v_in, u_out, v_out;
- int first, second, right, last;
- int stem, par_stem, new_stem;
+ ArcsType in_arc, join, u_in, v_in, u_out, v_out;
+ ArcsType first, second, right, last;
+ ArcsType stem, par_stem, new_stem;
Value delta;
const Value MAX;
- int mixingCoeff;
+ ArcsType mixingCoeff;
public:
@@ -373,27 +378,27 @@ namespace lemon {
private:
// thank you to DVK and MizardX from StackOverflow for this function!
- inline int sequence(int k) const {
- int smallv = (k > num_total_big_subsequence_numbers) & 1;
+ inline ArcsType sequence(ArcsType k) const {
+ ArcsType smallv = (k > num_total_big_subsequence_numbers) & 1;
k -= num_total_big_subsequence_numbers * smallv;
- int subsequence_length2 = subsequence_length- smallv;
- int subsequence_num = (k / subsequence_length2) + num_big_subseqiences * smallv;
- int subsequence_offset = (k % subsequence_length2) * mixingCoeff;
+ ArcsType subsequence_length2 = subsequence_length- smallv;
+ ArcsType subsequence_num = (k / subsequence_length2) + num_big_subseqiences * smallv;
+ ArcsType subsequence_offset = (k % subsequence_length2) * mixingCoeff;
return subsequence_offset + subsequence_num;
}
- int subsequence_length;
- int num_big_subseqiences;
- int num_total_big_subsequence_numbers;
+ ArcsType subsequence_length;
+ ArcsType num_big_subseqiences;
+ ArcsType num_total_big_subsequence_numbers;
- inline int getArcID(const Arc &arc) const
+ inline ArcsType getArcID(const Arc &arc) const
{
//int n = _arc_num-arc._id-1;
- int n = _arc_num-GR::id(arc)-1;
+ ArcsType n = _arc_num-GR::id(arc)-1;
- //int a = mixingCoeff*(n%mixingCoeff) + n/mixingCoeff;
- //int b = _arc_id[arc];
+ //ArcsType a = mixingCoeff*(n%mixingCoeff) + n/mixingCoeff;
+ //ArcsType b = _arc_id[arc];
if (_arc_mixing)
return sequence(n);
else
@@ -401,16 +406,16 @@ namespace lemon {
}
// finally unused because too slow
- inline int getSource(const int arc) const
+ inline ArcsType getSource(const ArcsType arc) const
{
- //int a = _source[arc];
+ //ArcsType a = _source[arc];
//return a;
- int n = _arc_num-arc-1;
+ ArcsType n = _arc_num-arc-1;
if (_arc_mixing)
n = mixingCoeff*(n%mixingCoeff) + n/mixingCoeff;
- int b;
+ ArcsType b;
if (n>=0)
b = _node_id(_graph.source(GR::arcFromId( n ) ));
else
@@ -436,17 +441,17 @@ namespace lemon {
private:
// References to the NetworkSimplexSimple class
- const UHalfIntVector &_source;
- const UHalfIntVector &_target;
+ const IntVector &_source;
+ const IntVector &_target;
const CostVector &_cost;
const StateVector &_state;
const CostVector &_pi;
- int &_in_arc;
- int _search_arc_num;
+ ArcsType &_in_arc;
+ ArcsType _search_arc_num;
// Pivot rule data
- int _block_size;
- int _next_arc;
+ ArcsType _block_size;
+ ArcsType _next_arc;
NetworkSimplexSimple &_ns;
public:
@@ -460,17 +465,16 @@ namespace lemon {
{
// The main parameters of the pivot rule
const double BLOCK_SIZE_FACTOR = 1.0;
- const int MIN_BLOCK_SIZE = 10;
+ const ArcsType MIN_BLOCK_SIZE = 10;
- _block_size = std::max( int(BLOCK_SIZE_FACTOR *
- std::sqrt(double(_search_arc_num))),
- MIN_BLOCK_SIZE );
+ _block_size = std::max(ArcsType(BLOCK_SIZE_FACTOR * std::sqrt(double(_search_arc_num))), MIN_BLOCK_SIZE);
}
+
// Find next entering arc
bool findEnteringArc() {
Cost c, min = 0;
- int e;
- int cnt = _block_size;
+ ArcsType e;
+ ArcsType cnt = _block_size;
double a;
for (e = _next_arc; e != _search_arc_num; ++e) {
c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]);
@@ -516,7 +520,7 @@ namespace lemon {
int _init_nb_nodes;
- long long _init_nb_arcs;
+ ArcsType _init_nb_arcs;
/// \name Parameters
/// The parameters of the algorithm can be specified using these
@@ -736,7 +740,7 @@ namespace lemon {
for (int i = 0; i != _node_num; ++i) {
_supply[i] = 0;
}
- for (int i = 0; i != _arc_num; ++i) {
+ for (ArcsType i = 0; i != _arc_num; ++i) {
_cost[i] = 1;
}
_stype = GEQ;
@@ -745,7 +749,7 @@ namespace lemon {
- int divid (int x, int y)
+ int64_t divid (int64_t x, int64_t y)
{
return (x-x%y)/y;
}
@@ -775,7 +779,7 @@ namespace lemon {
_node_num = _init_nb_nodes;
_arc_num = _init_nb_arcs;
int all_node_num = _node_num + 1;
- int max_arc_num = _arc_num + 2 * _node_num;
+ ArcsType max_arc_num = _arc_num + 2 * _node_num;
_source.resize(max_arc_num);
_target.resize(max_arc_num);
@@ -798,13 +802,13 @@ namespace lemon {
//_arc_mixing=false;
if (_arc_mixing) {
// Store the arcs in a mixed order
- int k = std::max(int(std::sqrt(double(_arc_num))), 10);
+ const ArcsType k = std::max(ArcsType(std::sqrt(double(_arc_num))), ArcsType(10));
mixingCoeff = k;
subsequence_length = _arc_num / mixingCoeff + 1;
num_big_subseqiences = _arc_num % mixingCoeff;
num_total_big_subsequence_numbers = subsequence_length * num_big_subseqiences;
- int i = 0, j = 0;
+ ArcsType i = 0, j = 0;
Arc a; _graph.first(a);
for (; a != INVALID; _graph.next(a)) {
_source[i] = _node_id(_graph.source(a));
@@ -814,7 +818,7 @@ namespace lemon {
}
} else {
// Store the arcs in the original order
- int i = 0;
+ ArcsType i = 0;
Arc a; _graph.first(a);
for (; a != INVALID; _graph.next(a), ++i) {
_source[i] = _node_id(_graph.source(a));
@@ -856,7 +860,7 @@ namespace lemon {
Number totalCost() const {
Number c = 0;
for (ArcIt a(_graph); a != INVALID; ++a) {
- int i = getArcID(a);
+ int64_t i = getArcID(a);
c += Number(_flow[i]) * Number(_cost[i]);
}
return c;
@@ -867,15 +871,15 @@ namespace lemon {
Number c = 0;
/*#ifdef HASHMAP
- typename stdext::hash_map<int, Value>::const_iterator it;
+ typename stdext::hash_map<int64_t, Value>::const_iterator it;
#else
- typename std::map<int, Value>::const_iterator it;
+ typename std::map<int64_t, Value>::const_iterator it;
#endif
for (it = _flow.data.begin(); it!=_flow.data.end(); ++it)
c += Number(it->second) * Number(_cost[it->first]);
return c;*/
- for (unsigned long i=0; i<_flow.size(); i++)
+ for (ArcsType i=0; i<_flow.size(); i++)
c += _flow[i] * Number(_cost[i]);
return c;
@@ -944,14 +948,14 @@ namespace lemon {
// Initialize internal data structures
bool init() {
if (_node_num == 0) return false;
-
+
// Check the sum of supply values
_sum_supply = 0;
for (int i = 0; i != _node_num; ++i) {
_sum_supply += _supply[i];
}
if ( fabs(_sum_supply) > _EPSILON ) return false;
-
+
_sum_supply = 0;
// Initialize artificial cost
@@ -960,14 +964,14 @@ namespace lemon {
ART_COST = std::numeric_limits<Cost>::max() / 2 + 1;
} else {
ART_COST = 0;
- for (int i = 0; i != _arc_num; ++i) {
+ for (ArcsType i = 0; i != _arc_num; ++i) {
if (_cost[i] > ART_COST) ART_COST = _cost[i];
}
ART_COST = (ART_COST + 1) * _node_num;
}
// Initialize arc maps
- for (int i = 0; i != _arc_num; ++i) {
+ for (ArcsType i = 0; i != _arc_num; ++i) {
//_flow[i] = 0; //by default, the sparse matrix is empty
_state[i] = STATE_LOWER;
}
@@ -988,7 +992,7 @@ namespace lemon {
// EQ supply constraints
_search_arc_num = _arc_num;
_all_arc_num = _arc_num + _node_num;
- for (int u = 0, e = _arc_num; u != _node_num; ++u, ++e) {
+ for (ArcsType u = 0, e = _arc_num; u != _node_num; ++u, ++e) {
_parent[u] = _root;
_pred[u] = e;
_thread[u] = u + 1;
@@ -1016,8 +1020,8 @@ namespace lemon {
else if (_sum_supply > 0) {
// LEQ supply constraints
_search_arc_num = _arc_num + _node_num;
- int f = _arc_num + _node_num;
- for (int u = 0, e = _arc_num; u != _node_num; ++u, ++e) {
+ ArcsType f = _arc_num + _node_num;
+ for (ArcsType u = 0, e = _arc_num; u != _node_num; ++u, ++e) {
_parent[u] = _root;
_thread[u] = u + 1;
_rev_thread[u + 1] = u;
@@ -1054,8 +1058,8 @@ namespace lemon {
else {
// GEQ supply constraints
_search_arc_num = _arc_num + _node_num;
- int f = _arc_num + _node_num;
- for (int u = 0, e = _arc_num; u != _node_num; ++u, ++e) {
+ ArcsType f = _arc_num + _node_num;
+ for (ArcsType u = 0, e = _arc_num; u != _node_num; ++u, ++e) {
_parent[u] = _root;
_thread[u] = u + 1;
_rev_thread[u + 1] = u;
@@ -1120,9 +1124,9 @@ namespace lemon {
second = _source[in_arc];
}
delta = INF;
- int result = 0;
+ char result = 0;
Value d;
- int e;
+ ArcsType e;
// Search the cycle along the path form the first node to the root
for (int u = first; u != join; u = _parent[u]) {
@@ -1239,7 +1243,7 @@ namespace lemon {
// Update _rev_thread using the new _thread values
for (int i = 0; i != int(_dirty_revs.size()); ++i) {
- u = _dirty_revs[i];
+ int u = _dirty_revs[i];
_rev_thread[_thread[u]] = u;
}
@@ -1257,7 +1261,7 @@ namespace lemon {
u = w;
}
_pred[u_in] = in_arc;
- _forward[u_in] = ((unsigned int)u_in == _source[in_arc]);
+ _forward[u_in] = (u_in == _source[in_arc]);
_succ_num[u_in] = old_succ_num;
// Set limits for updating _last_succ form v_in and v_out
@@ -1328,7 +1332,7 @@ namespace lemon {
if (_sum_supply > 0) total -= _sum_supply;
if (total <= 0) return true;
- IntVector arc_vector;
+ ArcVector arc_vector;
if (_sum_supply >= 0) {
if (supply_nodes.size() == 1 && demand_nodes.size() == 1) {
// Perform a reverse graph search from the sink to the source
@@ -1345,7 +1349,7 @@ namespace lemon {
Arc a; _graph.firstIn(a, v);
for (; a != INVALID; _graph.nextIn(a)) {
if (reached[u = _graph.source(a)]) continue;
- int j = getArcID(a);
+ ArcsType j = getArcID(a);
if (INF >= total) {
arc_vector.push_back(j);
reached[u] = true;
@@ -1355,7 +1359,7 @@ namespace lemon {
}
} else {
// Find the min. cost incomming arc for each demand node
- for (int i = 0; i != int(demand_nodes.size()); ++i) {
+ for (int i = 0; i != demand_nodes.size(); ++i) {
Node v = demand_nodes[i];
Cost c, min_cost = std::numeric_limits<Cost>::max();
Arc min_arc = INVALID;
@@ -1393,7 +1397,7 @@ namespace lemon {
}
// Perform heuristic initial pivots
- for (int i = 0; i != int(arc_vector.size()); ++i) {
+ for (ArcsType i = 0; i != arc_vector.size(); ++i) {
in_arc = arc_vector[i];
// the error is probably here...
if (_state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -
@@ -1423,7 +1427,7 @@ namespace lemon {
// Perform heuristic initial pivots
if (!initialPivots()) return UNBOUNDED;
- int iter_number=0;
+ size_t iter_number=0;
//pivot.setDantzig(true);
// Execute the Network Simplex algorithm
while (pivot.findEnteringArc()) {
@@ -1443,7 +1447,7 @@ namespace lemon {
double a;
a= (fabs(_pi[_source[in_arc]])>=fabs(_pi[_target[in_arc]])) ? fabs(_pi[_source[in_arc]]) : fabs(_pi[_target[in_arc]]);
a=a>=fabs(_cost[in_arc])?a:fabs(_cost[in_arc]);
- for (int i=0; i<_flow.size(); i++) {
+ for (int64_t i=0; i<_flow.size(); i++) {
sumFlow+=_state[i]*_flow[i];
}
std::cout << "Sum of the flow " << std::setprecision(20) << sumFlow << "\n" << iter_number << " iterations, current cost=" << curCost << "\nReduced cost=" << _state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -_pi[_target[in_arc]]) << "\nPrecision = "<< -EPSILON*(a) << "\n";
@@ -1482,12 +1486,12 @@ namespace lemon {
double a;
a= (fabs(_pi[_source[in_arc]])>=fabs(_pi[_target[in_arc]])) ? fabs(_pi[_source[in_arc]]) : fabs(_pi[_target[in_arc]]);
a=a>=fabs(_cost[in_arc])?a:fabs(_cost[in_arc]);
- for (int i=0; i<_flow.size(); i++) {
+ for (int64_t i=0; i<_flow.size(); i++) {
sumFlow+=_state[i]*_flow[i];
}
-
+
std::cout << "Sum of the flow " << std::setprecision(20) << sumFlow << "\n" << niter << " iterations, current cost=" << curCost << "\nReduced cost=" << _state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -_pi[_target[in_arc]]) << "\nPrecision = "<< -EPSILON*(a) << "\n";
-
+
std::cout << "Arc in = (" << _node_id(_source[in_arc]) << ", " << _node_id(_target[in_arc]) <<")\n";
std::cout << "Supplies = (" << _supply[_source[in_arc]] << ", " << _supply[_target[in_arc]] << ")\n";
@@ -1505,7 +1509,7 @@ namespace lemon {
#endif
// Check feasibility
if( retVal == OPTIMAL){
- for (int e = _search_arc_num; e != _all_arc_num; ++e) {
+ for (ArcsType e = _search_arc_num; e != _all_arc_num; ++e) {
if (_flow[e] != 0){
if (fabs(_flow[e]) > _EPSILON) // change of the original code following issue #126
return INFEASIBLE;
@@ -1521,20 +1525,20 @@ namespace lemon {
if (_sum_supply == 0) {
if (_stype == GEQ) {
Cost max_pot = -std::numeric_limits<Cost>::max();
- for (int i = 0; i != _node_num; ++i) {
+ for (ArcsType i = 0; i != _node_num; ++i) {
if (_pi[i] > max_pot) max_pot = _pi[i];
}
if (max_pot > 0) {
- for (int i = 0; i != _node_num; ++i)
+ for (ArcsType i = 0; i != _node_num; ++i)
_pi[i] -= max_pot;
}
} else {
Cost min_pot = std::numeric_limits<Cost>::max();
- for (int i = 0; i != _node_num; ++i) {
+ for (ArcsType i = 0; i != _node_num; ++i) {
if (_pi[i] < min_pot) min_pot = _pi[i];
}
if (min_pot < 0) {
- for (int i = 0; i != _node_num; ++i)
+ for (ArcsType i = 0; i != _node_num; ++i)
_pi[i] -= min_pot;
}
}
@@ -1548,5 +1552,3 @@ namespace lemon {
///@}
} //namespace lemon
-
-#endif //LEMON_NETWORK_SIMPLEX_H
diff --git a/ot/lp/network_simplex_simple_omp.h b/ot/lp/network_simplex_simple_omp.h
new file mode 100644
index 0000000..87e4c05
--- /dev/null
+++ b/ot/lp/network_simplex_simple_omp.h
@@ -0,0 +1,1699 @@
+/* -*- mode: C++; indent-tabs-mode: nil; -*-
+*
+*
+* This file has been adapted by Nicolas Bonneel (2013),
+* from network_simplex.h from LEMON, a generic C++ optimization library,
+* to implement a lightweight network simplex for mass transport, more
+* memory efficient than the original file. A previous version of this file
+* is used as part of the Displacement Interpolation project,
+* Web: http://www.cs.ubc.ca/labs/imager/tr/2011/DisplacementInterpolation/
+*
+* Revisions:
+* March 2015: added OpenMP parallelization
+* March 2017: included Antoine Rolet's trick to make it more robust
+* April 2018: IMPORTANT bug fix + uses 64bit integers (slightly slower but less risks of overflows), updated to a newer version of the algo by LEMON, sparse flow by default + minor edits.
+*
+*
+**** Original file Copyright Notice :
+*
+* Copyright (C) 2003-2010
+* Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport
+* (Egervary Research Group on Combinatorial Optimization, EGRES).
+*
+* Permission to use, modify and distribute this software is granted
+* provided that this copyright notice appears in all copies. For
+* precise terms see the accompanying LICENSE file.
+*
+* This software is provided "AS IS" with no warranty of any kind,
+* express or implied, and with no claim as to its suitability for any
+* purpose.
+*
+*/
+
+#pragma once
+#undef DEBUG_LVL
+#define DEBUG_LVL 0
+
+#if DEBUG_LVL>0
+#include <iomanip>
+#endif
+
+#undef EPSILON
+#undef _EPSILON
+#undef MAX_DEBUG_ITER
+#define EPSILON std::numeric_limits<Cost>::epsilon()*10
+#define _EPSILON 1e-8
+#define MAX_DEBUG_ITER 100000
+
+/// \ingroup min_cost_flow_algs
+///
+/// \file
+/// \brief Network Simplex algorithm for finding a minimum cost flow.
+
+// if your compiler has troubles with unorderedmaps, just comment the following line to use a slower std::map instead
+#define HASHMAP // now handled with unorderedmaps instead of stdext::hash_map. Should be better supported.
+
+#define SPARSE_FLOW // a sparse flow vector will be 10-15% slower for small problems but uses less memory and becomes faster for large problems (40k total nodes)
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+#include <iostream>
+#ifdef HASHMAP
+#include <unordered_map>
+#else
+#include <map>
+#endif
+//#include "core.h"
+//#include "lmath.h"
+
+#ifdef OMP
+#include <omp.h>
+#endif
+#include <cmath>
+
+
+//#include "sparse_array_n.h"
+#include "full_bipartitegraph_omp.h"
+
+#undef INVALIDNODE
+#undef INVALID
+#define INVALIDNODE -1
+#define INVALID (-1)
+
+namespace lemon_omp {
+
+ int64_t max_threads = -1;
+
+ template <typename T>
+ class ProxyObject;
+
+ template<typename T>
+ class SparseValueVector
+ {
+ public:
+ SparseValueVector(size_t n = 0) // parameter n for compatibility with standard vectors
+ {
+ }
+ void resize(size_t n = 0) {};
+ T operator[](const size_t id) const
+ {
+#ifdef HASHMAP
+ typename std::unordered_map<size_t, T>::const_iterator it = data.find(id);
+#else
+ typename std::map<size_t, T>::const_iterator it = data.find(id);
+#endif
+ if (it == data.end())
+ return 0;
+ else
+ return it->second;
+ }
+
+ ProxyObject<T> operator[](const size_t id)
+ {
+ return ProxyObject<T>(this, id);
+ }
+
+ //private:
+#ifdef HASHMAP
+ std::unordered_map<size_t, T> data;
+#else
+ std::map<size_t, T> data;
+#endif
+
+ };
+
+ template <typename T>
+ class ProxyObject {
+ public:
+ ProxyObject(SparseValueVector<T> *v, size_t idx) { _v = v; _idx = idx; };
+ ProxyObject<T> & operator=(const T &v) {
+ // If we get here, we know that operator[] was called to perform a write access,
+ // so we can insert an item in the vector if needed
+ if (v != 0)
+ _v->data[_idx] = v;
+ return *this;
+ }
+
+ operator T() {
+ // If we get here, we know that operator[] was called to perform a read access,
+ // so we can simply return the existing object
+#ifdef HASHMAP
+ typename std::unordered_map<size_t, T>::iterator it = _v->data.find(_idx);
+#else
+ typename std::map<size_t, T>::iterator it = _v->data.find(_idx);
+#endif
+ if (it == _v->data.end())
+ return 0;
+ else
+ return it->second;
+ }
+
+ void operator+=(T val)
+ {
+ if (val == 0) return;
+#ifdef HASHMAP
+ typename std::unordered_map<size_t, T>::iterator it = _v->data.find(_idx);
+#else
+ typename std::map<size_t, T>::iterator it = _v->data.find(_idx);
+#endif
+ if (it == _v->data.end())
+ _v->data[_idx] = val;
+ else
+ {
+ T sum = it->second + val;
+ if (sum == 0)
+ _v->data.erase(it);
+ else
+ it->second = sum;
+ }
+ }
+ void operator-=(T val)
+ {
+ if (val == 0) return;
+#ifdef HASHMAP
+ typename std::unordered_map<size_t, T>::iterator it = _v->data.find(_idx);
+#else
+ typename std::map<size_t, T>::iterator it = _v->data.find(_idx);
+#endif
+ if (it == _v->data.end())
+ _v->data[_idx] = -val;
+ else
+ {
+ T sum = it->second - val;
+ if (sum == 0)
+ _v->data.erase(it);
+ else
+ it->second = sum;
+ }
+ }
+
+ SparseValueVector<T> *_v;
+ size_t _idx;
+ };
+
+
+
+ /// \addtogroup min_cost_flow_algs
+ /// @{
+
+ /// \brief Implementation of the primal Network Simplex algorithm
+ /// for finding a \ref min_cost_flow "minimum cost flow".
+ ///
+ /// \ref NetworkSimplexSimple implements the primal Network Simplex algorithm
+ /// for finding a \ref min_cost_flow "minimum cost flow"
+ /// \ref amo93networkflows, \ref dantzig63linearprog,
+ /// \ref kellyoneill91netsimplex.
+ /// This algorithm is a highly efficient specialized version of the
+ /// linear programming simplex method directly for the minimum cost
+ /// flow problem.
+ ///
+ /// In general, %NetworkSimplexSimple is the fastest implementation available
+ /// in LEMON for this problem.
+ /// Moreover, it supports both directions of the supply/demand inequality
+ /// constraints. For more information, see \ref SupplyType.
+ ///
+ /// Most of the parameters of the problem (except for the digraph)
+ /// can be given using separate functions, and the algorithm can be
+ /// executed using the \ref run() function. If some parameters are not
+ /// specified, then default values will be used.
+ ///
+ /// \tparam GR The digraph type the algorithm runs on.
+ /// \tparam V The number type used for flow amounts, capacity bounds
+ /// and supply values in the algorithm. By default, it is \c int.
+ /// \tparam C The number type used for costs and potentials in the
+ /// algorithm. By default, it is the same as \c V.
+ ///
+ /// \warning Both number types must be signed and all input data must
+ /// be integer.
+ ///
+ /// \note %NetworkSimplexSimple provides five different pivot rule
+ /// implementations, from which the most efficient one is used
+ /// by default. For more information, see \ref PivotRule.
+ template <typename GR, typename V = int, typename C = V, typename ArcsType = int64_t>
+ class NetworkSimplexSimple
+ {
+ public:
+
+ /// \brief Constructor.
+ ///
+ /// The constructor of the class.
+ ///
+ /// \param graph The digraph the algorithm runs on.
+ /// \param arc_mixing Indicate if the arcs have to be stored in a
+ /// mixed order in the internal data structure.
+ /// In special cases, it could lead to better overall performance,
+ /// but it is usually slower.
+ /// \param nbnodes The number of nodes of the problem.
+ /// \param nb_arcs The number of arcs of the problem.
+ /// \param maxiters The maximum number of pivot iterations (0 means no limit).
+ /// \param numThreads The number of OpenMP threads to use; -1 (the default)
+ /// or any value larger than omp_get_max_threads() selects all available
+ /// threads, and the value is ignored when OpenMP support is not compiled in.
+ NetworkSimplexSimple(const GR& graph, bool arc_mixing, int nbnodes, ArcsType nb_arcs, size_t maxiters = 0, int numThreads=-1) :
+ _graph(graph), //_arc_id(graph),
+ _arc_mixing(arc_mixing), _init_nb_nodes(nbnodes), _init_nb_arcs(nb_arcs),
+ MAX(std::numeric_limits<Value>::max()),
+ INF(std::numeric_limits<Value>::has_infinity ?
+ std::numeric_limits<Value>::infinity() : MAX)
+ {
+ // Reset data structures
+ reset();
+ max_iter = maxiters;
+#ifdef OMP
+ if (max_threads < 0) {
+ max_threads = omp_get_max_threads();
+ }
+ if (numThreads > 0 && numThreads<=max_threads){
+ num_threads = numThreads;
+ } else if (numThreads == -1 || numThreads>max_threads) {
+ num_threads = max_threads;
+ } else {
+ num_threads = 1;
+ }
+ omp_set_num_threads(num_threads);
+#else
+ num_threads = 1;
+#endif
+ }
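For orientation, the thread-count selection performed in this constructor can be read as the stand-alone helper below (an illustrative sketch only; the function name is invented here, and it assumes the code is built with OpenMP enabled, e.g. -fopenmp or /openmp).

    #include <omp.h>
    #include <cstdio>

    // Mirror of the constructor's numThreads handling (sketch, not library code).
    static int resolve_num_threads(int requested) {
        const int max_threads = omp_get_max_threads();
        if (requested > 0 && requested <= max_threads) return requested;    // honour a valid request
        if (requested == -1 || requested > max_threads) return max_threads; // -1 or too large: use all
        return 1;                                                           // anything else (0, < -1): single thread
    }

    int main() {
        std::printf("requested=-1 -> %d\n", resolve_num_threads(-1));
        std::printf("requested= 2 -> %d\n", resolve_num_threads(2));
        std::printf("requested= 0 -> %d\n", resolve_num_threads(0));
        return 0;
    }

Without OpenMP (the #else branch above), the solver always runs single-threaded.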
+
+ /// The type of the flow amounts, capacity bounds and supply values
+ typedef V Value;
+ /// The type of the arc costs
+ typedef C Cost;
+
+ public:
+ /// \brief Problem type constants for the \c run() function.
+ ///
+ /// Enum type containing the problem type constants that can be
+ /// returned by the \ref run() function of the algorithm.
+ enum ProblemType {
+ /// The problem has no feasible solution (flow).
+ INFEASIBLE,
+ /// The problem has optimal solution (i.e. it is feasible and
+ /// bounded), and the algorithm has found optimal flow and node
+ /// potentials (primal and dual solutions).
+ OPTIMAL,
+ /// The objective function of the problem is unbounded, i.e.
+ /// there is a directed cycle having negative total cost and
+ /// infinite upper bound.
+ UNBOUNDED,
+ /// The maximum number of iterations has been reached.
+ MAX_ITER_REACHED
+ };
+
+ /// \brief Constants for selecting the type of the supply constraints.
+ ///
+ /// Enum type containing constants for selecting the supply type,
+ /// i.e. the direction of the inequalities in the supply/demand
+ /// constraints of the \ref min_cost_flow "minimum cost flow problem".
+ ///
+ /// The default supply type is \c GEQ, the \c LEQ type can be
+ /// selected using \ref supplyType().
+ /// The equality form is a special case of both supply types.
+ enum SupplyType {
+ /// This option means that there are <em>"greater or equal"</em>
+ /// supply/demand constraints in the definition of the problem.
+ GEQ,
+ /// This option means that there are <em>"less or equal"</em>
+ /// supply/demand constraints in the definition of the problem.
+ LEQ
+ };
+
+
+
+ private:
+ size_t max_iter;
+ int num_threads;
+ TEMPLATE_DIGRAPH_TYPEDEFS(GR);
+
+ typedef std::vector<int> IntVector;
+ typedef std::vector<ArcsType> ArcVector;
+ typedef std::vector<Value> ValueVector;
+ typedef std::vector<Cost> CostVector;
+ // typedef SparseValueVector<Cost> CostVector;
+ typedef std::vector<char> BoolVector;
+ // Note: vector<char> is used instead of vector<bool> for efficiency reasons
+
+ // State constants for arcs
+ enum ArcState {
+ STATE_UPPER = -1,
+ STATE_TREE = 0,
+ STATE_LOWER = 1
+ };
+
+ typedef std::vector<signed char> StateVector;
+ // Note: vector<signed char> is used instead of vector<ArcState> for
+ // efficiency reasons
+
+ private:
+
+ // Data related to the underlying digraph
+ const GR &_graph;
+ int _node_num;
+ ArcsType _arc_num;
+ ArcsType _all_arc_num;
+ ArcsType _search_arc_num;
+
+ // Parameters of the problem
+ SupplyType _stype;
+ Value _sum_supply;
+
+ inline int _node_id(int n) const { return _node_num - n - 1; };
+
+ //IntArcMap _arc_id;
+ IntVector _source; // keep nodes as integers
+ IntVector _target;
+ bool _arc_mixing;
+
+ // Node and arc data
+ CostVector _cost;
+ ValueVector _supply;
+#ifdef SPARSE_FLOW
+ SparseValueVector<Value> _flow;
+#else
+ ValueVector _flow;
+#endif
+
+ CostVector _pi;
+
+ // Data for storing the spanning tree structure
+ IntVector _parent;
+ ArcVector _pred;
+ IntVector _thread;
+ IntVector _rev_thread;
+ IntVector _succ_num;
+ IntVector _last_succ;
+ IntVector _dirty_revs;
+ BoolVector _forward;
+ StateVector _state;
+ ArcsType _root;
+
+ // Temporary data used in the current pivot iteration
+ ArcsType in_arc, join, u_in, v_in, u_out, v_out;
+ ArcsType first, second, right, last;
+ ArcsType stem, par_stem, new_stem;
+ Value delta;
+
+ const Value MAX;
+
+ ArcsType mixingCoeff;
+
+ public:
+
+ /// \brief Constant for infinite upper bounds (capacities).
+ ///
+ /// Constant for infinite upper bounds (capacities).
+ /// It is \c std::numeric_limits<Value>::infinity() if available,
+ /// \c std::numeric_limits<Value>::max() otherwise.
+ const Value INF;
+
+ private:
+
+ // thank you to DVK and MizardX from StackOverflow for this function!
+ inline ArcsType sequence(ArcsType k) const {
+ ArcsType smallv = (k > num_total_big_subsequence_numbers) & 1;
+
+ k -= num_total_big_subsequence_numbers * smallv;
+ ArcsType subsequence_length2 = subsequence_length - smallv;
+ ArcsType subsequence_num = (k / subsequence_length2) + num_big_subsequences * smallv;
+ ArcsType subsequence_offset = (k % subsequence_length2) * mixingCoeff;
+
+ return subsequence_offset + subsequence_num;
+ }
+ ArcsType subsequence_length;
+ ArcsType num_big_subsequences;
+ ArcsType num_total_big_subsequence_numbers;
+
+ inline ArcsType getArcID(const Arc &arc) const
+ {
+ //int n = _arc_num-arc._id-1;
+ ArcsType n = _arc_num - GR::id(arc) - 1;
+
+ //ArcsType a = mixingCoeff*(n%mixingCoeff) + n/mixingCoeff;
+ //ArcsType b = _arc_id[arc];
+ if (_arc_mixing)
+ return sequence(n);
+ else
+ return n;
+ }
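When arc mixing is enabled, sequence() maps consecutive arc numbers to indices that are mixingCoeff apart, spreading the arcs of each node across the whole index range; with the parameters computed in reset() below, the mapping is a permutation of the arc indices. The following self-contained sketch (illustrative only, on a toy problem size) reuses the same formulas and checks that property:

    #include <cstdint>
    #include <cstdio>
    #include <algorithm>
    #include <vector>

    int main() {
        typedef int64_t ArcsType;
        const ArcsType arc_num = 20, node_num = 5;
        // Same parameter setup as in reset():
        const ArcsType mixingCoeff = std::max(arc_num / node_num, ArcsType(3));   // 4
        const ArcsType subsequence_length = arc_num / mixingCoeff + 1;            // 6
        const ArcsType num_big_subsequences = arc_num % mixingCoeff;              // 0
        const ArcsType num_total_big = subsequence_length * num_big_subsequences; // 0

        std::vector<int> seen(arc_num, 0);
        for (ArcsType k = 0; k < arc_num; ++k) {
            // Same formula as sequence(k) above
            ArcsType smallv = (k > num_total_big) & 1;
            ArcsType kk = k - num_total_big * smallv;
            ArcsType len2 = subsequence_length - smallv;
            ArcsType idx = (kk % len2) * mixingCoeff + (kk / len2) + num_big_subsequences * smallv;
            seen[idx]++;
        }
        bool is_permutation = std::all_of(seen.begin(), seen.end(), [](int c) { return c == 1; });
        std::printf("permutation: %s\n", is_permutation ? "yes" : "no"); // expected: yes
        return 0;
    }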
+
+ // finally unused because too slow
+ inline ArcsType getSource(const ArcsType arc) const
+ {
+ //ArcsType a = _source[arc];
+ //return a;
+
+ ArcsType n = _arc_num - arc - 1;
+ if (_arc_mixing)
+ n = mixingCoeff*(n%mixingCoeff) + n / mixingCoeff;
+
+ ArcsType b;
+ if (n >= 0)
+ b = _node_id(_graph.source(GR::arcFromId(n)));
+ else
+ {
+ n = arc + 1 - _arc_num;
+ if (n <= _node_num)
+ b = _node_num;
+ else
+ if (n >= _graph._n1)
+ b = _graph._n1;
+ else
+ b = _graph._n1 - n;
+ }
+
+ return b;
+ }
+
+
+
+ // Implementation of the Block Search pivot rule
+ class BlockSearchPivotRule
+ {
+ private:
+
+ // References to the NetworkSimplexSimple class
+ const IntVector &_source;
+ const IntVector &_target;
+ const CostVector &_cost;
+ const StateVector &_state;
+ const CostVector &_pi;
+ ArcsType &_in_arc;
+ ArcsType _search_arc_num;
+
+ // Pivot rule data
+ ArcsType _block_size;
+ ArcsType _next_arc;
+ NetworkSimplexSimple &_ns;
+
+ public:
+
+ // Constructor
+ BlockSearchPivotRule(NetworkSimplexSimple &ns) :
+ _source(ns._source), _target(ns._target),
+ _cost(ns._cost), _state(ns._state), _pi(ns._pi),
+ _in_arc(ns.in_arc), _search_arc_num(ns._search_arc_num),
+ _next_arc(0), _ns(ns)
+ {
+ // The main parameters of the pivot rule
+ const double BLOCK_SIZE_FACTOR = 1;
+ const ArcsType MIN_BLOCK_SIZE = 10;
+
+ _block_size = std::max(ArcsType(BLOCK_SIZE_FACTOR * std::sqrt(double(_search_arc_num))), MIN_BLOCK_SIZE);
+ }
+
+ // Find next entering arc
+ bool findEnteringArc() {
+ Cost min_val = 0;
+
+ ArcsType N = _ns.num_threads;
+
+ std::vector<Cost> minArray(N, 0);
+ std::vector<ArcsType> arcId(N);
+ ArcsType bs = (ArcsType)ceil(_block_size / (double)N);
+
+ for (ArcsType i = 0; i < _search_arc_num; i += _block_size) {
+
+ ArcsType e;
+ int j;
+#pragma omp parallel
+ {
+#ifdef OMP
+ int t = omp_get_thread_num();
+#else
+ int t = 0;
+#endif
+
+#pragma omp for schedule(static, bs) lastprivate(e)
+ for (j = 0; j < std::min(i + _block_size, _search_arc_num) - i; j++) {
+ e = (_next_arc + i + j); if (e >= _search_arc_num) e -= _search_arc_num;
+ Cost c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]);
+ if (c < minArray[t]) {
+ minArray[t] = c;
+ arcId[t] = e;
+ }
+ }
+ }
+ for (int j = 0; j < N; j++) {
+ if (minArray[j] < min_val) {
+ min_val = minArray[j];
+ _in_arc = arcId[j];
+ }
+ }
+ Cost a = std::max(std::abs(_pi[_source[_in_arc]]), std::abs(_pi[_target[_in_arc]]));
+ a = std::max(a, std::abs(_cost[_in_arc]));
+ if (min_val < -EPSILON*a) {
+ _next_arc = e;
+ return true;
+ }
+ }
+
+ Cost a = std::max(std::abs(_pi[_source[_in_arc]]), std::abs(_pi[_target[_in_arc]]));
+ a = std::max(a, std::abs(_cost[_in_arc]));
+ if (min_val >= -EPSILON*a) return false;
+
+ return true;
+ }
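The parallel search above uses a standard OpenMP argmin-reduction pattern: every thread records its own most negative reduced cost and the corresponding arc in a per-thread slot, and the slots are merged sequentially once the parallel region ends. A stripped-down, self-contained version of that pattern over a plain array (illustrative sketch; build with OpenMP enabled, e.g. -fopenmp):

    #include <omp.h>
    #include <cstdio>
    #include <vector>

    int main() {
        // Toy "reduced costs"; the entering candidate is the most negative one.
        std::vector<double> red_cost = {0.3, -0.1, 0.2, -0.7, 0.5, -0.2};
        const int n = (int)red_cost.size();
        const int nthreads = omp_get_max_threads();
        std::vector<double> best_val(nthreads, 0.0); // per-thread best (most negative) value
        std::vector<int> best_idx(nthreads, -1);     // per-thread index of that value

    #pragma omp parallel
        {
            const int t = omp_get_thread_num();
    #pragma omp for schedule(static)
            for (int e = 0; e < n; ++e) {
                if (red_cost[e] < best_val[t]) { best_val[t] = red_cost[e]; best_idx[t] = e; }
            }
        }

        // Sequential merge of the per-thread results (same role as the loop over minArray above).
        double min_val = 0.0; int in_arc = -1;
        for (int t = 0; t < nthreads; ++t)
            if (best_val[t] < min_val) { min_val = best_val[t]; in_arc = best_idx[t]; }
        std::printf("entering arc %d, reduced cost %g\n", in_arc, min_val); // expects arc 3, -0.7
        return 0;
    }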
+
+
+ // Find next entering arc
+ /*bool findEnteringArc() {
+ Cost min_val = 0;
+ int N = omp_get_max_threads();
+ std::vector<Cost> minArray(N);
+ std::vector<ArcsType> arcId(N);
+
+ ArcsType bs = (ArcsType)ceil(_block_size / (double)N);
+ for (ArcsType i = 0; i < _search_arc_num; i += _block_size) {
+
+ ArcsType maxJ = std::min(i + _block_size, _search_arc_num) - i;
+ ArcsType j;
+#pragma omp parallel
+ {
+ int t = omp_get_thread_num();
+ Cost minV = 0;
+ ArcsType arcStart = _next_arc + i;
+ ArcsType arc = -1;
+#pragma omp for schedule(static, bs)
+ for (j = 0; j < maxJ; j++) {
+ ArcsType e = arcStart + j; if (e >= _search_arc_num) e -= _search_arc_num;
+ Cost c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]);
+ if (c < minV) {
+ minV = c;
+ arc = e;
+ }
+ }
+
+ minArray[t] = minV;
+ arcId[t] = arc;
+ }
+ for (int j = 0; j < N; j++) {
+ if (minArray[j] < min_val) {
+ min_val = minArray[j];
+ _in_arc = arcId[j];
+ }
+ }
+
+ //FIX by Antoine Rolet to avoid precision issues
+ Cost a = std::max(std::abs(_cost[_in_arc]), std::max(std::abs(_pi[_source[_in_arc]]), std::abs(_pi[_target[_in_arc]])));
+ if (min_val <-std::numeric_limits<Cost>::epsilon()*a) {
+ _next_arc = _next_arc + i + maxJ - 1;
+ if (_next_arc >= _search_arc_num) _next_arc -= _search_arc_num;
+ return true;
+ }
+ }
+
+ if (min_val >= 0) {
+ return false;
+ }
+
+ return true;
+ }*/
+
+
+ /*bool findEnteringArc() {
+ Cost c, min = 0;
+ int cnt = _block_size;
+ int e, min_arc = _next_arc;
+ for (e = _next_arc; e < _search_arc_num; ++e) {
+ c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]);
+ if (c < min) {
+ min = c;
+ min_arc = e;
+
+ }
+ if (--cnt == 0) {
+ if (min < 0) break;
+ cnt = _block_size;
+
+ }
+
+ }
+ if (min == 0 || cnt > 0) {
+ for (e = 0; e < _next_arc; ++e) {
+ c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]);
+ if (c < min) {
+ min = c;
+ min_arc = e;
+
+ }
+ if (--cnt == 0) {
+ if (min < 0) break;
+ cnt = _block_size;
+
+ }
+
+ }
+
+ }
+ if (min >= 0) return false;
+ _in_arc = min_arc;
+ _next_arc = e;
+ return true;
+ }*/
+
+
+
+ }; //class BlockSearchPivotRule
+
+
+
+ public:
+
+
+
+ int _init_nb_nodes;
+ ArcsType _init_nb_arcs;
+
+ /// \name Parameters
+ /// The parameters of the algorithm can be specified using these
+ /// functions.
+
+ /// @{
+
+
+ /// \brief Set the costs of the arcs.
+ ///
+ /// This function sets the costs of the arcs.
+ /// If it is not used before calling \ref run(), the costs
+ /// will be set to \c 1 on all arcs.
+ ///
+ /// \param map An arc map storing the costs.
+ /// Its \c Value type must be convertible to the \c Cost type
+ /// of the algorithm.
+ ///
+ /// \return <tt>(*this)</tt>
+ template<typename CostMap>
+ NetworkSimplexSimple& costMap(const CostMap& map) {
+ Arc a; _graph.first(a);
+ for (; a != INVALID; _graph.next(a)) {
+ _cost[getArcID(a)] = map[a];
+ }
+ return *this;
+ }
+
+
+ /// \brief Set the cost of a single arc.
+ ///
+ /// This function sets the cost of a single arc.
+ /// It is provided so that a full cost map does not have to be built
+ /// (memory reasons).
+ ///
+ /// \param arc The arc.
+ /// \param cost The cost to assign to it.
+ ///
+ /// \return <tt>(*this)</tt>
+ template<typename Value>
+ NetworkSimplexSimple& setCost(const Arc& arc, const Value cost) {
+ _cost[getArcID(arc)] = cost;
+ return *this;
+ }
+
+
+ /// \brief Set the supply values of the nodes.
+ ///
+ /// This function sets the supply values of the nodes.
+ /// If neither this function nor \ref stSupply() is used before
+ /// calling \ref run(), the supply of each node will be set to zero.
+ ///
+ /// \param map A node map storing the supply values.
+ /// Its \c Value type must be convertible to the \c Value type
+ /// of the algorithm.
+ ///
+ /// \return <tt>(*this)</tt>
+ template<typename SupplyMap>
+ NetworkSimplexSimple& supplyMap(const SupplyMap& map) {
+ Node n; _graph.first(n);
+ for (; n != INVALIDNODE; _graph.next(n)) {
+ _supply[_node_id(n)] = map[n];
+ }
+ return *this;
+ }
+ /// \brief Set the supply values of the nodes from two separate arrays.
+ ///
+ /// The first \c n1 nodes take their supply value from \c map1 and the
+ /// remaining \c n2 nodes take theirs from \c map2 (indexed from 0).
+ ///
+ /// \return <tt>(*this)</tt>
+ template<typename SupplyMap>
+ NetworkSimplexSimple& supplyMap(const SupplyMap* map1, int n1, const SupplyMap* map2, int n2) {
+ Node n; _graph.first(n);
+ for (; n != INVALIDNODE; _graph.next(n)) {
+ if (n<n1)
+ _supply[_node_id(n)] = map1[n];
+ else
+ _supply[_node_id(n)] = map2[n - n1];
+ }
+ return *this;
+ }
+ /// \brief Set a constant supply value on the first \c n1 nodes and
+ /// another constant on the remaining nodes.
+ ///
+ /// \return <tt>(*this)</tt>
+ template<typename SupplyMap>
+ NetworkSimplexSimple& supplyMapAll(SupplyMap val1, int n1, SupplyMap val2, int n2) {
+ Node n; _graph.first(n);
+ for (; n != INVALIDNODE; _graph.next(n)) {
+ if (n<n1)
+ _supply[_node_id(n)] = val1;
+ else
+ _supply[_node_id(n)] = val2;
+ }
+ return *this;
+ }
+
+ /// \brief Set single source and target nodes and a supply value.
+ ///
+ /// This function sets a single source node and a single target node
+ /// and the required flow value.
+ /// If neither this function nor \ref supplyMap() is used before
+ /// calling \ref run(), the supply of each node will be set to zero.
+ ///
+ /// Using this function has the same effect as using \ref supplyMap()
+ /// with such a map in which \c k is assigned to \c s, \c -k is
+ /// assigned to \c t and all other nodes have zero supply value.
+ ///
+ /// \param s The source node.
+ /// \param t The target node.
+ /// \param k The required amount of flow from node \c s to node \c t
+ /// (i.e. the supply of \c s and the demand of \c t).
+ ///
+ /// \return <tt>(*this)</tt>
+ NetworkSimplexSimple& stSupply(const Node& s, const Node& t, Value k) {
+ for (int i = 0; i != _node_num; ++i) {
+ _supply[i] = 0;
+ }
+ _supply[_node_id(s)] = k;
+ _supply[_node_id(t)] = -k;
+ return *this;
+ }
+
+ /// \brief Set the type of the supply constraints.
+ ///
+ /// This function sets the type of the supply/demand constraints.
+ /// If it is not used before calling \ref run(), the \ref GEQ supply
+ /// type will be used.
+ ///
+ /// For more information, see \ref SupplyType.
+ ///
+ /// \return <tt>(*this)</tt>
+ NetworkSimplexSimple& supplyType(SupplyType supply_type) {
+ _stype = supply_type;
+ return *this;
+ }
+
+ /// @}
+
+ /// \name Execution Control
+ /// The algorithm can be executed using \ref run().
+
+ /// @{
+
+ /// \brief Run the algorithm.
+ ///
+ /// This function runs the algorithm.
+ /// The parameters can be specified using functions \ref lowerMap(),
+ /// \ref upperMap(), \ref costMap(), \ref supplyMap(), \ref stSupply(),
+ /// \ref supplyType().
+ /// For example,
+ /// \code
+ /// NetworkSimplexSimple<ListDigraph> ns(graph);
+ /// ns.lowerMap(lower).upperMap(upper).costMap(cost)
+ /// .supplyMap(sup).run();
+ /// \endcode
+ ///
+ /// This function can be called more than once. All the given parameters
+ /// are kept for the next call, unless \ref resetParams() or \ref reset()
+ /// is used, thus only the modified parameters have to be set again.
+ /// If the underlying digraph was also modified after the construction
+ /// of the class (or the last \ref reset() call), then the \ref reset()
+ /// function must be called.
+ ///
+ ///
+ /// \return \c INFEASIBLE if no feasible flow exists,
+ /// \n \c OPTIMAL if the problem has optimal solution
+ /// (i.e. it is feasible and bounded), and the algorithm has found
+ /// optimal flow and node potentials (primal and dual solutions),
+ /// \n \c UNBOUNDED if the objective function of the problem is
+ /// unbounded, i.e. there is a directed cycle having negative total
+ /// cost and infinite upper bound.
+ ///
+ /// \see ProblemType
+ /// \see resetParams(), reset()
+ ProblemType run() {
+#if DEBUG_LVL>0
+ std::cout << "OPTIMAL = " << OPTIMAL << "\nINFEASIBLE = " << INFEASIBLE << "\nUNBOUNDED = " << UNBOUNDED << "\nMAX_ITER_REACHED" << MAX_ITER_REACHED << "\n" ;
+#endif
+ if (!init()) return INFEASIBLE;
+#if DEBUG_LVL>0
+ std::cout << "Init done, starting iterations\n";
+#endif
+
+ return start();
+ }
+
+ /// \brief Reset all the parameters that have been given before.
+ ///
+ /// This function resets all the parameters that have been given
+ /// before using functions \ref lowerMap(), \ref upperMap(),
+ /// \ref costMap(), \ref supplyMap(), \ref stSupply(), \ref supplyType().
+ ///
+ /// It is useful for multiple \ref run() calls. Basically, all the given
+ /// parameters are kept for the next \ref run() call, unless
+ /// \ref resetParams() or \ref reset() is used.
+ /// If the underlying digraph was also modified after the construction
+ /// of the class or the last \ref reset() call, then the \ref reset()
+ /// function must be used, otherwise \ref resetParams() is sufficient.
+ ///
+ /// For example,
+ /// \code
+ /// NetworkSimplexSimple<ListDigraph> ns(graph);
+ ///
+ /// // First run
+ /// ns.lowerMap(lower).upperMap(upper).costMap(cost)
+ /// .supplyMap(sup).run();
+ ///
+ /// // Run again with modified cost map (resetParams() is not called,
+ /// // so only the cost map has to be set again)
+ /// cost[e] += 100;
+ /// ns.costMap(cost).run();
+ ///
+ /// // Run again from scratch using resetParams()
+ /// // (the lower bounds will be set to zero on all arcs)
+ /// ns.resetParams();
+ /// ns.upperMap(capacity).costMap(cost)
+ /// .supplyMap(sup).run();
+ /// \endcode
+ ///
+ /// \return <tt>(*this)</tt>
+ ///
+ /// \see reset(), run()
+ NetworkSimplexSimple& resetParams() {
+ for (int i = 0; i != _node_num; ++i) {
+ _supply[i] = 0;
+ }
+ for (ArcsType i = 0; i != _arc_num; ++i) {
+ _cost[i] = 1;
+ }
+ _stype = GEQ;
+ return *this;
+ }
+
+
+ /// \brief Reset the internal data structures and all the parameters
+ /// that have been given before.
+ ///
+ /// This function resets the internal data structures and all the
+ /// parameters that have been given before using functions \ref lowerMap(),
+ /// \ref upperMap(), \ref costMap(), \ref supplyMap(), \ref stSupply(),
+ /// \ref supplyType().
+ ///
+ /// It is useful for multiple \ref run() calls. Basically, all the given
+ /// parameters are kept for the next \ref run() call, unless
+ /// \ref resetParams() or \ref reset() is used.
+ /// If the underlying digraph was also modified after the construction
+ /// of the class or the last \ref reset() call, then the \ref reset()
+ /// function must be used, otherwise \ref resetParams() is sufficient.
+ ///
+ /// See \ref resetParams() for examples.
+ ///
+ /// \return <tt>(*this)</tt>
+ ///
+ /// \see resetParams(), run()
+ NetworkSimplexSimple& reset() {
+ // Resize vectors
+ _node_num = _init_nb_nodes;
+ _arc_num = _init_nb_arcs;
+ int all_node_num = _node_num + 1;
+ ArcsType max_arc_num = _arc_num + 2 * _node_num;
+
+ _source.resize(max_arc_num);
+ _target.resize(max_arc_num);
+
+ _cost.resize(max_arc_num);
+ _supply.resize(all_node_num);
+ _flow.resize(max_arc_num);
+ _pi.resize(all_node_num);
+
+ _parent.resize(all_node_num);
+ _pred.resize(all_node_num);
+ _forward.resize(all_node_num);
+ _thread.resize(all_node_num);
+ _rev_thread.resize(all_node_num);
+ _succ_num.resize(all_node_num);
+ _last_succ.resize(all_node_num);
+ _state.resize(max_arc_num);
+
+
+ //_arc_mixing=false;
+ if (_arc_mixing && _node_num > 1) {
+ // Store the arcs in a mixed order
+ //ArcsType k = std::max(ArcsType(std::sqrt(double(_arc_num))), ArcsType(10));
+ const ArcsType k = std::max(ArcsType(_arc_num / _node_num), ArcsType(3));
+ mixingCoeff = k;
+ subsequence_length = _arc_num / mixingCoeff + 1;
+ num_big_subsequences = _arc_num % mixingCoeff;
+ num_total_big_subsequence_numbers = subsequence_length * num_big_subsequences;
+
+#pragma omp parallel for schedule(static)
+ for (Arc a = 0; a <= _graph.maxArcId(); a++) { // arc ids increase here; sequence() gets the reversed id so the layout matches the first()/next() iteration, which decrements (-1 == INVALID)
+ ArcsType i = sequence(_graph.maxArcId()-a);
+ _source[i] = _node_id(_graph.source(a));
+ _target[i] = _node_id(_graph.target(a));
+ }
+ } else {
+ // Store the arcs in the original order
+ ArcsType i = 0;
+ Arc a; _graph.first(a);
+ for (; a != INVALID; _graph.next(a), ++i) {
+ _source[i] = _node_id(_graph.source(a));
+ _target[i] = _node_id(_graph.target(a));
+ //_arc_id[a] = i;
+ }
+ }
+
+ // Reset parameters
+ resetParams();
+ return *this;
+ }
+
+ /// @}
+
+ /// \name Query Functions
+ /// The results of the algorithm can be obtained using these
+ /// functions.\n
+ /// The \ref run() function must be called before using them.
+
+ /// @{
+
+ /// \brief Return the total cost of the found flow.
+ ///
+ /// This function returns the total cost of the found flow.
+ /// Its complexity is O(e).
+ ///
+ /// \note The return type of the function can be specified as a
+ /// template parameter. For example,
+ /// \code
+ /// ns.totalCost<double>();
+ /// \endcode
+ /// It is useful if the total cost cannot be stored in the \c Cost
+ /// type of the algorithm, which is the default return type of the
+ /// function.
+ ///
+ /// \pre \ref run() must be called before using this function.
+ /*template <typename Number>
+ Number totalCost() const {
+ Number c = 0;
+ for (ArcIt a(_graph); a != INVALID; ++a) {
+ int i = getArcID(a);
+ c += Number(_flow[i]) * Number(_cost[i]);
+ }
+ return c;
+ }*/
+
+ template <typename Number>
+ Number totalCost() const {
+ Number c = 0;
+
+#ifdef SPARSE_FLOW
+ #ifdef HASHMAP
+ typename std::unordered_map<size_t, Value>::const_iterator it;
+ #else
+ typename std::map<size_t, Value>::const_iterator it;
+ #endif
+ for (it = _flow.data.begin(); it!=_flow.data.end(); ++it)
+ c += Number(it->second) * Number(_cost[it->first]);
+ return c;
+#else
+ for (ArcsType i = 0; i<_flow.size(); i++)
+ c += _flow[i] * Number(_cost[i]);
+ return c;
+#endif
+ }
+
+#ifndef DOXYGEN
+ Cost totalCost() const {
+ return totalCost<Cost>();
+ }
+#endif
+
+ /// \brief Return the flow on the given arc.
+ ///
+ /// This function returns the flow on the given arc.
+ ///
+ /// \pre \ref run() must be called before using this function.
+ Value flow(const Arc& a) const {
+ return _flow[getArcID(a)];
+ }
+
+ /// \brief Return the flow map (the primal solution).
+ ///
+ /// This function copies the flow value on each arc into the given
+ /// map. The \c Value type of the algorithm must be convertible to
+ /// the \c Value type of the map.
+ ///
+ /// \pre \ref run() must be called before using this function.
+ template <typename FlowMap>
+ void flowMap(FlowMap &map) const {
+ Arc a; _graph.first(a);
+ for (; a != INVALID; _graph.next(a)) {
+ map.set(a, _flow[getArcID(a)]);
+ }
+ }
+
+ /// \brief Return the potential (dual value) of the given node.
+ ///
+ /// This function returns the potential (dual value) of the
+ /// given node.
+ ///
+ /// \pre \ref run() must be called before using this function.
+ Cost potential(const Node& n) const {
+ return _pi[_node_id(n)];
+ }
+
+ /// \brief Return the potential map (the dual solution).
+ ///
+ /// This function copies the potential (dual value) of each node
+ /// into the given map.
+ /// The \c Cost type of the algorithm must be convertible to the
+ /// \c Value type of the map.
+ ///
+ /// \pre \ref run() must be called before using this function.
+ template <typename PotentialMap>
+ void potentialMap(PotentialMap &map) const {
+ Node n; _graph.first(n);
+ for (; n != INVALID; _graph.next(n)) {
+ map.set(n, _pi[_node_id(n)]);
+ }
+ }
+
+ /// @}
+
+ private:
+
+ // Initialize internal data structures
+ bool init() {
+ if (_node_num == 0) return false;
+
+ // Check the sum of supply values
+ _sum_supply = 0;
+ for (int i = 0; i != _node_num; ++i) {
+ _sum_supply += _supply[i];
+ }
+ /*if (!((_stype == GEQ && _sum_supply <= 0) ||
+ (_stype == LEQ && _sum_supply >= 0))) return false;*/
+
+
+ // Initialize artificial cost
+ Cost ART_COST;
+ if (std::numeric_limits<Cost>::is_exact) {
+ ART_COST = std::numeric_limits<Cost>::max() / 2 + 1;
+ } else {
+ ART_COST = 0;
+ for (ArcsType i = 0; i != _arc_num; ++i) {
+ if (_cost[i] > ART_COST) ART_COST = _cost[i];
+ }
+ ART_COST = (ART_COST + 1) * _node_num;
+ }
+
+ // Initialize arc maps
+ for (ArcsType i = 0; i != _arc_num; ++i) {
+#ifndef SPARSE_FLOW
+ _flow[i] = 0; //by default, the sparse matrix is empty
+#endif
+ _state[i] = STATE_LOWER;
+ }
+#ifdef SPARSE_FLOW
+ _flow = SparseValueVector<Value>();
+#endif
+
+ // Set data for the artificial root node
+ _root = _node_num;
+ _parent[_root] = -1;
+ _pred[_root] = -1;
+ _thread[_root] = 0;
+ _rev_thread[0] = _root;
+ _succ_num[_root] = _node_num + 1;
+ _last_succ[_root] = _root - 1;
+ _supply[_root] = -_sum_supply;
+ _pi[_root] = 0;
+
+ // Add artificial arcs and initialize the spanning tree data structure
+ if (_sum_supply == 0) {
+ // EQ supply constraints
+ _search_arc_num = _arc_num;
+ _all_arc_num = _arc_num + _node_num;
+ for (ArcsType u = 0, e = _arc_num; u != _node_num; ++u, ++e) {
+ _parent[u] = _root;
+ _pred[u] = e;
+ _thread[u] = u + 1;
+ _rev_thread[u + 1] = u;
+ _succ_num[u] = 1;
+ _last_succ[u] = u;
+ _state[e] = STATE_TREE;
+ if (_supply[u] >= 0) {
+ _forward[u] = true;
+ _pi[u] = 0;
+ _source[e] = u;
+ _target[e] = _root;
+ _flow[e] = _supply[u];
+ _cost[e] = 0;
+ } else {
+ _forward[u] = false;
+ _pi[u] = ART_COST;
+ _source[e] = _root;
+ _target[e] = u;
+ _flow[e] = -_supply[u];
+ _cost[e] = ART_COST;
+ }
+ }
+ } else if (_sum_supply > 0) {
+ // LEQ supply constraints
+ _search_arc_num = _arc_num + _node_num;
+ ArcsType f = _arc_num + _node_num;
+ for (ArcsType u = 0, e = _arc_num; u != _node_num; ++u, ++e) {
+ _parent[u] = _root;
+ _thread[u] = u + 1;
+ _rev_thread[u + 1] = u;
+ _succ_num[u] = 1;
+ _last_succ[u] = u;
+ if (_supply[u] >= 0) {
+ _forward[u] = true;
+ _pi[u] = 0;
+ _pred[u] = e;
+ _source[e] = u;
+ _target[e] = _root;
+ _flow[e] = _supply[u];
+ _cost[e] = 0;
+ _state[e] = STATE_TREE;
+ } else {
+ _forward[u] = false;
+ _pi[u] = ART_COST;
+ _pred[u] = f;
+ _source[f] = _root;
+ _target[f] = u;
+ _flow[f] = -_supply[u];
+ _cost[f] = ART_COST;
+ _state[f] = STATE_TREE;
+ _source[e] = u;
+ _target[e] = _root;
+ //_flow[e] = 0; //by default, the sparse matrix is empty
+ _cost[e] = 0;
+ _state[e] = STATE_LOWER;
+ ++f;
+ }
+ }
+ _all_arc_num = f;
+ } else {
+ // GEQ supply constraints
+ _search_arc_num = _arc_num + _node_num;
+ ArcsType f = _arc_num + _node_num;
+ for (ArcsType u = 0, e = _arc_num; u != _node_num; ++u, ++e) {
+ _parent[u] = _root;
+ _thread[u] = u + 1;
+ _rev_thread[u + 1] = u;
+ _succ_num[u] = 1;
+ _last_succ[u] = u;
+ if (_supply[u] <= 0) {
+ _forward[u] = false;
+ _pi[u] = 0;
+ _pred[u] = e;
+ _source[e] = _root;
+ _target[e] = u;
+ _flow[e] = -_supply[u];
+ _cost[e] = 0;
+ _state[e] = STATE_TREE;
+ } else {
+ _forward[u] = true;
+ _pi[u] = -ART_COST;
+ _pred[u] = f;
+ _source[f] = u;
+ _target[f] = _root;
+ _flow[f] = _supply[u];
+ _state[f] = STATE_TREE;
+ _cost[f] = ART_COST;
+ _source[e] = _root;
+ _target[e] = u;
+ //_flow[e] = 0; //by default, the sparse matrix is empty
+ _cost[e] = 0;
+ _state[e] = STATE_LOWER;
+ ++f;
+ }
+ }
+ _all_arc_num = f;
+ }
+
+ return true;
+ }
+
+ // Find the join node
+ void findJoinNode() {
+ int u = _source[in_arc];
+ int v = _target[in_arc];
+ while (u != v) {
+ if (_succ_num[u] < _succ_num[v]) {
+ u = _parent[u];
+ } else {
+ v = _parent[v];
+ }
+ }
+ join = u;
+ }
+
+ // Find the leaving arc of the cycle and returns true if the
+ // leaving arc is not the same as the entering arc
+ bool findLeavingArc() {
+ // Initialize first and second nodes according to the direction
+ // of the cycle
+ if (_state[in_arc] == STATE_LOWER) {
+ first = _source[in_arc];
+ second = _target[in_arc];
+ } else {
+ first = _target[in_arc];
+ second = _source[in_arc];
+ }
+ delta = INF;
+ char result = 0;
+ Value d;
+ ArcsType e;
+
+ // Search the cycle along the path from the first node to the root
+ for (int u = first; u != join; u = _parent[u]) {
+ e = _pred[u];
+ d = _forward[u] ? _flow[e] : INF;
+ if (d < delta) {
+ delta = d;
+ u_out = u;
+ result = 1;
+ }
+ }
+ // Search the cycle along the path from the second node to the root
+ for (int u = second; u != join; u = _parent[u]) {
+ e = _pred[u];
+ d = _forward[u] ? INF : _flow[e];
+ if (d <= delta) {
+ delta = d;
+ u_out = u;
+ result = 2;
+ }
+ }
+
+ if (result == 1) {
+ u_in = first;
+ v_in = second;
+ } else {
+ u_in = second;
+ v_in = first;
+ }
+ return result != 0;
+ }
+
+ // Change _flow and _state vectors
+ void changeFlow(bool change) {
+ // Augment along the cycle
+ if (delta > 0) {
+ Value val = _state[in_arc] * delta;
+ _flow[in_arc] += val;
+ for (int u = _source[in_arc]; u != join; u = _parent[u]) {
+ _flow[_pred[u]] += _forward[u] ? -val : val;
+ }
+ for (int u = _target[in_arc]; u != join; u = _parent[u]) {
+ _flow[_pred[u]] += _forward[u] ? val : -val;
+ }
+ }
+ // Update the state of the entering and leaving arcs
+ if (change) {
+ _state[in_arc] = STATE_TREE;
+ _state[_pred[u_out]] =
+ (_flow[_pred[u_out]] == 0) ? STATE_LOWER : STATE_UPPER;
+ } else {
+ _state[in_arc] = -_state[in_arc];
+ }
+ }
+
+ // Update the tree structure
+ void updateTreeStructure() {
+ int old_rev_thread = _rev_thread[u_out];
+ int old_succ_num = _succ_num[u_out];
+ int old_last_succ = _last_succ[u_out];
+ v_out = _parent[u_out];
+
+ // Check if u_in and u_out coincide
+ if (u_in == u_out) {
+ // Update _parent, _pred, _pred_dir
+ _parent[u_in] = v_in;
+ _pred[u_in] = in_arc;
+ _forward[u_in] = (u_in == _source[in_arc]);
+
+ // Update _thread and _rev_thread
+ if (_thread[v_in] != u_out) {
+ ArcsType after = _thread[old_last_succ];
+ _thread[old_rev_thread] = after;
+ _rev_thread[after] = old_rev_thread;
+ after = _thread[v_in];
+ _thread[v_in] = u_out;
+ _rev_thread[u_out] = v_in;
+ _thread[old_last_succ] = after;
+ _rev_thread[after] = old_last_succ;
+ }
+ } else {
+ // Handle the case when old_rev_thread equals v_in
+ // (it also means that join and v_out coincide)
+ int thread_continue = old_rev_thread == v_in ?
+ _thread[old_last_succ] : _thread[v_in];
+
+ // Update _thread and _parent along the stem nodes (i.e. the nodes
+ // between u_in and u_out, whose parent have to be changed)
+ int stem = u_in; // the current stem node
+ int par_stem = v_in; // the new parent of stem
+ int next_stem; // the next stem node
+ int last = _last_succ[u_in]; // the last successor of stem
+ int before, after = _thread[last];
+ _thread[v_in] = u_in;
+ _dirty_revs.clear();
+ _dirty_revs.push_back(v_in);
+ while (stem != u_out) {
+ // Insert the next stem node into the thread list
+ next_stem = _parent[stem];
+ _thread[last] = next_stem;
+ _dirty_revs.push_back(last);
+
+ // Remove the subtree of stem from the thread list
+ before = _rev_thread[stem];
+ _thread[before] = after;
+ _rev_thread[after] = before;
+
+ // Change the parent node and shift stem nodes
+ _parent[stem] = par_stem;
+ par_stem = stem;
+ stem = next_stem;
+
+ // Update last and after
+ last = _last_succ[stem] == _last_succ[par_stem] ?
+ _rev_thread[par_stem] : _last_succ[stem];
+ after = _thread[last];
+ }
+ _parent[u_out] = par_stem;
+ _thread[last] = thread_continue;
+ _rev_thread[thread_continue] = last;
+ _last_succ[u_out] = last;
+
+ // Remove the subtree of u_out from the thread list except for
+ // the case when old_rev_thread equals v_in
+ if (old_rev_thread != v_in) {
+ _thread[old_rev_thread] = after;
+ _rev_thread[after] = old_rev_thread;
+ }
+
+ // Update _rev_thread using the new _thread values
+ for (int i = 0; i != int(_dirty_revs.size()); ++i) {
+ int u = _dirty_revs[i];
+ _rev_thread[_thread[u]] = u;
+ }
+
+ // Update _pred, _pred_dir, _last_succ and _succ_num for the
+ // stem nodes from u_out to u_in
+ int tmp_sc = 0, tmp_ls = _last_succ[u_out];
+ for (int u = u_out, p = _parent[u]; u != u_in; u = p, p = _parent[u]) {
+ _pred[u] = _pred[p];
+ _forward[u] = !_forward[p];
+ tmp_sc += _succ_num[u] - _succ_num[p];
+ _succ_num[u] = tmp_sc;
+ _last_succ[p] = tmp_ls;
+ }
+ _pred[u_in] = in_arc;
+ _forward[u_in] = (u_in == _source[in_arc]);
+ _succ_num[u_in] = old_succ_num;
+ }
+
+ // Update _last_succ from v_in towards the root
+ int up_limit_out = _last_succ[join] == v_in ? join : -1;
+ int last_succ_out = _last_succ[u_out];
+ for (int u = v_in; u != -1 && _last_succ[u] == v_in; u = _parent[u]) {
+ _last_succ[u] = last_succ_out;
+ }
+
+ // Update _last_succ from v_out towards the root
+ if (join != old_rev_thread && v_in != old_rev_thread) {
+ for (int u = v_out; u != up_limit_out && _last_succ[u] == old_last_succ;
+ u = _parent[u]) {
+ _last_succ[u] = old_rev_thread;
+ }
+ } else if (last_succ_out != old_last_succ) {
+ for (int u = v_out; u != up_limit_out && _last_succ[u] == old_last_succ;
+ u = _parent[u]) {
+ _last_succ[u] = last_succ_out;
+ }
+ }
+
+ // Update _succ_num from v_in to join
+ for (int u = v_in; u != join; u = _parent[u]) {
+ _succ_num[u] += old_succ_num;
+ }
+ // Update _succ_num from v_out to join
+ for (int u = v_out; u != join; u = _parent[u]) {
+ _succ_num[u] -= old_succ_num;
+ }
+ }
+
+ void updatePotential() {
+ Cost sigma = _pi[v_in] - _pi[u_in] -
+ ((_forward[u_in])?_cost[in_arc]:(-_cost[in_arc]));
+ int end = _thread[_last_succ[u_in]];
+ for (int u = u_in; u != end; u = _thread[u]) {
+ _pi[u] += sigma;
+ }
+ }
+
+
+ // Heuristic initial pivots
+ bool initialPivots() {
+ Value curr, total = 0;
+ std::vector<Node> supply_nodes, demand_nodes;
+ Node u; _graph.first(u);
+ for (; u != INVALIDNODE; _graph.next(u)) {
+ curr = _supply[_node_id(u)];
+ if (curr > 0) {
+ total += curr;
+ supply_nodes.push_back(u);
+ } else if (curr < 0) {
+ demand_nodes.push_back(u);
+ }
+ }
+ if (_sum_supply > 0) total -= _sum_supply;
+ if (total <= 0) return true;
+
+ ArcVector arc_vector;
+ if (_sum_supply >= 0) {
+ if (supply_nodes.size() == 1 && demand_nodes.size() == 1) {
+ // Perform a reverse graph search from the sink to the source
+ //typename GR::template NodeMap<bool> reached(_graph, false);
+ BoolVector reached(_node_num, false);
+ Node s = supply_nodes[0], t = demand_nodes[0];
+ std::vector<Node> stack;
+ reached[t] = true;
+ stack.push_back(t);
+ while (!stack.empty()) {
+ Node u, v = stack.back();
+ stack.pop_back();
+ if (v == s) break;
+ Arc a; _graph.firstIn(a, v);
+ for (; a != INVALID; _graph.nextIn(a)) {
+ if (reached[u = _graph.source(a)]) continue;
+ ArcsType j = getArcID(a);
+ arc_vector.push_back(j);
+ reached[u] = true;
+ stack.push_back(u);
+ }
+ }
+ } else {
+ arc_vector.resize(demand_nodes.size());
+ // Find the min. cost incoming arc for each demand node
+#pragma omp parallel for
+ for (int i = 0; i < int(demand_nodes.size()); ++i) {
+ Node v = demand_nodes[i];
+ Cost min_cost = std::numeric_limits<Cost>::max();
+ Arc min_arc = INVALID;
+ Arc a; _graph.firstIn(a, v);
+ for (; a != INVALID; _graph.nextIn(a)) {
+ Cost c = _cost[getArcID(a)];
+ if (c < min_cost) {
+ min_cost = c;
+ min_arc = a;
+ }
+ }
+ arc_vector[i] = getArcID(min_arc);
+ }
+ arc_vector.erase(std::remove(arc_vector.begin(), arc_vector.end(), INVALID), arc_vector.end());
+ }
+ } else {
+ arc_vector.resize(supply_nodes.size());
+ // Find the min. cost outgoing arc for each supply node
+#pragma omp parallel for
+ for (int i = 0; i < int(supply_nodes.size()); ++i) {
+ Node u = supply_nodes[i];
+ Cost min_cost = std::numeric_limits<Cost>::max();
+ Arc min_arc = INVALID;
+ Arc a; _graph.firstOut(a, u);
+ for (; a != INVALID; _graph.nextOut(a)) {
+ Cost c = _cost[getArcID(a)];
+ if (c < min_cost) {
+ min_cost = c;
+ min_arc = a;
+ }
+ }
+ arc_vector[i] = getArcID(min_arc);
+ }
+ arc_vector.erase(std::remove(arc_vector.begin(), arc_vector.end(), INVALID), arc_vector.end());
+ }
+
+ // Perform heuristic initial pivots
+ for (ArcsType i = 0; i != ArcsType(arc_vector.size()); ++i) {
+ in_arc = arc_vector[i];
+ if (_state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -
+ _pi[_target[in_arc]]) >= 0) continue;
+ findJoinNode();
+ bool change = findLeavingArc();
+ if (delta >= MAX) return false;
+ changeFlow(change);
+ if (change) {
+ updateTreeStructure();
+ updatePotential();
+ }
+ }
+ return true;
+ }
+
+ // Execute the algorithm
+ ProblemType start() {
+ return start<BlockSearchPivotRule>();
+ }
+
+ template <typename PivotRuleImpl>
+ ProblemType start() {
+ PivotRuleImpl pivot(*this);
+ ProblemType retVal = OPTIMAL;
+
+ // Perform heuristic initial pivots
+ if (!initialPivots()) return UNBOUNDED;
+
+ size_t iter_number = 0;
+ // Execute the Network Simplex algorithm
+ while (pivot.findEnteringArc()) {
+ if ((++iter_number <= max_iter && max_iter > 0) || max_iter <= 0) {
+#if DEBUG_LVL>0
+ if(iter_number>MAX_DEBUG_ITER)
+ break;
+ if(iter_number%1000==0||iter_number%1000==1){
+ Cost curCost=totalCost();
+ Value sumFlow=0;
+ Cost a;
+ a= (fabs(_pi[_source[in_arc]])>=fabs(_pi[_target[in_arc]])) ? fabs(_pi[_source[in_arc]]) : fabs(_pi[_target[in_arc]]);
+ a=a>=fabs(_cost[in_arc])?a:fabs(_cost[in_arc]);
+ for (int i=0; i<_flow.size(); i++) {
+ sumFlow+=_state[i]*_flow[i];
+ }
+ std::cout << "Sum of the flow " << std::setprecision(20) << sumFlow << "\n" << iter_number << " iterations, current cost=" << curCost << "\nReduced cost=" << _state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -_pi[_target[in_arc]]) << "\nPrecision = "<< -EPSILON*(a) << "\n";
+ std::cout << "Arc in = (" << _node_id(_source[in_arc]) << ", " << _node_id(_target[in_arc]) <<")\n";
+ std::cout << "Supplies = (" << _supply[_source[in_arc]] << ", " << _supply[_target[in_arc]] << ")\n";
+ std::cout << _cost[in_arc] << "\n";
+ std::cout << _pi[_source[in_arc]] << "\n";
+ std::cout << _pi[_target[in_arc]] << "\n";
+ std::cout << a << "\n";
+ }
+#endif
+
+ findJoinNode();
+ bool change = findLeavingArc();
+ if (delta >= MAX) return UNBOUNDED;
+ changeFlow(change);
+ if (change) {
+ updateTreeStructure();
+ updatePotential();
+ }
+
+#if DEBUG_LVL>0
+ else{
+ std::cout << "No change\n";
+ }
+#endif
+
+#if DEBUG_LVL>1
+ std::cout << "Arc in = (" << _source[in_arc] << ", " << _target[in_arc] << ")\n";
+#endif
+
+
+ } else {
+ char errMess[1000];
+ sprintf( errMess, "RESULT MIGHT BE INACURATE\nMax number of iteration reached, currently \%d. Sometimes iterations go on in cycle even though the solution has been reached, to check if it's the case here have a look at the minimal reduced cost. If it is very close to machine precision, you might actually have the correct solution, if not try setting the maximum number of iterations a bit higher\n",iter_number );
+ std::cerr << errMess;
+ retVal = MAX_ITER_REACHED;
+ break;
+ }
+
+ }
+
+
+
+#if DEBUG_LVL>0
+ Cost curCost=totalCost();
+ Value sumFlow=0;
+ Cost a;
+ a= (fabs(_pi[_source[in_arc]])>=fabs(_pi[_target[in_arc]])) ? fabs(_pi[_source[in_arc]]) : fabs(_pi[_target[in_arc]]);
+ a=a>=fabs(_cost[in_arc])?a:fabs(_cost[in_arc]);
+ for (int i=0; i<_flow.size(); i++) {
+ sumFlow+=_state[i]*_flow[i];
+ }
+
+ std::cout << "Sum of the flow " << std::setprecision(20) << sumFlow << "\n" << niter << " iterations, current cost=" << curCost << "\nReduced cost=" << _state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -_pi[_target[in_arc]]) << "\nPrecision = "<< -EPSILON*(a) << "\n";
+
+ std::cout << "Arc in = (" << _node_id(_source[in_arc]) << ", " << _node_id(_target[in_arc]) <<")\n";
+ std::cout << "Supplies = (" << _supply[_source[in_arc]] << ", " << _supply[_target[in_arc]] << ")\n";
+
+#endif
+
+
+
+#if DEBUG_LVL>1
+ sumFlow=0;
+ for (int i=0; i<_flow.size(); i++) {
+ sumFlow+=_state[i]*_flow[i];
+ if (_state[i]==STATE_TREE) {
+ std::cout << "Non zero value at (" << _node_num+1-_source[i] << ", " << _node_num+1-_target[i] << ")\n";
+ }
+ }
+ std::cout << "Sum of the flow " << sumFlow << "\n"<< niter <<" iterations, current cost=" << totalCost() << "\n";
+#endif
+
+
+
+ //Check feasibility
+ if(retVal == OPTIMAL){
+ for (ArcsType e = _search_arc_num; e != _all_arc_num; ++e) {
+ if (_flow[e] != 0){
+ if (fabs(_flow[e]) > _EPSILON) // change from the original code, following issue #126
+ return INFEASIBLE;
+ else
+ _flow[e]=0;
+ }
+ }
+ }
+
+ // Shift potentials to meet the requirements of the GEQ/LEQ type
+ // optimality conditions
+ if (_sum_supply == 0) {
+ if (_stype == GEQ) {
+ Cost max_pot = -std::numeric_limits<Cost>::max();
+ for (ArcsType i = 0; i != _node_num; ++i) {
+ if (_pi[i] > max_pot) max_pot = _pi[i];
+ }
+ if (max_pot > 0) {
+ for (ArcsType i = 0; i != _node_num; ++i)
+ _pi[i] -= max_pot;
+ }
+ } else {
+ Cost min_pot = std::numeric_limits<Cost>::max();
+ for (ArcsType i = 0; i != _node_num; ++i) {
+ if (_pi[i] < min_pot) min_pot = _pi[i];
+ }
+ if (min_pot < 0) {
+ for (ArcsType i = 0; i != _node_num; ++i)
+ _pi[i] -= min_pot;
+ }
+ }
+ }
+
+ return retVal;
+ }
+
+ }; //class NetworkSimplexSimple
+
+ ///@}
+
+} //namespace lemon_omp
diff --git a/ot/utils.py b/ot/utils.py
index 4dac0c5..6a782e6 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -7,7 +7,6 @@ Various useful functions
#
# License: MIT License
-import multiprocessing
from functools import reduce
import time
@@ -311,38 +310,11 @@ def label_normalization(y, start=0):
return y
-def fun(f, q_in, q_out):
- """ Utility function for parmap with no serializing problems """
- while True:
- i, x = q_in.get()
- if i is None:
- break
- q_out.put((i, f(x)))
-
-
-def parmap(f, X, nprocs=multiprocessing.cpu_count()):
- """ paralell map for multiprocessing (only map on windows)"""
-
- if not sys.platform.endswith('win32') and not sys.platform.endswith('darwin'):
-
- q_in = multiprocessing.Queue(1)
- q_out = multiprocessing.Queue()
-
- proc = [multiprocessing.Process(target=fun, args=(f, q_in, q_out))
- for _ in range(nprocs)]
- for p in proc:
- p.daemon = True
- p.start()
-
- sent = [q_in.put((i, x)) for i, x in enumerate(X)]
- [q_in.put((None, None)) for _ in range(nprocs)]
- res = [q_out.get() for _ in range(len(sent))]
-
- [p.join() for p in proc]
-
- return [x for i, x in sorted(res)]
- else:
- return list(map(f, X))
+def parmap(f, X, nprocs="default"):
+ """ paralell map for multiprocessing.
+ The function has been deprecated and only performs a regular map.
+ """
+ return list(map(f, X))
def check_params(**kwargs):
diff --git a/setup.py b/setup.py
index 37a5824..ef95eaf 100644
--- a/setup.py
+++ b/setup.py
@@ -11,6 +11,8 @@ from setuptools.extension import Extension
import numpy
from Cython.Build import cythonize
+sys.path.append(os.path.join("ot", "helpers"))
+from openmp_helpers import check_openmp_support
# dirty but working
__version__ = re.search(
@@ -30,7 +32,14 @@ if 'clean' in sys.argv[1:]:
os.remove('ot/lp/emd_wrap.cpp')
# add platform dependant optional compilation argument
-compile_args = ["-O3"]
+openmp_supported, flags = check_openmp_support()
+compile_args = ["/O2" if sys.platform == "win32" else "-O3"]
+link_args = []
+
+if openmp_supported:
+ compile_args += flags + ["/DOMP" if sys.platform == 'win32' else "-DOMP"]
+ link_args += flags
+
if sys.platform.startswith('darwin'):
compile_args.append("-stdlib=libc++")
sdk_path = subprocess.check_output(['xcrun', '--show-sdk-path'])
@@ -52,6 +61,7 @@ setup(
language="c++",
include_dirs=[numpy.get_include(), os.path.join(ROOT, 'ot/lp')],
extra_compile_args=compile_args,
+ extra_link_args=link_args
)),
platforms=['linux', 'macosx', 'windows'],
download_url='https://github.com/PythonOT/POT/archive/{}.tar.gz'.format(__version__),
diff --git a/test/test_helpers.py b/test/test_helpers.py
new file mode 100644
index 0000000..8bd0015
--- /dev/null
+++ b/test/test_helpers.py
@@ -0,0 +1,26 @@
+"""Tests for helpers functions """
+
+# Author: Remi Flamary <remi.flamary@polytechnique.edu>
+#
+# License: MIT License
+
+import os
+import sys
+
+sys.path.append(os.path.join("ot", "helpers"))
+
+from openmp_helpers import get_openmp_flag, check_openmp_support # noqa
+from pre_build_helpers import _get_compiler, compile_test_program # noqa
+
+
+def test_helpers():
+
+ compiler = _get_compiler()
+
+ get_openmp_flag(compiler)
+
+ s = '#include <stdio.h>\n#include <stdlib.h>\n\nint main(void) {\n\tprintf("Hello world!\\n");\n\treturn 0;\n}'
+ output, _ = compile_test_program(s)
+ assert len(output) == 1 and output[0] == "Hello world!"
+
+ check_openmp_support()