From 1c7e7ce2da8bb362c184fb6eae71fe7e36356494 Mon Sep 17 00:00:00 2001 From: kguerda-idris <84066930+kguerda-idris@users.noreply.github.com> Date: Wed, 29 Sep 2021 15:29:31 +0200 Subject: [MRG] OpenMP support (#260) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Added : OpenMP support Restored : Epsilon and Debug mode Replaced : parmap => multiprocessing is now replace by multithreading * Commit clean up * Number of CPUs correctly calculated on SLURM clusters * Corrected number of processes for cluster slurm * Mistake corrected * parmap is now deprecated * Now a different solver is used depending on the requested number of threads * Tiny mistake corrected * Folders are now in the ot library instead of at the root * Helpers is now correctly placed * Attempt to make compilation work smoothly * OS compatible path * NumThreads now defaults to 1 * Better flags * Mistake corrected in case of OpenMP unavailability * Revert OpenMP flags modification, which do not compile on Windows * Test helper functions * Helpers comments * Documentation update * File title corrected * Warning no longer using print * Last attempt for macos compilation * pls work * atempt * solving a type error * TypeError OpenMP * Compilation finally working on Windows * Bug solve, number of threads now correctly selected * 64 bits solver to avoid overflows for bigger problems * 64 bits EMD corrected Co-authored-by: kguerda-idris Co-authored-by: ncassereau-idris <84033440+ncassereau-idris@users.noreply.github.com> Co-authored-by: ncassereau Co-authored-by: RĂ©mi Flamary --- ot/helpers/openmp_helpers.py | 85 ++ ot/helpers/pre_build_helpers.py | 87 ++ ot/lp/EMD.h | 5 +- ot/lp/EMD_wrapper.cpp | 124 ++- ot/lp/__init__.py | 71 +- ot/lp/emd_wrap.pyx | 9 +- ot/lp/full_bipartitegraph.h | 27 +- ot/lp/full_bipartitegraph_omp.h | 234 +++++ ot/lp/network_simplex_simple.h | 210 ++--- ot/lp/network_simplex_simple_omp.h | 1699 ++++++++++++++++++++++++++++++++++++ ot/utils.py | 38 +- setup.py | 12 +- test/test_helpers.py | 26 + 13 files changed, 2442 insertions(+), 185 deletions(-) create mode 100644 ot/helpers/openmp_helpers.py create mode 100644 ot/helpers/pre_build_helpers.py create mode 100644 ot/lp/full_bipartitegraph_omp.h create mode 100644 ot/lp/network_simplex_simple_omp.h create mode 100644 test/test_helpers.py diff --git a/ot/helpers/openmp_helpers.py b/ot/helpers/openmp_helpers.py new file mode 100644 index 0000000..a6ad38b --- /dev/null +++ b/ot/helpers/openmp_helpers.py @@ -0,0 +1,85 @@ +"""Helpers for OpenMP support during the build.""" + +# This code is adapted for a large part from the astropy openmp helpers, which +# can be found at: https://github.com/astropy/extension-helpers/blob/master/extension_helpers/_openmp_helpers.py # noqa + + +import os +import sys +import textwrap +import subprocess + +from distutils.errors import CompileError, LinkError + +from pre_build_helpers import compile_test_program + + +def get_openmp_flag(compiler): + """Get openmp flags for a given compiler""" + + if hasattr(compiler, 'compiler'): + compiler = compiler.compiler[0] + else: + compiler = compiler.__class__.__name__ + + if sys.platform == "win32" and ('icc' in compiler or 'icl' in compiler): + omp_flag = ['/Qopenmp'] + elif sys.platform == "win32": + omp_flag = ['/openmp'] + elif sys.platform in ("darwin", "linux") and "icc" in compiler: + omp_flag = ['-qopenmp'] + elif sys.platform == "darwin" and 'openmp' in os.getenv('CPPFLAGS', ''): + omp_flag = [] + else: + # Default flag for GCC and clang: + omp_flag = ['-fopenmp'] + if sys.platform.startswith("darwin"): + omp_flag += ["-Xpreprocessor", "-lomp"] + return omp_flag + + +def check_openmp_support(): + """Check whether OpenMP test code can be compiled and run""" + + code = textwrap.dedent( + """\ + #include + #include + int main(void) { + #pragma omp parallel + printf("nthreads=%d\\n", omp_get_num_threads()); + return 0; + } + """) + + extra_preargs = os.getenv('LDFLAGS', None) + if extra_preargs is not None: + extra_preargs = extra_preargs.strip().split(" ") + extra_preargs = [ + flag for flag in extra_preargs + if flag.startswith(('-L', '-Wl,-rpath', '-l'))] + + extra_postargs = get_openmp_flag + + try: + output, compile_flags = compile_test_program( + code, + extra_preargs=extra_preargs, + extra_postargs=extra_postargs + ) + + if output and 'nthreads=' in output[0]: + nthreads = int(output[0].strip().split('=')[1]) + openmp_supported = len(output) == nthreads + elif "PYTHON_CROSSENV" in os.environ: + # Since we can't run the test program when cross-compiling + # assume that openmp is supported if the program can be + # compiled. + openmp_supported = True + else: + openmp_supported = False + + except (CompileError, LinkError, subprocess.CalledProcessError): + openmp_supported = False + compile_flags = [] + return openmp_supported, compile_flags diff --git a/ot/helpers/pre_build_helpers.py b/ot/helpers/pre_build_helpers.py new file mode 100644 index 0000000..93ecd6a --- /dev/null +++ b/ot/helpers/pre_build_helpers.py @@ -0,0 +1,87 @@ +"""Helpers to check build environment before actual build of POT""" + +import os +import sys +import glob +import tempfile +import setuptools # noqa +import subprocess + +from distutils.dist import Distribution +from distutils.sysconfig import customize_compiler +from numpy.distutils.ccompiler import new_compiler +from numpy.distutils.command.config_compiler import config_cc + + +def _get_compiler(): + """Get a compiler equivalent to the one that will be used to build POT + Handles compiler specified as follows: + - python setup.py build_ext --compiler= + - CC= python setup.py build_ext + """ + dist = Distribution({'script_name': os.path.basename(sys.argv[0]), + 'script_args': sys.argv[1:], + 'cmdclass': {'config_cc': config_cc}}) + + cmd_opts = dist.command_options.get('build_ext') + if cmd_opts is not None and 'compiler' in cmd_opts: + compiler = cmd_opts['compiler'][1] + else: + compiler = None + + ccompiler = new_compiler(compiler=compiler) + customize_compiler(ccompiler) + + return ccompiler + + +def compile_test_program(code, extra_preargs=[], extra_postargs=[]): + """Check that some C code can be compiled and run""" + ccompiler = _get_compiler() + + # extra_(pre/post)args can be a callable to make it possible to get its + # value from the compiler + if callable(extra_preargs): + extra_preargs = extra_preargs(ccompiler) + if callable(extra_postargs): + extra_postargs = extra_postargs(ccompiler) + + start_dir = os.path.abspath('.') + + with tempfile.TemporaryDirectory() as tmp_dir: + try: + os.chdir(tmp_dir) + + # Write test program + with open('test_program.c', 'w') as f: + f.write(code) + + os.mkdir('objects') + + # Compile, test program + ccompiler.compile(['test_program.c'], output_dir='objects', + extra_postargs=extra_postargs) + + # Link test program + objects = glob.glob( + os.path.join('objects', '*' + ccompiler.obj_extension)) + ccompiler.link_executable(objects, 'test_program', + extra_preargs=extra_preargs, + extra_postargs=extra_postargs) + + if "PYTHON_CROSSENV" not in os.environ: + # Run test program if not cross compiling + # will raise a CalledProcessError if return code was non-zero + output = subprocess.check_output('./test_program') + output = output.decode( + sys.stdout.encoding or 'utf-8').splitlines() + else: + # Return an empty output if we are cross compiling + # as we cannot run the test_program + output = [] + except Exception: + raise + finally: + os.chdir(start_dir) + + return output, extra_postargs diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h index c0fe7a3..8a1f9ac 100644 --- a/ot/lp/EMD.h +++ b/ot/lp/EMD.h @@ -18,19 +18,18 @@ #include #include -#include "network_simplex_simple.h" -using namespace lemon; typedef unsigned int node_id_type; enum ProblemType { INFEASIBLE, OPTIMAL, UNBOUNDED, - MAX_ITER_REACHED + MAX_ITER_REACHED }; int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter); +int EMD_wrap_omp(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter, int numThreads); diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp index bc873ed..2bdc172 100644 --- a/ot/lp/EMD_wrapper.cpp +++ b/ot/lp/EMD_wrapper.cpp @@ -12,16 +12,22 @@ * */ + +#include "network_simplex_simple.h" +#include "network_simplex_simple_omp.h" #include "EMD.h" +#include int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, double* alpha, double* beta, double *cost, int maxIter) { - // beware M and C anre strored in row major C style!!! - int n, m, i, cur; + // beware M and C are stored in row major C style!!! + + using namespace lemon; + int n, m, cur; typedef FullBipartiteDigraph Digraph; - DIGRAPH_TYPEDEFS(FullBipartiteDigraph); + DIGRAPH_TYPEDEFS(Digraph); // Get the number of non zero coordinates for r and c n=0; @@ -48,7 +54,7 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, std::vector indI(n), indJ(m); std::vector weights1(n), weights2(m); Digraph di(n, m); - NetworkSimplexSimple net(di, true, n+m, n*m, maxIter); + NetworkSimplexSimple net(di, true, n+m, ((int64_t)n)*((int64_t)m), maxIter); // Set supply and demand, don't account for 0 values (faster) @@ -76,10 +82,12 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G, net.supplyMap(&weights1[0], n, &weights2[0], m); // Set the cost of each edge + int64_t idarc = 0; for (int i=0; i0) { + n++; + }else if(val<0){ + return INFEASIBLE; + } + } + m=0; + for (int i=0; i0) { + m++; + }else if(val<0){ + return INFEASIBLE; + } + } + + // Define the graph + + std::vector indI(n), indJ(m); + std::vector weights1(n), weights2(m); + Digraph di(n, m); + NetworkSimplexSimple net(di, true, n+m, ((int64_t)n)*((int64_t)m), maxIter, numThreads); + + // Set supply and demand, don't account for 0 values (faster) + + cur=0; + for (int i=0; i0) { + weights1[ cur ] = val; + indI[cur++]=i; + } + } + + // Demand is actually negative supply... + + cur=0; + for (int i=0; i0) { + weights2[ cur ] = -val; + indJ[cur++]=i; + } + } + + + net.supplyMap(&weights1[0], n, &weights2[0], m); + + // Set the cost of each edge + int64_t idarc = 0; + for (int i=0; i 1: - res = parmap(f, [b[:, i].copy() for i in range(nb)], processes) - else: - res = list(map(f, [b[:, i].copy() for i in range(nb)])) + warnings.warn( + "The 'processes' parameter has been deprecated. " + "Multiprocessing should be done outside of POT." + ) + res = list(map(f, [b[:, i].copy() for i in range(nb)])) return res def free_support_barycenter(measures_locations, measures_weights, X_init, b=None, weights=None, numItermax=100, - stopThr=1e-7, verbose=False, log=None): + stopThr=1e-7, verbose=False, log=None, numThreads=1): r""" Solves the free support (locations of the barycenters are optimized, not the weights) Wasserstein barycenter problem (i.e. the weighted Frechet mean for the 2-Wasserstein distance), formally: @@ -512,6 +541,10 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None Print information along iterations log : bool, optional record log if True + numThreads: int or "max", optional (default=1, i.e. OpenMP is not used) + If compiled with OpenMP, chooses the number of threads to parallelize. + "max" selects the highest number possible. + Returns ------- @@ -551,7 +584,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None for (measure_locations_i, measure_weights_i, weight_i) in zip(measures_locations, measures_weights, weights.tolist()): M_i = dist(X, measure_locations_i) - T_i = emd(b, measure_weights_i, M_i) + T_i = emd(b, measure_weights_i, M_i, numThreads=numThreads) T_sum = T_sum + weight_i * np.reshape(1. / b, (-1, 1)) * np.matmul(T_i, measure_locations_i) displacement_square_norm = np.sum(np.square(T_sum - X)) diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index de9a700..42e08f4 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -20,6 +20,7 @@ import warnings cdef extern from "EMD.h": int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter) nogil + int EMD_wrap_omp(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter, int numThreads) nogil cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED @@ -38,7 +39,7 @@ def check_result(result_code): @cython.boundscheck(False) @cython.wraparound(False) -def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int max_iter): +def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"] b, np.ndarray[double, ndim=2, mode="c"] M, int max_iter, int numThreads): """ Solves the Earth Movers distance problem and returns the optimal transport matrix @@ -109,8 +110,10 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod # calling the function with nogil: - result_code = EMD_wrap(n1, n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, max_iter) - + if numThreads == 1: + result_code = EMD_wrap(n1, n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, max_iter) + else: + result_code = EMD_wrap_omp(n1, n2, a.data, b.data, M.data, G.data, alpha.data, beta.data, &cost, max_iter, numThreads) return G, cost, alpha, beta, result_code diff --git a/ot/lp/full_bipartitegraph.h b/ot/lp/full_bipartitegraph.h index 87a1bec..713ccb5 100644 --- a/ot/lp/full_bipartitegraph.h +++ b/ot/lp/full_bipartitegraph.h @@ -23,10 +23,10 @@ * */ -#ifndef LEMON_FULL_BIPARTITE_GRAPH_H -#define LEMON_FULL_BIPARTITE_GRAPH_H +#pragma once #include "core.h" +#include ///\ingroup graphs ///\file @@ -44,16 +44,16 @@ namespace lemon { //class Node; typedef int Node; //class Arc; - typedef long long Arc; + typedef int64_t Arc; protected: int _node_num; - long long _arc_num; + int64_t _arc_num; FullBipartiteDigraphBase() {} - void construct(int n1, int n2) { _node_num = n1+n2; _arc_num = n1 * n2; _n1=n1; _n2=n2;} + void construct(int n1, int n2) { _node_num = n1+n2; _arc_num = (int64_t)n1 * (int64_t)n2; _n1=n1; _n2=n2;} public: @@ -65,25 +65,25 @@ namespace lemon { Arc arc(const Node& s, const Node& t) const { if (s<_n1 && t>=_n1) - return Arc(s * _n2 + (t-_n1) ); + return Arc((int64_t)s * (int64_t)_n2 + (int64_t)(t-_n1) ); else return Arc(-1); } int nodeNum() const { return _node_num; } - long long arcNum() const { return _arc_num; } + int64_t arcNum() const { return _arc_num; } int maxNodeId() const { return _node_num - 1; } - long long maxArcId() const { return _arc_num - 1; } + int64_t maxArcId() const { return _arc_num - 1; } Node source(Arc arc) const { return arc / _n2; } Node target(Arc arc) const { return (arc % _n2) + _n1; } static int id(Node node) { return node; } - static long long id(Arc arc) { return arc; } + static int64_t id(Arc arc) { return arc; } static Node nodeFromId(int id) { return Node(id);} - static Arc arcFromId(int id) { return Arc(id);} + static Arc arcFromId(int64_t id) { return Arc(id);} Arc findArc(Node s, Node t, Arc prev = -1) const { @@ -136,7 +136,7 @@ namespace lemon { /// /// \brief A directed full graph class. /// - /// FullBipartiteDigraph is a simple and fast implmenetation of directed full + /// FullBipartiteDigraph is a simple and fast implementation of directed full /// (complete) graphs. It contains an arc from each node to each node /// (including a loop for each node), therefore the number of arcs /// is the square of the number of nodes. @@ -203,13 +203,10 @@ namespace lemon { /// \brief Number of nodes. int nodeNum() const { return Parent::nodeNum(); } /// \brief Number of arcs. - long long arcNum() const { return Parent::arcNum(); } + int64_t arcNum() const { return Parent::arcNum(); } }; } //namespace lemon - - -#endif //LEMON_FULL_GRAPH_H diff --git a/ot/lp/full_bipartitegraph_omp.h b/ot/lp/full_bipartitegraph_omp.h new file mode 100644 index 0000000..8cbed0b --- /dev/null +++ b/ot/lp/full_bipartitegraph_omp.h @@ -0,0 +1,234 @@ +/* -*- mode: C++; indent-tabs-mode: nil; -*- + * + * This file has been adapted by Nicolas Bonneel (2013), + * from full_graph.h from LEMON, a generic C++ optimization library, + * to implement a lightweight fully connected bipartite graph. A previous + * version of this file is used as part of the Displacement Interpolation + * project, + * Web: http://www.cs.ubc.ca/labs/imager/tr/2011/DisplacementInterpolation/ + * + * + **** Original file Copyright Notice : + * Copyright (C) 2003-2010 + * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport + * (Egervary Research Group on Combinatorial Optimization, EGRES). + * + * Permission to use, modify and distribute this software is granted + * provided that this copyright notice appears in all copies. For + * precise terms see the accompanying LICENSE file. + * + * This software is provided "AS IS" with no warranty of any kind, + * express or implied, and with no claim as to its suitability for any + * purpose. + * + */ + +#pragma once + +#include + +///\ingroup graphs +///\file +///\brief FullBipartiteDigraph and FullBipartiteGraph classes. + + +namespace lemon_omp { + + ///This \c \#define creates convenient type definitions for the following + ///types of \c Digraph: \c Node, \c NodeIt, \c Arc, \c ArcIt, \c InArcIt, + ///\c OutArcIt, \c BoolNodeMap, \c IntNodeMap, \c DoubleNodeMap, + ///\c BoolArcMap, \c IntArcMap, \c DoubleArcMap. + /// + ///\note If the graph type is a dependent type, ie. the graph type depend + ///on a template parameter, then use \c TEMPLATE_DIGRAPH_TYPEDEFS() + ///macro. +#define DIGRAPH_TYPEDEFS(Digraph) \ + typedef Digraph::Node Node; \ + typedef Digraph::Arc Arc; \ + + + ///Create convenience typedefs for the digraph types and iterators + + ///\see DIGRAPH_TYPEDEFS + /// + ///\note Use this macro, if the graph type is a dependent type, + ///ie. the graph type depend on a template parameter. +#define TEMPLATE_DIGRAPH_TYPEDEFS(Digraph) \ + typedef typename Digraph::Node Node; \ + typedef typename Digraph::Arc Arc; \ + + + class FullBipartiteDigraphBase { + public: + + typedef FullBipartiteDigraphBase Digraph; + + //class Node; + typedef int Node; + //class Arc; + typedef int64_t Arc; + + protected: + + int _node_num; + int64_t _arc_num; + + FullBipartiteDigraphBase() {} + + void construct(int n1, int n2) { _node_num = n1+n2; _arc_num = (int64_t)n1 * (int64_t)n2; _n1=n1; _n2=n2;} + + public: + + int _n1, _n2; + + + Node operator()(int ix) const { return Node(ix); } + static int index(const Node& node) { return node; } + + Arc arc(const Node& s, const Node& t) const { + if (s<_n1 && t>=_n1) + return Arc((int64_t)s * (int64_t)_n2 + (int64_t)(t-_n1) ); + else + return Arc(-1); + } + + int nodeNum() const { return _node_num; } + int64_t arcNum() const { return _arc_num; } + + int maxNodeId() const { return _node_num - 1; } + int64_t maxArcId() const { return _arc_num - 1; } + + Node source(Arc arc) const { return arc / _n2; } + Node target(Arc arc) const { return (arc % _n2) + _n1; } + + static int id(Node node) { return node; } + static int64_t id(Arc arc) { return arc; } + + static Node nodeFromId(int id) { return Node(id);} + static Arc arcFromId(int64_t id) { return Arc(id);} + + + Arc findArc(Node s, Node t, Arc prev = -1) const { + return prev == -1 ? arc(s, t) : -1; + } + + void first(Node& node) const { + node = _node_num - 1; + } + + static void next(Node& node) { + --node; + } + + void first(Arc& arc) const { + arc = _arc_num - 1; + } + + static void next(Arc& arc) { + --arc; + } + + void firstOut(Arc& arc, const Node& node) const { + if (node>=_n1) + arc = -1; + else + arc = (node + 1) * _n2 - 1; + } + + void nextOut(Arc& arc) const { + if (arc % _n2 == 0) arc = 0; + --arc; + } + + void firstIn(Arc& arc, const Node& node) const { + if (node<_n1) + arc = -1; + else + arc = _arc_num + node - _node_num; + } + + void nextIn(Arc& arc) const { + arc -= _n2; + if (arc < 0) arc = -1; + } + + }; + + /// \ingroup graphs + /// + /// \brief A directed full graph class. + /// + /// FullBipartiteDigraph is a simple and fast implmenetation of directed full + /// (complete) graphs. It contains an arc from each node to each node + /// (including a loop for each node), therefore the number of arcs + /// is the square of the number of nodes. + /// This class is completely static and it needs constant memory space. + /// Thus you can neither add nor delete nodes or arcs, however + /// the structure can be resized using resize(). + /// + /// This type fully conforms to the \ref concepts::Digraph "Digraph concept". + /// Most of its member functions and nested classes are documented + /// only in the concept class. + /// + /// This class provides constant time counting for nodes and arcs. + /// + /// \note FullBipartiteDigraph and FullBipartiteGraph classes are very similar, + /// but there are two differences. While this class conforms only + /// to the \ref concepts::Digraph "Digraph" concept, FullBipartiteGraph + /// conforms to the \ref concepts::Graph "Graph" concept, + /// moreover FullBipartiteGraph does not contain a loop for each + /// node as this class does. + /// + /// \sa FullBipartiteGraph + class FullBipartiteDigraph : public FullBipartiteDigraphBase { + typedef FullBipartiteDigraphBase Parent; + + public: + + /// \brief Default constructor. + /// + /// Default constructor. The number of nodes and arcs will be zero. + FullBipartiteDigraph() { construct(0,0); } + + /// \brief Constructor + /// + /// Constructor. + /// \param n The number of the nodes. + FullBipartiteDigraph(int n1, int n2) { construct(n1, n2); } + + + /// \brief Returns the node with the given index. + /// + /// Returns the node with the given index. Since this structure is + /// completely static, the nodes can be indexed with integers from + /// the range [0..nodeNum()-1]. + /// The index of a node is the same as its ID. + /// \sa index() + Node operator()(int ix) const { return Parent::operator()(ix); } + + /// \brief Returns the index of the given node. + /// + /// Returns the index of the given node. Since this structure is + /// completely static, the nodes can be indexed with integers from + /// the range [0..nodeNum()-1]. + /// The index of a node is the same as its ID. + /// \sa operator()() + static int index(const Node& node) { return Parent::index(node); } + + /// \brief Returns the arc connecting the given nodes. + /// + /// Returns the arc connecting the given nodes. + /*Arc arc(Node u, Node v) const { + return Parent::arc(u, v); + }*/ + + /// \brief Number of nodes. + int nodeNum() const { return Parent::nodeNum(); } + /// \brief Number of arcs. + int64_t arcNum() const { return Parent::arcNum(); } + }; + + + + +} //namespace lemon_omp diff --git a/ot/lp/network_simplex_simple.h b/ot/lp/network_simplex_simple.h index 630b595..3b46b9b 100644 --- a/ot/lp/network_simplex_simple.h +++ b/ot/lp/network_simplex_simple.h @@ -25,15 +25,17 @@ * */ -#ifndef LEMON_NETWORK_SIMPLEX_SIMPLE_H -#define LEMON_NETWORK_SIMPLEX_SIMPLE_H +#pragma once +#undef DEBUG_LVL #define DEBUG_LVL 0 #if DEBUG_LVL>0 #include #endif - +#undef EPSILON +#undef _EPSILON +#undef MAX_DEBUG_ITER #define EPSILON 2.2204460492503131e-15 #define _EPSILON 1e-8 #define MAX_DEBUG_ITER 100000 @@ -50,6 +52,7 @@ #include #include #include +#include #include #ifdef HASHMAP #include @@ -63,6 +66,8 @@ //#include "sparse_array_n.h" #include "full_bipartitegraph.h" +#undef INVALIDNODE +#undef INVALID #define INVALIDNODE -1 #define INVALID (-1) @@ -76,16 +81,16 @@ namespace lemon { class SparseValueVector { public: - SparseValueVector(int n=0) + SparseValueVector(size_t n=0) { } - void resize(int n=0){}; - T operator[](const int id) const + void resize(size_t n=0){}; + T operator[](const size_t id) const { #ifdef HASHMAP - typename stdext::hash_map::const_iterator it = data.find(id); + typename stdext::hash_map::const_iterator it = data.find(id); #else - typename std::map::const_iterator it = data.find(id); + typename std::map::const_iterator it = data.find(id); #endif if (it==data.end()) return 0; @@ -93,16 +98,16 @@ namespace lemon { return it->second; } - ProxyObject operator[](const int id) + ProxyObject operator[](const size_t id) { return ProxyObject( this, id ); } //private: #ifdef HASHMAP - stdext::hash_map data; + stdext::hash_map data; #else - std::map data; + std::map data; #endif }; @@ -110,7 +115,7 @@ namespace lemon { template class ProxyObject { public: - ProxyObject( SparseValueVector *v, int idx ){_v=v; _idx=idx;}; + ProxyObject( SparseValueVector *v, size_t idx ){_v=v; _idx=idx;}; ProxyObject & operator=( const T &v ) { // If we get here, we know that operator[] was called to perform a write access, // so we can insert an item in the vector if needed @@ -123,9 +128,9 @@ namespace lemon { // If we get here, we know that operator[] was called to perform a read access, // so we can simply return the existing object #ifdef HASHMAP - typename stdext::hash_map::iterator it = _v->data.find(_idx); + typename stdext::hash_map::iterator it = _v->data.find(_idx); #else - typename std::map::iterator it = _v->data.find(_idx); + typename std::map::iterator it = _v->data.find(_idx); #endif if (it==_v->data.end()) return 0; @@ -137,9 +142,9 @@ namespace lemon { { if (val==0) return; #ifdef HASHMAP - typename stdext::hash_map::iterator it = _v->data.find(_idx); + typename stdext::hash_map::iterator it = _v->data.find(_idx); #else - typename std::map::iterator it = _v->data.find(_idx); + typename std::map::iterator it = _v->data.find(_idx); #endif if (it==_v->data.end()) _v->data[_idx] = val; @@ -156,9 +161,9 @@ namespace lemon { { if (val==0) return; #ifdef HASHMAP - typename stdext::hash_map::iterator it = _v->data.find(_idx); + typename stdext::hash_map::iterator it = _v->data.find(_idx); #else - typename std::map::iterator it = _v->data.find(_idx); + typename std::map::iterator it = _v->data.find(_idx); #endif if (it==_v->data.end()) _v->data[_idx] = -val; @@ -173,7 +178,7 @@ namespace lemon { } SparseValueVector *_v; - int _idx; + size_t _idx; }; @@ -204,7 +209,7 @@ namespace lemon { /// /// \tparam GR The digraph type the algorithm runs on. /// \tparam V The number type used for flow amounts, capacity bounds - /// and supply values in the algorithm. By default, it is \c int. + /// and supply values in the algorithm. By default, it is \c int64_t. /// \tparam C The number type used for costs and potentials in the /// algorithm. By default, it is the same as \c V. /// @@ -214,7 +219,7 @@ namespace lemon { /// \note %NetworkSimplexSimple provides five different pivot rule /// implementations, from which the most efficient one is used /// by default. For more information, see \ref PivotRule. - template + template class NetworkSimplexSimple { public: @@ -228,7 +233,7 @@ namespace lemon { /// mixed order in the internal data structure. /// In special cases, it could lead to better overall performance, /// but it is usually slower. Therefore it is disabled by default. - NetworkSimplexSimple(const GR& graph, bool arc_mixing, int nbnodes, long long nb_arcs,int maxiters) : + NetworkSimplexSimple(const GR& graph, bool arc_mixing, int nbnodes, ArcsType nb_arcs, size_t maxiters) : _graph(graph), //_arc_id(graph), _arc_mixing(arc_mixing), _init_nb_nodes(nbnodes), _init_nb_arcs(nb_arcs), MAX(std::numeric_limits::max()), @@ -288,11 +293,11 @@ namespace lemon { private: - int max_iter; + size_t max_iter; TEMPLATE_DIGRAPH_TYPEDEFS(GR); typedef std::vector IntVector; - typedef std::vector UHalfIntVector; + typedef std::vector ArcVector; typedef std::vector ValueVector; typedef std::vector CostVector; // typedef SparseValueVector CostVector; @@ -315,9 +320,9 @@ namespace lemon { // Data related to the underlying digraph const GR &_graph; int _node_num; - int _arc_num; - int _all_arc_num; - int _search_arc_num; + ArcsType _arc_num; + ArcsType _all_arc_num; + ArcsType _search_arc_num; // Parameters of the problem SupplyType _stype; @@ -325,9 +330,9 @@ namespace lemon { inline int _node_id(int n) const {return _node_num-n-1;} ; - //IntArcMap _arc_id; - UHalfIntVector _source; - UHalfIntVector _target; +// IntArcMap _arc_id; + IntVector _source; // keep nodes as integers + IntVector _target; bool _arc_mixing; public: // Node and arc data @@ -341,7 +346,7 @@ namespace lemon { private: // Data for storing the spanning tree structure IntVector _parent; - IntVector _pred; + ArcVector _pred; IntVector _thread; IntVector _rev_thread; IntVector _succ_num; @@ -349,17 +354,17 @@ namespace lemon { IntVector _dirty_revs; BoolVector _forward; StateVector _state; - int _root; + ArcsType _root; // Temporary data used in the current pivot iteration - int in_arc, join, u_in, v_in, u_out, v_out; - int first, second, right, last; - int stem, par_stem, new_stem; + ArcsType in_arc, join, u_in, v_in, u_out, v_out; + ArcsType first, second, right, last; + ArcsType stem, par_stem, new_stem; Value delta; const Value MAX; - int mixingCoeff; + ArcsType mixingCoeff; public: @@ -373,27 +378,27 @@ namespace lemon { private: // thank you to DVK and MizardX from StackOverflow for this function! - inline int sequence(int k) const { - int smallv = (k > num_total_big_subsequence_numbers) & 1; + inline ArcsType sequence(ArcsType k) const { + ArcsType smallv = (k > num_total_big_subsequence_numbers) & 1; k -= num_total_big_subsequence_numbers * smallv; - int subsequence_length2 = subsequence_length- smallv; - int subsequence_num = (k / subsequence_length2) + num_big_subseqiences * smallv; - int subsequence_offset = (k % subsequence_length2) * mixingCoeff; + ArcsType subsequence_length2 = subsequence_length- smallv; + ArcsType subsequence_num = (k / subsequence_length2) + num_big_subseqiences * smallv; + ArcsType subsequence_offset = (k % subsequence_length2) * mixingCoeff; return subsequence_offset + subsequence_num; } - int subsequence_length; - int num_big_subseqiences; - int num_total_big_subsequence_numbers; + ArcsType subsequence_length; + ArcsType num_big_subseqiences; + ArcsType num_total_big_subsequence_numbers; - inline int getArcID(const Arc &arc) const + inline ArcsType getArcID(const Arc &arc) const { //int n = _arc_num-arc._id-1; - int n = _arc_num-GR::id(arc)-1; + ArcsType n = _arc_num-GR::id(arc)-1; - //int a = mixingCoeff*(n%mixingCoeff) + n/mixingCoeff; - //int b = _arc_id[arc]; + //ArcsType a = mixingCoeff*(n%mixingCoeff) + n/mixingCoeff; + //ArcsType b = _arc_id[arc]; if (_arc_mixing) return sequence(n); else @@ -401,16 +406,16 @@ namespace lemon { } // finally unused because too slow - inline int getSource(const int arc) const + inline ArcsType getSource(const ArcsType arc) const { - //int a = _source[arc]; + //ArcsType a = _source[arc]; //return a; - int n = _arc_num-arc-1; + ArcsType n = _arc_num-arc-1; if (_arc_mixing) n = mixingCoeff*(n%mixingCoeff) + n/mixingCoeff; - int b; + ArcsType b; if (n>=0) b = _node_id(_graph.source(GR::arcFromId( n ) )); else @@ -436,17 +441,17 @@ namespace lemon { private: // References to the NetworkSimplexSimple class - const UHalfIntVector &_source; - const UHalfIntVector &_target; + const IntVector &_source; + const IntVector &_target; const CostVector &_cost; const StateVector &_state; const CostVector &_pi; - int &_in_arc; - int _search_arc_num; + ArcsType &_in_arc; + ArcsType _search_arc_num; // Pivot rule data - int _block_size; - int _next_arc; + ArcsType _block_size; + ArcsType _next_arc; NetworkSimplexSimple &_ns; public: @@ -460,17 +465,16 @@ namespace lemon { { // The main parameters of the pivot rule const double BLOCK_SIZE_FACTOR = 1.0; - const int MIN_BLOCK_SIZE = 10; + const ArcsType MIN_BLOCK_SIZE = 10; - _block_size = std::max( int(BLOCK_SIZE_FACTOR * - std::sqrt(double(_search_arc_num))), - MIN_BLOCK_SIZE ); + _block_size = std::max(ArcsType(BLOCK_SIZE_FACTOR * std::sqrt(double(_search_arc_num))), MIN_BLOCK_SIZE); } + // Find next entering arc bool findEnteringArc() { Cost c, min = 0; - int e; - int cnt = _block_size; + ArcsType e; + ArcsType cnt = _block_size; double a; for (e = _next_arc; e != _search_arc_num; ++e) { c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]); @@ -516,7 +520,7 @@ namespace lemon { int _init_nb_nodes; - long long _init_nb_arcs; + ArcsType _init_nb_arcs; /// \name Parameters /// The parameters of the algorithm can be specified using these @@ -736,7 +740,7 @@ namespace lemon { for (int i = 0; i != _node_num; ++i) { _supply[i] = 0; } - for (int i = 0; i != _arc_num; ++i) { + for (ArcsType i = 0; i != _arc_num; ++i) { _cost[i] = 1; } _stype = GEQ; @@ -745,7 +749,7 @@ namespace lemon { - int divid (int x, int y) + int64_t divid (int64_t x, int64_t y) { return (x-x%y)/y; } @@ -775,7 +779,7 @@ namespace lemon { _node_num = _init_nb_nodes; _arc_num = _init_nb_arcs; int all_node_num = _node_num + 1; - int max_arc_num = _arc_num + 2 * _node_num; + ArcsType max_arc_num = _arc_num + 2 * _node_num; _source.resize(max_arc_num); _target.resize(max_arc_num); @@ -798,13 +802,13 @@ namespace lemon { //_arc_mixing=false; if (_arc_mixing) { // Store the arcs in a mixed order - int k = std::max(int(std::sqrt(double(_arc_num))), 10); + const ArcsType k = std::max(ArcsType(std::sqrt(double(_arc_num))), ArcsType(10)); mixingCoeff = k; subsequence_length = _arc_num / mixingCoeff + 1; num_big_subseqiences = _arc_num % mixingCoeff; num_total_big_subsequence_numbers = subsequence_length * num_big_subseqiences; - int i = 0, j = 0; + ArcsType i = 0, j = 0; Arc a; _graph.first(a); for (; a != INVALID; _graph.next(a)) { _source[i] = _node_id(_graph.source(a)); @@ -814,7 +818,7 @@ namespace lemon { } } else { // Store the arcs in the original order - int i = 0; + ArcsType i = 0; Arc a; _graph.first(a); for (; a != INVALID; _graph.next(a), ++i) { _source[i] = _node_id(_graph.source(a)); @@ -856,7 +860,7 @@ namespace lemon { Number totalCost() const { Number c = 0; for (ArcIt a(_graph); a != INVALID; ++a) { - int i = getArcID(a); + int64_t i = getArcID(a); c += Number(_flow[i]) * Number(_cost[i]); } return c; @@ -867,15 +871,15 @@ namespace lemon { Number c = 0; /*#ifdef HASHMAP - typename stdext::hash_map::const_iterator it; + typename stdext::hash_map::const_iterator it; #else - typename std::map::const_iterator it; + typename std::map::const_iterator it; #endif for (it = _flow.data.begin(); it!=_flow.data.end(); ++it) c += Number(it->second) * Number(_cost[it->first]); return c;*/ - for (unsigned long i=0; i<_flow.size(); i++) + for (ArcsType i=0; i<_flow.size(); i++) c += _flow[i] * Number(_cost[i]); return c; @@ -944,14 +948,14 @@ namespace lemon { // Initialize internal data structures bool init() { if (_node_num == 0) return false; - + // Check the sum of supply values _sum_supply = 0; for (int i = 0; i != _node_num; ++i) { _sum_supply += _supply[i]; } if ( fabs(_sum_supply) > _EPSILON ) return false; - + _sum_supply = 0; // Initialize artifical cost @@ -960,14 +964,14 @@ namespace lemon { ART_COST = std::numeric_limits::max() / 2 + 1; } else { ART_COST = 0; - for (int i = 0; i != _arc_num; ++i) { + for (ArcsType i = 0; i != _arc_num; ++i) { if (_cost[i] > ART_COST) ART_COST = _cost[i]; } ART_COST = (ART_COST + 1) * _node_num; } // Initialize arc maps - for (int i = 0; i != _arc_num; ++i) { + for (ArcsType i = 0; i != _arc_num; ++i) { //_flow[i] = 0; //by default, the sparse matrix is empty _state[i] = STATE_LOWER; } @@ -988,7 +992,7 @@ namespace lemon { // EQ supply constraints _search_arc_num = _arc_num; _all_arc_num = _arc_num + _node_num; - for (int u = 0, e = _arc_num; u != _node_num; ++u, ++e) { + for (ArcsType u = 0, e = _arc_num; u != _node_num; ++u, ++e) { _parent[u] = _root; _pred[u] = e; _thread[u] = u + 1; @@ -1016,8 +1020,8 @@ namespace lemon { else if (_sum_supply > 0) { // LEQ supply constraints _search_arc_num = _arc_num + _node_num; - int f = _arc_num + _node_num; - for (int u = 0, e = _arc_num; u != _node_num; ++u, ++e) { + ArcsType f = _arc_num + _node_num; + for (ArcsType u = 0, e = _arc_num; u != _node_num; ++u, ++e) { _parent[u] = _root; _thread[u] = u + 1; _rev_thread[u + 1] = u; @@ -1054,8 +1058,8 @@ namespace lemon { else { // GEQ supply constraints _search_arc_num = _arc_num + _node_num; - int f = _arc_num + _node_num; - for (int u = 0, e = _arc_num; u != _node_num; ++u, ++e) { + ArcsType f = _arc_num + _node_num; + for (ArcsType u = 0, e = _arc_num; u != _node_num; ++u, ++e) { _parent[u] = _root; _thread[u] = u + 1; _rev_thread[u + 1] = u; @@ -1120,9 +1124,9 @@ namespace lemon { second = _source[in_arc]; } delta = INF; - int result = 0; + char result = 0; Value d; - int e; + ArcsType e; // Search the cycle along the path form the first node to the root for (int u = first; u != join; u = _parent[u]) { @@ -1239,7 +1243,7 @@ namespace lemon { // Update _rev_thread using the new _thread values for (int i = 0; i != int(_dirty_revs.size()); ++i) { - u = _dirty_revs[i]; + int u = _dirty_revs[i]; _rev_thread[_thread[u]] = u; } @@ -1257,7 +1261,7 @@ namespace lemon { u = w; } _pred[u_in] = in_arc; - _forward[u_in] = ((unsigned int)u_in == _source[in_arc]); + _forward[u_in] = (u_in == _source[in_arc]); _succ_num[u_in] = old_succ_num; // Set limits for updating _last_succ form v_in and v_out @@ -1328,7 +1332,7 @@ namespace lemon { if (_sum_supply > 0) total -= _sum_supply; if (total <= 0) return true; - IntVector arc_vector; + ArcVector arc_vector; if (_sum_supply >= 0) { if (supply_nodes.size() == 1 && demand_nodes.size() == 1) { // Perform a reverse graph search from the sink to the source @@ -1345,7 +1349,7 @@ namespace lemon { Arc a; _graph.firstIn(a, v); for (; a != INVALID; _graph.nextIn(a)) { if (reached[u = _graph.source(a)]) continue; - int j = getArcID(a); + ArcsType j = getArcID(a); if (INF >= total) { arc_vector.push_back(j); reached[u] = true; @@ -1355,7 +1359,7 @@ namespace lemon { } } else { // Find the min. cost incomming arc for each demand node - for (int i = 0; i != int(demand_nodes.size()); ++i) { + for (int i = 0; i != demand_nodes.size(); ++i) { Node v = demand_nodes[i]; Cost c, min_cost = std::numeric_limits::max(); Arc min_arc = INVALID; @@ -1393,7 +1397,7 @@ namespace lemon { } // Perform heuristic initial pivots - for (int i = 0; i != int(arc_vector.size()); ++i) { + for (ArcsType i = 0; i != arc_vector.size(); ++i) { in_arc = arc_vector[i]; // l'erreur est probablement ici... if (_state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] - @@ -1423,7 +1427,7 @@ namespace lemon { // Perform heuristic initial pivots if (!initialPivots()) return UNBOUNDED; - int iter_number=0; + size_t iter_number=0; //pivot.setDantzig(true); // Execute the Network Simplex algorithm while (pivot.findEnteringArc()) { @@ -1443,7 +1447,7 @@ namespace lemon { double a; a= (fabs(_pi[_source[in_arc]])>=fabs(_pi[_target[in_arc]])) ? fabs(_pi[_source[in_arc]]) : fabs(_pi[_target[in_arc]]); a=a>=fabs(_cost[in_arc])?a:fabs(_cost[in_arc]); - for (int i=0; i<_flow.size(); i++) { + for (int64_t i=0; i<_flow.size(); i++) { sumFlow+=_state[i]*_flow[i]; } std::cout << "Sum of the flow " << std::setprecision(20) << sumFlow << "\n" << iter_number << " iterations, current cost=" << curCost << "\nReduced cost=" << _state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -_pi[_target[in_arc]]) << "\nPrecision = "<< -EPSILON*(a) << "\n"; @@ -1482,12 +1486,12 @@ namespace lemon { double a; a= (fabs(_pi[_source[in_arc]])>=fabs(_pi[_target[in_arc]])) ? fabs(_pi[_source[in_arc]]) : fabs(_pi[_target[in_arc]]); a=a>=fabs(_cost[in_arc])?a:fabs(_cost[in_arc]); - for (int i=0; i<_flow.size(); i++) { + for (int64_t i=0; i<_flow.size(); i++) { sumFlow+=_state[i]*_flow[i]; } - + std::cout << "Sum of the flow " << std::setprecision(20) << sumFlow << "\n" << niter << " iterations, current cost=" << curCost << "\nReduced cost=" << _state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -_pi[_target[in_arc]]) << "\nPrecision = "<< -EPSILON*(a) << "\n"; - + std::cout << "Arc in = (" << _node_id(_source[in_arc]) << ", " << _node_id(_target[in_arc]) <<")\n"; std::cout << "Supplies = (" << _supply[_source[in_arc]] << ", " << _supply[_target[in_arc]] << ")\n"; @@ -1505,7 +1509,7 @@ namespace lemon { #endif // Check feasibility if( retVal == OPTIMAL){ - for (int e = _search_arc_num; e != _all_arc_num; ++e) { + for (ArcsType e = _search_arc_num; e != _all_arc_num; ++e) { if (_flow[e] != 0){ if (fabs(_flow[e]) > _EPSILON) // change of the original code following issue #126 return INFEASIBLE; @@ -1521,20 +1525,20 @@ namespace lemon { if (_sum_supply == 0) { if (_stype == GEQ) { Cost max_pot = -std::numeric_limits::max(); - for (int i = 0; i != _node_num; ++i) { + for (ArcsType i = 0; i != _node_num; ++i) { if (_pi[i] > max_pot) max_pot = _pi[i]; } if (max_pot > 0) { - for (int i = 0; i != _node_num; ++i) + for (ArcsType i = 0; i != _node_num; ++i) _pi[i] -= max_pot; } } else { Cost min_pot = std::numeric_limits::max(); - for (int i = 0; i != _node_num; ++i) { + for (ArcsType i = 0; i != _node_num; ++i) { if (_pi[i] < min_pot) min_pot = _pi[i]; } if (min_pot < 0) { - for (int i = 0; i != _node_num; ++i) + for (ArcsType i = 0; i != _node_num; ++i) _pi[i] -= min_pot; } } @@ -1548,5 +1552,3 @@ namespace lemon { ///@} } //namespace lemon - -#endif //LEMON_NETWORK_SIMPLEX_H diff --git a/ot/lp/network_simplex_simple_omp.h b/ot/lp/network_simplex_simple_omp.h new file mode 100644 index 0000000..87e4c05 --- /dev/null +++ b/ot/lp/network_simplex_simple_omp.h @@ -0,0 +1,1699 @@ +/* -*- mode: C++; indent-tabs-mode: nil; -*- +* +* +* This file has been adapted by Nicolas Bonneel (2013), +* from network_simplex.h from LEMON, a generic C++ optimization library, +* to implement a lightweight network simplex for mass transport, more +* memory efficient than the original file. A previous version of this file +* is used as part of the Displacement Interpolation project, +* Web: http://www.cs.ubc.ca/labs/imager/tr/2011/DisplacementInterpolation/ +* +* Revisions: +* March 2015: added OpenMP parallelization +* March 2017: included Antoine Rolet's trick to make it more robust +* April 2018: IMPORTANT bug fix + uses 64bit integers (slightly slower but less risks of overflows), updated to a newer version of the algo by LEMON, sparse flow by default + minor edits. +* +* +**** Original file Copyright Notice : +* +* Copyright (C) 2003-2010 +* Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport +* (Egervary Research Group on Combinatorial Optimization, EGRES). +* +* Permission to use, modify and distribute this software is granted +* provided that this copyright notice appears in all copies. For +* precise terms see the accompanying LICENSE file. +* +* This software is provided "AS IS" with no warranty of any kind, +* express or implied, and with no claim as to its suitability for any +* purpose. +* +*/ + +#pragma once +#undef DEBUG_LVL +#define DEBUG_LVL 0 + +#if DEBUG_LVL>0 +#include +#endif + +#undef EPSILON +#undef _EPSILON +#undef MAX_DEBUG_ITER +#define EPSILON std::numeric_limits::epsilon()*10 +#define _EPSILON 1e-8 +#define MAX_DEBUG_ITER 100000 + +/// \ingroup min_cost_flow_algs +/// +/// \file +/// \brief Network Simplex algorithm for finding a minimum cost flow. + +// if your compiler has troubles with unorderedmaps, just comment the following line to use a slower std::map instead +#define HASHMAP // now handled with unorderedmaps instead of stdext::hash_map. Should be better supported. + +#define SPARSE_FLOW // a sparse flow vector will be 10-15% slower for small problems but uses less memory and becomes faster for large problems (40k total nodes) + +#include +#include +#include +#include +#ifdef HASHMAP +#include +#else +#include +#endif +//#include "core.h" +//#include "lmath.h" + +#ifdef OMP +#include +#endif +#include + + +//#include "sparse_array_n.h" +#include "full_bipartitegraph_omp.h" + +#undef INVALIDNODE +#undef INVALID +#define INVALIDNODE -1 +#define INVALID (-1) + +namespace lemon_omp { + + int64_t max_threads = -1; + + template + class ProxyObject; + + template + class SparseValueVector + { + public: + SparseValueVector(size_t n = 0) // parameter n for compatibility with standard vectors + { + } + void resize(size_t n = 0) {}; + T operator[](const size_t id) const + { +#ifdef HASHMAP + typename std::unordered_map::const_iterator it = data.find(id); +#else + typename std::map::const_iterator it = data.find(id); +#endif + if (it == data.end()) + return 0; + else + return it->second; + } + + ProxyObject operator[](const size_t id) + { + return ProxyObject(this, id); + } + + //private: +#ifdef HASHMAP + std::unordered_map data; +#else + std::map data; +#endif + + }; + + template + class ProxyObject { + public: + ProxyObject(SparseValueVector *v, size_t idx) { _v = v; _idx = idx; }; + ProxyObject & operator=(const T &v) { + // If we get here, we know that operator[] was called to perform a write access, + // so we can insert an item in the vector if needed + if (v != 0) + _v->data[_idx] = v; + return *this; + } + + operator T() { + // If we get here, we know that operator[] was called to perform a read access, + // so we can simply return the existing object +#ifdef HASHMAP + typename std::unordered_map::iterator it = _v->data.find(_idx); +#else + typename std::map::iterator it = _v->data.find(_idx); +#endif + if (it == _v->data.end()) + return 0; + else + return it->second; + } + + void operator+=(T val) + { + if (val == 0) return; +#ifdef HASHMAP + typename std::unordered_map::iterator it = _v->data.find(_idx); +#else + typename std::map::iterator it = _v->data.find(_idx); +#endif + if (it == _v->data.end()) + _v->data[_idx] = val; + else + { + T sum = it->second + val; + if (sum == 0) + _v->data.erase(it); + else + it->second = sum; + } + } + void operator-=(T val) + { + if (val == 0) return; +#ifdef HASHMAP + typename std::unordered_map::iterator it = _v->data.find(_idx); +#else + typename std::map::iterator it = _v->data.find(_idx); +#endif + if (it == _v->data.end()) + _v->data[_idx] = -val; + else + { + T sum = it->second - val; + if (sum == 0) + _v->data.erase(it); + else + it->second = sum; + } + } + + SparseValueVector *_v; + size_t _idx; + }; + + + + /// \addtogroup min_cost_flow_algs + /// @{ + + /// \brief Implementation of the primal Network Simplex algorithm + /// for finding a \ref min_cost_flow "minimum cost flow". + /// + /// \ref NetworkSimplexSimple implements the primal Network Simplex algorithm + /// for finding a \ref min_cost_flow "minimum cost flow" + /// \ref amo93networkflows, \ref dantzig63linearprog, + /// \ref kellyoneill91netsimplex. + /// This algorithm is a highly efficient specialized version of the + /// linear programming simplex method directly for the minimum cost + /// flow problem. + /// + /// In general, %NetworkSimplexSimple is the fastest implementation available + /// in LEMON for this problem. + /// Moreover, it supports both directions of the supply/demand inequality + /// constraints. For more information, see \ref SupplyType. + /// + /// Most of the parameters of the problem (except for the digraph) + /// can be given using separate functions, and the algorithm can be + /// executed using the \ref run() function. If some parameters are not + /// specified, then default values will be used. + /// + /// \tparam GR The digraph type the algorithm runs on. + /// \tparam V The number type used for flow amounts, capacity bounds + /// and supply values in the algorithm. By default, it is \c int. + /// \tparam C The number type used for costs and potentials in the + /// algorithm. By default, it is the same as \c V. + /// + /// \warning Both number types must be signed and all input data must + /// be integer. + /// + /// \note %NetworkSimplexSimple provides five different pivot rule + /// implementations, from which the most efficient one is used + /// by default. For more information, see \ref PivotRule. + template + class NetworkSimplexSimple + { + public: + + /// \brief Constructor. + /// + /// The constructor of the class. + /// + /// \param graph The digraph the algorithm runs on. + /// \param arc_mixing Indicate if the arcs have to be stored in a + /// mixed order in the internal data structure. + /// In special cases, it could lead to better overall performance, + /// but it is usually slower. Therefore it is disabled by default. + NetworkSimplexSimple(const GR& graph, bool arc_mixing, int nbnodes, ArcsType nb_arcs, size_t maxiters = 0, int numThreads=-1) : + _graph(graph), //_arc_id(graph), + _arc_mixing(arc_mixing), _init_nb_nodes(nbnodes), _init_nb_arcs(nb_arcs), + MAX(std::numeric_limits::max()), + INF(std::numeric_limits::has_infinity ? + std::numeric_limits::infinity() : MAX) + { + // Reset data structures + reset(); + max_iter = maxiters; +#ifdef OMP + if (max_threads < 0) { + max_threads = omp_get_max_threads(); + } + if (numThreads > 0 && numThreads<=max_threads){ + num_threads = numThreads; + } else if (numThreads == -1 || numThreads>max_threads) { + num_threads = max_threads; + } else { + num_threads = 1; + } + omp_set_num_threads(num_threads); +#else + num_threads = 1; +#endif + } + + /// The type of the flow amounts, capacity bounds and supply values + typedef V Value; + /// The type of the arc costs + typedef C Cost; + + public: + /// \brief Problem type constants for the \c run() function. + /// + /// Enum type containing the problem type constants that can be + /// returned by the \ref run() function of the algorithm. + enum ProblemType { + /// The problem has no feasible solution (flow). + INFEASIBLE, + /// The problem has optimal solution (i.e. it is feasible and + /// bounded), and the algorithm has found optimal flow and node + /// potentials (primal and dual solutions). + OPTIMAL, + /// The objective function of the problem is unbounded, i.e. + /// there is a directed cycle having negative total cost and + /// infinite upper bound. + UNBOUNDED, + // The maximum number of iteration has been reached + MAX_ITER_REACHED + }; + + /// \brief Constants for selecting the type of the supply constraints. + /// + /// Enum type containing constants for selecting the supply type, + /// i.e. the direction of the inequalities in the supply/demand + /// constraints of the \ref min_cost_flow "minimum cost flow problem". + /// + /// The default supply type is \c GEQ, the \c LEQ type can be + /// selected using \ref supplyType(). + /// The equality form is a special case of both supply types. + enum SupplyType { + /// This option means that there are "greater or equal" + /// supply/demand constraints in the definition of the problem. + GEQ, + /// This option means that there are "less or equal" + /// supply/demand constraints in the definition of the problem. + LEQ + }; + + + + private: + size_t max_iter; + int num_threads; + TEMPLATE_DIGRAPH_TYPEDEFS(GR); + + typedef std::vector IntVector; + typedef std::vector ArcVector; + typedef std::vector ValueVector; + typedef std::vector CostVector; + // typedef SparseValueVector CostVector; + typedef std::vector BoolVector; + // Note: vector is used instead of vector for efficiency reasons + + // State constants for arcs + enum ArcState { + STATE_UPPER = -1, + STATE_TREE = 0, + STATE_LOWER = 1 + }; + + typedef std::vector StateVector; + // Note: vector is used instead of vector for + // efficiency reasons + + private: + + // Data related to the underlying digraph + const GR &_graph; + int _node_num; + ArcsType _arc_num; + ArcsType _all_arc_num; + ArcsType _search_arc_num; + + // Parameters of the problem + SupplyType _stype; + Value _sum_supply; + + inline int _node_id(int n) const { return _node_num - n - 1; }; + + //IntArcMap _arc_id; + IntVector _source; // keep nodes as integers + IntVector _target; + bool _arc_mixing; + + // Node and arc data + CostVector _cost; + ValueVector _supply; +#ifdef SPARSE_FLOW + SparseValueVector _flow; +#else + ValueVector _flow; +#endif + + CostVector _pi; + + // Data for storing the spanning tree structure + IntVector _parent; + ArcVector _pred; + IntVector _thread; + IntVector _rev_thread; + IntVector _succ_num; + IntVector _last_succ; + IntVector _dirty_revs; + BoolVector _forward; + StateVector _state; + ArcsType _root; + + // Temporary data used in the current pivot iteration + ArcsType in_arc, join, u_in, v_in, u_out, v_out; + ArcsType first, second, right, last; + ArcsType stem, par_stem, new_stem; + Value delta; + + const Value MAX; + + ArcsType mixingCoeff; + + public: + + /// \brief Constant for infinite upper bounds (capacities). + /// + /// Constant for infinite upper bounds (capacities). + /// It is \c std::numeric_limits::infinity() if available, + /// \c std::numeric_limits::max() otherwise. + const Value INF; + + private: + + // thank you to DVK and MizardX from StackOverflow for this function! + inline ArcsType sequence(ArcsType k) const { + ArcsType smallv = (k > num_total_big_subsequence_numbers) & 1; + + k -= num_total_big_subsequence_numbers * smallv; + ArcsType subsequence_length2 = subsequence_length - smallv; + ArcsType subsequence_num = (k / subsequence_length2) + num_big_subsequences * smallv; + ArcsType subsequence_offset = (k % subsequence_length2) * mixingCoeff; + + return subsequence_offset + subsequence_num; + } + ArcsType subsequence_length; + ArcsType num_big_subsequences; + ArcsType num_total_big_subsequence_numbers; + + inline ArcsType getArcID(const Arc &arc) const + { + //int n = _arc_num-arc._id-1; + ArcsType n = _arc_num - GR::id(arc) - 1; + + //ArcsType a = mixingCoeff*(n%mixingCoeff) + n/mixingCoeff; + //ArcsType b = _arc_id[arc]; + if (_arc_mixing) + return sequence(n); + else + return n; + } + + // finally unused because too slow + inline ArcsType getSource(const ArcsType arc) const + { + //ArcsType a = _source[arc]; + //return a; + + ArcsType n = _arc_num - arc - 1; + if (_arc_mixing) + n = mixingCoeff*(n%mixingCoeff) + n / mixingCoeff; + + ArcsType b; + if (n >= 0) + b = _node_id(_graph.source(GR::arcFromId(n))); + else + { + n = arc + 1 - _arc_num; + if (n <= _node_num) + b = _node_num; + else + if (n >= _graph._n1) + b = _graph._n1; + else + b = _graph._n1 - n; + } + + return b; + } + + + + // Implementation of the Block Search pivot rule + class BlockSearchPivotRule + { + private: + + // References to the NetworkSimplexSimple class + const IntVector &_source; + const IntVector &_target; + const CostVector &_cost; + const StateVector &_state; + const CostVector &_pi; + ArcsType &_in_arc; + ArcsType _search_arc_num; + + // Pivot rule data + ArcsType _block_size; + ArcsType _next_arc; + NetworkSimplexSimple &_ns; + + public: + + // Constructor + BlockSearchPivotRule(NetworkSimplexSimple &ns) : + _source(ns._source), _target(ns._target), + _cost(ns._cost), _state(ns._state), _pi(ns._pi), + _in_arc(ns.in_arc), _search_arc_num(ns._search_arc_num), + _next_arc(0), _ns(ns) + { + // The main parameters of the pivot rule + const double BLOCK_SIZE_FACTOR = 1; + const ArcsType MIN_BLOCK_SIZE = 10; + + _block_size = std::max(ArcsType(BLOCK_SIZE_FACTOR * std::sqrt(double(_search_arc_num))), MIN_BLOCK_SIZE); + } + + // Find next entering arc + bool findEnteringArc() { + Cost min_val = 0; + + ArcsType N = _ns.num_threads; + + std::vector minArray(N, 0); + std::vector arcId(N); + ArcsType bs = (ArcsType)ceil(_block_size / (double)N); + + for (ArcsType i = 0; i < _search_arc_num; i += _block_size) { + + ArcsType e; + int j; +#pragma omp parallel + { +#ifdef OMP + int t = omp_get_thread_num(); +#else + int t = 0; +#endif + +#pragma omp for schedule(static, bs) lastprivate(e) + for (j = 0; j < std::min(i + _block_size, _search_arc_num) - i; j++) { + e = (_next_arc + i + j); if (e >= _search_arc_num) e -= _search_arc_num; + Cost c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]); + if (c < minArray[t]) { + minArray[t] = c; + arcId[t] = e; + } + } + } + for (int j = 0; j < N; j++) { + if (minArray[j] < min_val) { + min_val = minArray[j]; + _in_arc = arcId[j]; + } + } + Cost a = std::abs(_pi[_source[_in_arc]]) > std::abs(_pi[_target[_in_arc]]) ? std::abs(_pi[_source[_in_arc]]) : std::abs(_pi[_target[_in_arc]]); + a = a > std::abs(_cost[_in_arc]) ? a : std::abs(_cost[_in_arc]); + if (min_val < -EPSILON*a) { + _next_arc = e; + return true; + } + } + + Cost a = fabs(_pi[_source[_in_arc]]) > fabs(_pi[_target[_in_arc]]) ? fabs(_pi[_source[_in_arc]]) : fabs(_pi[_target[_in_arc]]); + a = a > fabs(_cost[_in_arc]) ? a : fabs(_cost[_in_arc]); + if (min_val >= -EPSILON*a) return false; + + return true; + } + + + // Find next entering arc + /*bool findEnteringArc() { + Cost min_val = 0; + int N = omp_get_max_threads(); + std::vector minArray(N); + std::vector arcId(N); + + ArcsType bs = (ArcsType)ceil(_block_size / (double)N); + for (ArcsType i = 0; i < _search_arc_num; i += _block_size) { + + ArcsType maxJ = std::min(i + _block_size, _search_arc_num) - i; + ArcsType j; +#pragma omp parallel + { + int t = omp_get_thread_num(); + Cost minV = 0; + ArcsType arcStart = _next_arc + i; + ArcsType arc = -1; +#pragma omp for schedule(static, bs) + for (j = 0; j < maxJ; j++) { + ArcsType e = arcStart + j; if (e >= _search_arc_num) e -= _search_arc_num; + Cost c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]); + if (c < minV) { + minV = c; + arc = e; + } + } + + minArray[t] = minV; + arcId[t] = arc; + } + for (int j = 0; j < N; j++) { + if (minArray[j] < min_val) { + min_val = minArray[j]; + _in_arc = arcId[j]; + } + } + + //FIX by Antoine Rolet to avoid precision issues + Cost a = std::max(std::abs(_cost[_in_arc]), std::max(std::abs(_pi[_source[_in_arc]]), std::abs(_pi[_target[_in_arc]]))); + if (min_val <-std::numeric_limits::epsilon()*a) { + _next_arc = _next_arc + i + maxJ - 1; + if (_next_arc >= _search_arc_num) _next_arc -= _search_arc_num; + return true; + } + } + + if (min_val >= 0) { + return false; + } + + return true; + }*/ + + + /*bool findEnteringArc() { + Cost c, min = 0; + int cnt = _block_size; + int e, min_arc = _next_arc; + for (e = _next_arc; e < _search_arc_num; ++e) { + c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]); + if (c < min) { + min = c; + min_arc = e; + + } + if (--cnt == 0) { + if (min < 0) break; + cnt = _block_size; + + } + + } + if (min == 0 || cnt > 0) { + for (e = 0; e < _next_arc; ++e) { + c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]); + if (c < min) { + min = c; + min_arc = e; + + } + if (--cnt == 0) { + if (min < 0) break; + cnt = _block_size; + + } + + } + + } + if (min >= 0) return false; + _in_arc = min_arc; + _next_arc = e; + return true; + }*/ + + + + }; //class BlockSearchPivotRule + + + + public: + + + + int _init_nb_nodes; + ArcsType _init_nb_arcs; + + /// \name Parameters + /// The parameters of the algorithm can be specified using these + /// functions. + + /// @{ + + + /// \brief Set the costs of the arcs. + /// + /// This function sets the costs of the arcs. + /// If it is not used before calling \ref run(), the costs + /// will be set to \c 1 on all arcs. + /// + /// \param map An arc map storing the costs. + /// Its \c Value type must be convertible to the \c Cost type + /// of the algorithm. + /// + /// \return (*this) + template + NetworkSimplexSimple& costMap(const CostMap& map) { + Arc a; _graph.first(a); + for (; a != INVALID; _graph.next(a)) { + _cost[getArcID(a)] = map[a]; + } + return *this; + } + + + /// \brief Set the costs of one arc. + /// + /// This function sets the costs of one arcs. + /// Done for memory reasons + /// + /// \param arc An arc. + /// \param arc A cost + /// + /// \return (*this) + template + NetworkSimplexSimple& setCost(const Arc& arc, const Value cost) { + _cost[getArcID(arc)] = cost; + return *this; + } + + + /// \brief Set the supply values of the nodes. + /// + /// This function sets the supply values of the nodes. + /// If neither this function nor \ref stSupply() is used before + /// calling \ref run(), the supply of each node will be set to zero. + /// + /// \param map A node map storing the supply values. + /// Its \c Value type must be convertible to the \c Value type + /// of the algorithm. + /// + /// \return (*this) + template + NetworkSimplexSimple& supplyMap(const SupplyMap& map) { + Node n; _graph.first(n); + for (; n != INVALIDNODE; _graph.next(n)) { + _supply[_node_id(n)] = map[n]; + } + return *this; + } + template + NetworkSimplexSimple& supplyMap(const SupplyMap* map1, int n1, const SupplyMap* map2, int n2) { + Node n; _graph.first(n); + for (; n != INVALIDNODE; _graph.next(n)) { + if (n + NetworkSimplexSimple& supplyMapAll(SupplyMap val1, int n1, SupplyMap val2, int n2) { + Node n; _graph.first(n); + for (; n != INVALIDNODE; _graph.next(n)) { + if (n(*this) + NetworkSimplexSimple& stSupply(const Node& s, const Node& t, Value k) { + for (int i = 0; i != _node_num; ++i) { + _supply[i] = 0; + } + _supply[_node_id(s)] = k; + _supply[_node_id(t)] = -k; + return *this; + } + + /// \brief Set the type of the supply constraints. + /// + /// This function sets the type of the supply/demand constraints. + /// If it is not used before calling \ref run(), the \ref GEQ supply + /// type will be used. + /// + /// For more information, see \ref SupplyType. + /// + /// \return (*this) + NetworkSimplexSimple& supplyType(SupplyType supply_type) { + _stype = supply_type; + return *this; + } + + /// @} + + /// \name Execution Control + /// The algorithm can be executed using \ref run(). + + /// @{ + + /// \brief Run the algorithm. + /// + /// This function runs the algorithm. + /// The paramters can be specified using functions \ref lowerMap(), + /// \ref upperMap(), \ref costMap(), \ref supplyMap(), \ref stSupply(), + /// \ref supplyType(). + /// For example, + /// \code + /// NetworkSimplexSimple ns(graph); + /// ns.lowerMap(lower).upperMap(upper).costMap(cost) + /// .supplyMap(sup).run(); + /// \endcode + /// + /// This function can be called more than once. All the given parameters + /// are kept for the next call, unless \ref resetParams() or \ref reset() + /// is used, thus only the modified parameters have to be set again. + /// If the underlying digraph was also modified after the construction + /// of the class (or the last \ref reset() call), then the \ref reset() + /// function must be called. + /// + /// \param pivot_rule The pivot rule that will be used during the + /// algorithm. For more information, see \ref PivotRule. + /// + /// \return \c INFEASIBLE if no feasible flow exists, + /// \n \c OPTIMAL if the problem has optimal solution + /// (i.e. it is feasible and bounded), and the algorithm has found + /// optimal flow and node potentials (primal and dual solutions), + /// \n \c UNBOUNDED if the objective function of the problem is + /// unbounded, i.e. there is a directed cycle having negative total + /// cost and infinite upper bound. + /// + /// \see ProblemType, PivotRule + /// \see resetParams(), reset() + ProblemType run() { +#if DEBUG_LVL>0 + std::cout << "OPTIMAL = " << OPTIMAL << "\nINFEASIBLE = " << INFEASIBLE << "\nUNBOUNDED = " << UNBOUNDED << "\nMAX_ITER_REACHED" << MAX_ITER_REACHED << "\n" ; +#endif + if (!init()) return INFEASIBLE; +#if DEBUG_LVL>0 + std::cout << "Init done, starting iterations\n"; +#endif + + return start(); + } + + /// \brief Reset all the parameters that have been given before. + /// + /// This function resets all the paramaters that have been given + /// before using functions \ref lowerMap(), \ref upperMap(), + /// \ref costMap(), \ref supplyMap(), \ref stSupply(), \ref supplyType(). + /// + /// It is useful for multiple \ref run() calls. Basically, all the given + /// parameters are kept for the next \ref run() call, unless + /// \ref resetParams() or \ref reset() is used. + /// If the underlying digraph was also modified after the construction + /// of the class or the last \ref reset() call, then the \ref reset() + /// function must be used, otherwise \ref resetParams() is sufficient. + /// + /// For example, + /// \code + /// NetworkSimplexSimple ns(graph); + /// + /// // First run + /// ns.lowerMap(lower).upperMap(upper).costMap(cost) + /// .supplyMap(sup).run(); + /// + /// // Run again with modified cost map (resetParams() is not called, + /// // so only the cost map have to be set again) + /// cost[e] += 100; + /// ns.costMap(cost).run(); + /// + /// // Run again from scratch using resetParams() + /// // (the lower bounds will be set to zero on all arcs) + /// ns.resetParams(); + /// ns.upperMap(capacity).costMap(cost) + /// .supplyMap(sup).run(); + /// \endcode + /// + /// \return (*this) + /// + /// \see reset(), run() + NetworkSimplexSimple& resetParams() { + for (int i = 0; i != _node_num; ++i) { + _supply[i] = 0; + } + for (ArcsType i = 0; i != _arc_num; ++i) { + _cost[i] = 1; + } + _stype = GEQ; + return *this; + } + + + /// \brief Reset the internal data structures and all the parameters + /// that have been given before. + /// + /// This function resets the internal data structures and all the + /// paramaters that have been given before using functions \ref lowerMap(), + /// \ref upperMap(), \ref costMap(), \ref supplyMap(), \ref stSupply(), + /// \ref supplyType(). + /// + /// It is useful for multiple \ref run() calls. Basically, all the given + /// parameters are kept for the next \ref run() call, unless + /// \ref resetParams() or \ref reset() is used. + /// If the underlying digraph was also modified after the construction + /// of the class or the last \ref reset() call, then the \ref reset() + /// function must be used, otherwise \ref resetParams() is sufficient. + /// + /// See \ref resetParams() for examples. + /// + /// \return (*this) + /// + /// \see resetParams(), run() + NetworkSimplexSimple& reset() { + // Resize vectors + _node_num = _init_nb_nodes; + _arc_num = _init_nb_arcs; + int all_node_num = _node_num + 1; + ArcsType max_arc_num = _arc_num + 2 * _node_num; + + _source.resize(max_arc_num); + _target.resize(max_arc_num); + + _cost.resize(max_arc_num); + _supply.resize(all_node_num); + _flow.resize(max_arc_num); + _pi.resize(all_node_num); + + _parent.resize(all_node_num); + _pred.resize(all_node_num); + _forward.resize(all_node_num); + _thread.resize(all_node_num); + _rev_thread.resize(all_node_num); + _succ_num.resize(all_node_num); + _last_succ.resize(all_node_num); + _state.resize(max_arc_num); + + + //_arc_mixing=false; + if (_arc_mixing && _node_num > 1) { + // Store the arcs in a mixed order + //ArcsType k = std::max(ArcsType(std::sqrt(double(_arc_num))), ArcsType(10)); + const ArcsType k = std::max(ArcsType(_arc_num / _node_num), ArcsType(3)); + mixingCoeff = k; + subsequence_length = _arc_num / mixingCoeff + 1; + num_big_subsequences = _arc_num % mixingCoeff; + num_total_big_subsequence_numbers = subsequence_length * num_big_subsequences; + +#pragma omp parallel for schedule(static) + for (Arc a = 0; a <= _graph.maxArcId(); a++) { // --a <=> _graph.next(a) , -1 == INVALID + ArcsType i = sequence(_graph.maxArcId()-a); + _source[i] = _node_id(_graph.source(a)); + _target[i] = _node_id(_graph.target(a)); + } + } else { + // Store the arcs in the original order + ArcsType i = 0; + Arc a; _graph.first(a); + for (; a != INVALID; _graph.next(a), ++i) { + _source[i] = _node_id(_graph.source(a)); + _target[i] = _node_id(_graph.target(a)); + //_arc_id[a] = i; + } + } + + // Reset parameters + resetParams(); + return *this; + } + + /// @} + + /// \name Query Functions + /// The results of the algorithm can be obtained using these + /// functions.\n + /// The \ref run() function must be called before using them. + + /// @{ + + /// \brief Return the total cost of the found flow. + /// + /// This function returns the total cost of the found flow. + /// Its complexity is O(e). + /// + /// \note The return type of the function can be specified as a + /// template parameter. For example, + /// \code + /// ns.totalCost(); + /// \endcode + /// It is useful if the total cost cannot be stored in the \c Cost + /// type of the algorithm, which is the default return type of the + /// function. + /// + /// \pre \ref run() must be called before using this function. + /*template + Number totalCost() const { + Number c = 0; + for (ArcIt a(_graph); a != INVALID; ++a) { + int i = getArcID(a); + c += Number(_flow[i]) * Number(_cost[i]); + } + return c; + }*/ + + template + Number totalCost() const { + Number c = 0; + +#ifdef SPARSE_FLOW + #ifdef HASHMAP + typename std::unordered_map::const_iterator it; + #else + typename std::map::const_iterator it; + #endif + for (it = _flow.data.begin(); it!=_flow.data.end(); ++it) + c += Number(it->second) * Number(_cost[it->first]); + return c; +#else + for (ArcsType i = 0; i<_flow.size(); i++) + c += _flow[i] * Number(_cost[i]); + return c; +#endif + } + +#ifndef DOXYGEN + Cost totalCost() const { + return totalCost(); + } +#endif + + /// \brief Return the flow on the given arc. + /// + /// This function returns the flow on the given arc. + /// + /// \pre \ref run() must be called before using this function. + Value flow(const Arc& a) const { + return _flow[getArcID(a)]; + } + + /// \brief Return the flow map (the primal solution). + /// + /// This function copies the flow value on each arc into the given + /// map. The \c Value type of the algorithm must be convertible to + /// the \c Value type of the map. + /// + /// \pre \ref run() must be called before using this function. + template + void flowMap(FlowMap &map) const { + Arc a; _graph.first(a); + for (; a != INVALID; _graph.next(a)) { + map.set(a, _flow[getArcID(a)]); + } + } + + /// \brief Return the potential (dual value) of the given node. + /// + /// This function returns the potential (dual value) of the + /// given node. + /// + /// \pre \ref run() must be called before using this function. + Cost potential(const Node& n) const { + return _pi[_node_id(n)]; + } + + /// \brief Return the potential map (the dual solution). + /// + /// This function copies the potential (dual value) of each node + /// into the given map. + /// The \c Cost type of the algorithm must be convertible to the + /// \c Value type of the map. + /// + /// \pre \ref run() must be called before using this function. + template + void potentialMap(PotentialMap &map) const { + Node n; _graph.first(n); + for (; n != INVALID; _graph.next(n)) { + map.set(n, _pi[_node_id(n)]); + } + } + + /// @} + + private: + + // Initialize internal data structures + bool init() { + if (_node_num == 0) return false; + + // Check the sum of supply values + _sum_supply = 0; + for (int i = 0; i != _node_num; ++i) { + _sum_supply += _supply[i]; + } + /*if (!((_stype == GEQ && _sum_supply <= 0) || + (_stype == LEQ && _sum_supply >= 0))) return false;*/ + + + // Initialize artifical cost + Cost ART_COST; + if (std::numeric_limits::is_exact) { + ART_COST = std::numeric_limits::max() / 2 + 1; + } else { + ART_COST = 0; + for (ArcsType i = 0; i != _arc_num; ++i) { + if (_cost[i] > ART_COST) ART_COST = _cost[i]; + } + ART_COST = (ART_COST + 1) * _node_num; + } + + // Initialize arc maps + for (ArcsType i = 0; i != _arc_num; ++i) { +#ifndef SPARSE_FLOW + _flow[i] = 0; //by default, the sparse matrix is empty +#endif + _state[i] = STATE_LOWER; + } +#ifdef SPARSE_FLOW + _flow = SparseValueVector(); +#endif + + // Set data for the artificial root node + _root = _node_num; + _parent[_root] = -1; + _pred[_root] = -1; + _thread[_root] = 0; + _rev_thread[0] = _root; + _succ_num[_root] = _node_num + 1; + _last_succ[_root] = _root - 1; + _supply[_root] = -_sum_supply; + _pi[_root] = 0; + + // Add artificial arcs and initialize the spanning tree data structure + if (_sum_supply == 0) { + // EQ supply constraints + _search_arc_num = _arc_num; + _all_arc_num = _arc_num + _node_num; + for (ArcsType u = 0, e = _arc_num; u != _node_num; ++u, ++e) { + _parent[u] = _root; + _pred[u] = e; + _thread[u] = u + 1; + _rev_thread[u + 1] = u; + _succ_num[u] = 1; + _last_succ[u] = u; + _state[e] = STATE_TREE; + if (_supply[u] >= 0) { + _forward[u] = true; + _pi[u] = 0; + _source[e] = u; + _target[e] = _root; + _flow[e] = _supply[u]; + _cost[e] = 0; + } else { + _forward[u] = false; + _pi[u] = ART_COST; + _source[e] = _root; + _target[e] = u; + _flow[e] = -_supply[u]; + _cost[e] = ART_COST; + } + } + } else if (_sum_supply > 0) { + // LEQ supply constraints + _search_arc_num = _arc_num + _node_num; + ArcsType f = _arc_num + _node_num; + for (ArcsType u = 0, e = _arc_num; u != _node_num; ++u, ++e) { + _parent[u] = _root; + _thread[u] = u + 1; + _rev_thread[u + 1] = u; + _succ_num[u] = 1; + _last_succ[u] = u; + if (_supply[u] >= 0) { + _forward[u] = true; + _pi[u] = 0; + _pred[u] = e; + _source[e] = u; + _target[e] = _root; + _flow[e] = _supply[u]; + _cost[e] = 0; + _state[e] = STATE_TREE; + } else { + _forward[u] = false; + _pi[u] = ART_COST; + _pred[u] = f; + _source[f] = _root; + _target[f] = u; + _flow[f] = -_supply[u]; + _cost[f] = ART_COST; + _state[f] = STATE_TREE; + _source[e] = u; + _target[e] = _root; + //_flow[e] = 0; //by default, the sparse matrix is empty + _cost[e] = 0; + _state[e] = STATE_LOWER; + ++f; + } + } + _all_arc_num = f; + } else { + // GEQ supply constraints + _search_arc_num = _arc_num + _node_num; + ArcsType f = _arc_num + _node_num; + for (ArcsType u = 0, e = _arc_num; u != _node_num; ++u, ++e) { + _parent[u] = _root; + _thread[u] = u + 1; + _rev_thread[u + 1] = u; + _succ_num[u] = 1; + _last_succ[u] = u; + if (_supply[u] <= 0) { + _forward[u] = false; + _pi[u] = 0; + _pred[u] = e; + _source[e] = _root; + _target[e] = u; + _flow[e] = -_supply[u]; + _cost[e] = 0; + _state[e] = STATE_TREE; + } else { + _forward[u] = true; + _pi[u] = -ART_COST; + _pred[u] = f; + _source[f] = u; + _target[f] = _root; + _flow[f] = _supply[u]; + _state[f] = STATE_TREE; + _cost[f] = ART_COST; + _source[e] = _root; + _target[e] = u; + //_flow[e] = 0; //by default, the sparse matrix is empty + _cost[e] = 0; + _state[e] = STATE_LOWER; + ++f; + } + } + _all_arc_num = f; + } + + return true; + } + + // Find the join node + void findJoinNode() { + int u = _source[in_arc]; + int v = _target[in_arc]; + while (u != v) { + if (_succ_num[u] < _succ_num[v]) { + u = _parent[u]; + } else { + v = _parent[v]; + } + } + join = u; + } + + // Find the leaving arc of the cycle and returns true if the + // leaving arc is not the same as the entering arc + bool findLeavingArc() { + // Initialize first and second nodes according to the direction + // of the cycle + if (_state[in_arc] == STATE_LOWER) { + first = _source[in_arc]; + second = _target[in_arc]; + } else { + first = _target[in_arc]; + second = _source[in_arc]; + } + delta = INF; + char result = 0; + Value d; + ArcsType e; + + // Search the cycle along the path form the first node to the root + for (int u = first; u != join; u = _parent[u]) { + e = _pred[u]; + d = _forward[u] ? _flow[e] : INF; + if (d < delta) { + delta = d; + u_out = u; + result = 1; + } + } + // Search the cycle along the path form the second node to the root + for (int u = second; u != join; u = _parent[u]) { + e = _pred[u]; + d = _forward[u] ? INF : _flow[e]; + if (d <= delta) { + delta = d; + u_out = u; + result = 2; + } + } + + if (result == 1) { + u_in = first; + v_in = second; + } else { + u_in = second; + v_in = first; + } + return result != 0; + } + + // Change _flow and _state vectors + void changeFlow(bool change) { + // Augment along the cycle + if (delta > 0) { + Value val = _state[in_arc] * delta; + _flow[in_arc] += val; + for (int u = _source[in_arc]; u != join; u = _parent[u]) { + _flow[_pred[u]] += _forward[u] ? -val : val; + } + for (int u = _target[in_arc]; u != join; u = _parent[u]) { + _flow[_pred[u]] += _forward[u] ? val : -val; + } + } + // Update the state of the entering and leaving arcs + if (change) { + _state[in_arc] = STATE_TREE; + _state[_pred[u_out]] = + (_flow[_pred[u_out]] == 0) ? STATE_LOWER : STATE_UPPER; + } else { + _state[in_arc] = -_state[in_arc]; + } + } + + // Update the tree structure + void updateTreeStructure() { + int old_rev_thread = _rev_thread[u_out]; + int old_succ_num = _succ_num[u_out]; + int old_last_succ = _last_succ[u_out]; + v_out = _parent[u_out]; + + // Check if u_in and u_out coincide + if (u_in == u_out) { + // Update _parent, _pred, _pred_dir + _parent[u_in] = v_in; + _pred[u_in] = in_arc; + _forward[u_in] = (u_in == _source[in_arc]); + + // Update _thread and _rev_thread + if (_thread[v_in] != u_out) { + ArcsType after = _thread[old_last_succ]; + _thread[old_rev_thread] = after; + _rev_thread[after] = old_rev_thread; + after = _thread[v_in]; + _thread[v_in] = u_out; + _rev_thread[u_out] = v_in; + _thread[old_last_succ] = after; + _rev_thread[after] = old_last_succ; + } + } else { + // Handle the case when old_rev_thread equals to v_in + // (it also means that join and v_out coincide) + int thread_continue = old_rev_thread == v_in ? + _thread[old_last_succ] : _thread[v_in]; + + // Update _thread and _parent along the stem nodes (i.e. the nodes + // between u_in and u_out, whose parent have to be changed) + int stem = u_in; // the current stem node + int par_stem = v_in; // the new parent of stem + int next_stem; // the next stem node + int last = _last_succ[u_in]; // the last successor of stem + int before, after = _thread[last]; + _thread[v_in] = u_in; + _dirty_revs.clear(); + _dirty_revs.push_back(v_in); + while (stem != u_out) { + // Insert the next stem node into the thread list + next_stem = _parent[stem]; + _thread[last] = next_stem; + _dirty_revs.push_back(last); + + // Remove the subtree of stem from the thread list + before = _rev_thread[stem]; + _thread[before] = after; + _rev_thread[after] = before; + + // Change the parent node and shift stem nodes + _parent[stem] = par_stem; + par_stem = stem; + stem = next_stem; + + // Update last and after + last = _last_succ[stem] == _last_succ[par_stem] ? + _rev_thread[par_stem] : _last_succ[stem]; + after = _thread[last]; + } + _parent[u_out] = par_stem; + _thread[last] = thread_continue; + _rev_thread[thread_continue] = last; + _last_succ[u_out] = last; + + // Remove the subtree of u_out from the thread list except for + // the case when old_rev_thread equals to v_in + if (old_rev_thread != v_in) { + _thread[old_rev_thread] = after; + _rev_thread[after] = old_rev_thread; + } + + // Update _rev_thread using the new _thread values + for (int i = 0; i != int(_dirty_revs.size()); ++i) { + int u = _dirty_revs[i]; + _rev_thread[_thread[u]] = u; + } + + // Update _pred, _pred_dir, _last_succ and _succ_num for the + // stem nodes from u_out to u_in + int tmp_sc = 0, tmp_ls = _last_succ[u_out]; + for (int u = u_out, p = _parent[u]; u != u_in; u = p, p = _parent[u]) { + _pred[u] = _pred[p]; + _forward[u] = !_forward[p]; + tmp_sc += _succ_num[u] - _succ_num[p]; + _succ_num[u] = tmp_sc; + _last_succ[p] = tmp_ls; + } + _pred[u_in] = in_arc; + _forward[u_in] = (u_in == _source[in_arc]); + _succ_num[u_in] = old_succ_num; + } + + // Update _last_succ from v_in towards the root + int up_limit_out = _last_succ[join] == v_in ? join : -1; + int last_succ_out = _last_succ[u_out]; + for (int u = v_in; u != -1 && _last_succ[u] == v_in; u = _parent[u]) { + _last_succ[u] = last_succ_out; + } + + // Update _last_succ from v_out towards the root + if (join != old_rev_thread && v_in != old_rev_thread) { + for (int u = v_out; u != up_limit_out && _last_succ[u] == old_last_succ; + u = _parent[u]) { + _last_succ[u] = old_rev_thread; + } + } else if (last_succ_out != old_last_succ) { + for (int u = v_out; u != up_limit_out && _last_succ[u] == old_last_succ; + u = _parent[u]) { + _last_succ[u] = last_succ_out; + } + } + + // Update _succ_num from v_in to join + for (int u = v_in; u != join; u = _parent[u]) { + _succ_num[u] += old_succ_num; + } + // Update _succ_num from v_out to join + for (int u = v_out; u != join; u = _parent[u]) { + _succ_num[u] -= old_succ_num; + } + } + + void updatePotential() { + Cost sigma = _pi[v_in] - _pi[u_in] - + ((_forward[u_in])?_cost[in_arc]:(-_cost[in_arc])); + int end = _thread[_last_succ[u_in]]; + for (int u = u_in; u != end; u = _thread[u]) { + _pi[u] += sigma; + } + } + + + // Heuristic initial pivots + bool initialPivots() { + Value curr, total = 0; + std::vector supply_nodes, demand_nodes; + Node u; _graph.first(u); + for (; u != INVALIDNODE; _graph.next(u)) { + curr = _supply[_node_id(u)]; + if (curr > 0) { + total += curr; + supply_nodes.push_back(u); + } else if (curr < 0) { + demand_nodes.push_back(u); + } + } + if (_sum_supply > 0) total -= _sum_supply; + if (total <= 0) return true; + + ArcVector arc_vector; + if (_sum_supply >= 0) { + if (supply_nodes.size() == 1 && demand_nodes.size() == 1) { + // Perform a reverse graph search from the sink to the source + //typename GR::template NodeMap reached(_graph, false); + BoolVector reached(_node_num, false); + Node s = supply_nodes[0], t = demand_nodes[0]; + std::vector stack; + reached[t] = true; + stack.push_back(t); + while (!stack.empty()) { + Node u, v = stack.back(); + stack.pop_back(); + if (v == s) break; + Arc a; _graph.firstIn(a, v); + for (; a != INVALID; _graph.nextIn(a)) { + if (reached[u = _graph.source(a)]) continue; + ArcsType j = getArcID(a); + arc_vector.push_back(j); + reached[u] = true; + stack.push_back(u); + } + } + } else { + arc_vector.resize(demand_nodes.size()); + // Find the min. cost incomming arc for each demand node +#pragma omp parallel for + for (int i = 0; i < demand_nodes.size(); ++i) { + Node v = demand_nodes[i]; + Cost min_cost = std::numeric_limits::max(); + Arc min_arc = INVALID; + Arc a; _graph.firstIn(a, v); + for (; a != INVALID; _graph.nextIn(a)) { + Cost c = _cost[getArcID(a)]; + if (c < min_cost) { + min_cost = c; + min_arc = a; + } + } + arc_vector[i] = getArcID(min_arc); + } + arc_vector.erase(std::remove(arc_vector.begin(), arc_vector.end(), INVALID), arc_vector.end()); + } + } else { + arc_vector.resize(supply_nodes.size()); + // Find the min. cost outgoing arc for each supply node +#pragma omp parallel for + for (int i = 0; i < int(supply_nodes.size()); ++i) { + Node u = supply_nodes[i]; + Cost min_cost = std::numeric_limits::max(); + Arc min_arc = INVALID; + Arc a; _graph.firstOut(a, u); + for (; a != INVALID; _graph.nextOut(a)) { + Cost c = _cost[getArcID(a)]; + if (c < min_cost) { + min_cost = c; + min_arc = a; + } + } + arc_vector[i] = getArcID(min_arc); + } + arc_vector.erase(std::remove(arc_vector.begin(), arc_vector.end(), INVALID), arc_vector.end()); + } + + // Perform heuristic initial pivots + for (ArcsType i = 0; i != ArcsType(arc_vector.size()); ++i) { + in_arc = arc_vector[i]; + if (_state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] - + _pi[_target[in_arc]]) >= 0) continue; + findJoinNode(); + bool change = findLeavingArc(); + if (delta >= MAX) return false; + changeFlow(change); + if (change) { + updateTreeStructure(); + updatePotential(); + } + } + return true; + } + + // Execute the algorithm + ProblemType start() { + return start(); + } + + template + ProblemType start() { + PivotRuleImpl pivot(*this); + ProblemType retVal = OPTIMAL; + + // Perform heuristic initial pivots + if (!initialPivots()) return UNBOUNDED; + + size_t iter_number = 0; + // Execute the Network Simplex algorithm + while (pivot.findEnteringArc()) { + if ((++iter_number <= max_iter&&max_iter > 0) || max_iter<=0) { +#if DEBUG_LVL>0 + if(iter_number>MAX_DEBUG_ITER) + break; + if(iter_number%1000==0||iter_number%1000==1){ + Cost curCost=totalCost(); + Value sumFlow=0; + Cost a; + a= (fabs(_pi[_source[in_arc]])>=fabs(_pi[_target[in_arc]])) ? fabs(_pi[_source[in_arc]]) : fabs(_pi[_target[in_arc]]); + a=a>=fabs(_cost[in_arc])?a:fabs(_cost[in_arc]); + for (int i=0; i<_flow.size(); i++) { + sumFlow+=_state[i]*_flow[i]; + } + std::cout << "Sum of the flow " << std::setprecision(20) << sumFlow << "\n" << iter_number << " iterations, current cost=" << curCost << "\nReduced cost=" << _state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -_pi[_target[in_arc]]) << "\nPrecision = "<< -EPSILON*(a) << "\n"; + std::cout << "Arc in = (" << _node_id(_source[in_arc]) << ", " << _node_id(_target[in_arc]) <<")\n"; + std::cout << "Supplies = (" << _supply[_source[in_arc]] << ", " << _supply[_target[in_arc]] << ")\n"; + std::cout << _cost[in_arc] << "\n"; + std::cout << _pi[_source[in_arc]] << "\n"; + std::cout << _pi[_target[in_arc]] << "\n"; + std::cout << a << "\n"; + } +#endif + + findJoinNode(); + bool change = findLeavingArc(); + if (delta >= MAX) return UNBOUNDED; + changeFlow(change); + if (change) { + updateTreeStructure(); + updatePotential(); + } + +#if DEBUG_LVL>0 + else{ + std::cout << "No change\n"; + } +#endif + +#if DEBUG_LVL>1 + std::cout << "Arc in = (" << _source[in_arc] << ", " << _target[in_arc] << ")\n"; +#endif + + + } else { + char errMess[1000]; + sprintf( errMess, "RESULT MIGHT BE INACURATE\nMax number of iteration reached, currently \%d. Sometimes iterations go on in cycle even though the solution has been reached, to check if it's the case here have a look at the minimal reduced cost. If it is very close to machine precision, you might actually have the correct solution, if not try setting the maximum number of iterations a bit higher\n",iter_number ); + std::cerr << errMess; + retVal = MAX_ITER_REACHED; + break; + } + + } + + + +#if DEBUG_LVL>0 + Cost curCost=totalCost(); + Value sumFlow=0; + Cost a; + a= (fabs(_pi[_source[in_arc]])>=fabs(_pi[_target[in_arc]])) ? fabs(_pi[_source[in_arc]]) : fabs(_pi[_target[in_arc]]); + a=a>=fabs(_cost[in_arc])?a:fabs(_cost[in_arc]); + for (int i=0; i<_flow.size(); i++) { + sumFlow+=_state[i]*_flow[i]; + } + + std::cout << "Sum of the flow " << std::setprecision(20) << sumFlow << "\n" << niter << " iterations, current cost=" << curCost << "\nReduced cost=" << _state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -_pi[_target[in_arc]]) << "\nPrecision = "<< -EPSILON*(a) << "\n"; + + std::cout << "Arc in = (" << _node_id(_source[in_arc]) << ", " << _node_id(_target[in_arc]) <<")\n"; + std::cout << "Supplies = (" << _supply[_source[in_arc]] << ", " << _supply[_target[in_arc]] << ")\n"; + +#endif + + + +#if DEBUG_LVL>1 + sumFlow=0; + for (int i=0; i<_flow.size(); i++) { + sumFlow+=_state[i]*_flow[i]; + if (_state[i]==STATE_TREE) { + std::cout << "Non zero value at (" << _node_num+1-_source[i] << ", " << _node_num+1-_target[i] << ")\n"; + } + } + std::cout << "Sum of the flow " << sumFlow << "\n"<< niter <<" iterations, current cost=" << totalCost() << "\n"; +#endif + + + + //Check feasibility + if(retVal == OPTIMAL){ + for (ArcsType e = _search_arc_num; e != _all_arc_num; ++e) { + if (_flow[e] != 0){ + if (fabs(_flow[e]) > _EPSILON) // change of the original code following issue #126 + return INFEASIBLE; + else + _flow[e]=0; + } + } + } + + // Shift potentials to meet the requirements of the GEQ/LEQ type + // optimality conditions + if (_sum_supply == 0) { + if (_stype == GEQ) { + Cost max_pot = -std::numeric_limits::max(); + for (ArcsType i = 0; i != _node_num; ++i) { + if (_pi[i] > max_pot) max_pot = _pi[i]; + } + if (max_pot > 0) { + for (ArcsType i = 0; i != _node_num; ++i) + _pi[i] -= max_pot; + } + } else { + Cost min_pot = std::numeric_limits::max(); + for (ArcsType i = 0; i != _node_num; ++i) { + if (_pi[i] < min_pot) min_pot = _pi[i]; + } + if (min_pot < 0) { + for (ArcsType i = 0; i != _node_num; ++i) + _pi[i] -= min_pot; + } + } + } + + return retVal; + } + + }; //class NetworkSimplexSimple + + ///@} + +} //namespace lemon_omp diff --git a/ot/utils.py b/ot/utils.py index 4dac0c5..6a782e6 100644 --- a/ot/utils.py +++ b/ot/utils.py @@ -7,7 +7,6 @@ Various useful functions # # License: MIT License -import multiprocessing from functools import reduce import time @@ -311,38 +310,11 @@ def label_normalization(y, start=0): return y -def fun(f, q_in, q_out): - """ Utility function for parmap with no serializing problems """ - while True: - i, x = q_in.get() - if i is None: - break - q_out.put((i, f(x))) - - -def parmap(f, X, nprocs=multiprocessing.cpu_count()): - """ paralell map for multiprocessing (only map on windows)""" - - if not sys.platform.endswith('win32') and not sys.platform.endswith('darwin'): - - q_in = multiprocessing.Queue(1) - q_out = multiprocessing.Queue() - - proc = [multiprocessing.Process(target=fun, args=(f, q_in, q_out)) - for _ in range(nprocs)] - for p in proc: - p.daemon = True - p.start() - - sent = [q_in.put((i, x)) for i, x in enumerate(X)] - [q_in.put((None, None)) for _ in range(nprocs)] - res = [q_out.get() for _ in range(len(sent))] - - [p.join() for p in proc] - - return [x for i, x in sorted(res)] - else: - return list(map(f, X)) +def parmap(f, X, nprocs="default"): + """ paralell map for multiprocessing. + The function has been deprecated and only performs a regular map. + """ + return list(map(f, X)) def check_params(**kwargs): diff --git a/setup.py b/setup.py index 37a5824..ef95eaf 100644 --- a/setup.py +++ b/setup.py @@ -11,6 +11,8 @@ from setuptools.extension import Extension import numpy from Cython.Build import cythonize +sys.path.append(os.path.join("ot", "helpers")) +from openmp_helpers import check_openmp_support # dirty but working __version__ = re.search( @@ -30,7 +32,14 @@ if 'clean' in sys.argv[1:]: os.remove('ot/lp/emd_wrap.cpp') # add platform dependant optional compilation argument -compile_args = ["-O3"] +openmp_supported, flags = check_openmp_support() +compile_args = ["/O2" if sys.platform == "win32" else "-O3"] +link_args = [] + +if openmp_supported: + compile_args += flags + ["/DOMP" if sys.platform == 'win32' else "-DOMP"] + link_args += flags + if sys.platform.startswith('darwin'): compile_args.append("-stdlib=libc++") sdk_path = subprocess.check_output(['xcrun', '--show-sdk-path']) @@ -52,6 +61,7 @@ setup( language="c++", include_dirs=[numpy.get_include(), os.path.join(ROOT, 'ot/lp')], extra_compile_args=compile_args, + extra_link_args=link_args )), platforms=['linux', 'macosx', 'windows'], download_url='https://github.com/PythonOT/POT/archive/{}.tar.gz'.format(__version__), diff --git a/test/test_helpers.py b/test/test_helpers.py new file mode 100644 index 0000000..8bd0015 --- /dev/null +++ b/test/test_helpers.py @@ -0,0 +1,26 @@ +"""Tests for helpers functions """ + +# Author: Remi Flamary +# +# License: MIT License + +import os +import sys + +sys.path.append(os.path.join("ot", "helpers")) + +from openmp_helpers import get_openmp_flag, check_openmp_support # noqa +from pre_build_helpers import _get_compiler, compile_test_program # noqa + + +def test_helpers(): + + compiler = _get_compiler() + + get_openmp_flag(compiler) + + s = '#include \n#include \n\nint main(void) {\n\tprintf("Hello world!\\n");\n\treturn 0;\n}' + output, _ = compile_test_program(s) + assert len(output) == 1 and output[0] == "Hello world!" + + check_openmp_support() -- cgit v1.2.3