8 files changed, 2965 insertions, 0 deletions
diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h
new file mode 100644
index 0000000..f42e222
--- /dev/null
+++ b/ot/lp/EMD.h
@@ -0,0 +1,35 @@
+/* This file is a c++ wrapper function for computing the transportation cost
+ * between two vectors given a cost matrix.
+ *
+ * It was written by Antoine Rolet (2014) and mainly consists of a wrapper
+ * of the code written by Nicolas Bonneel available on this page
+ *          http://people.seas.harvard.edu/~nbonneel/FastTransport/
+ *
+ * It was then modified to make it more amenable to python inline calling
+ *
+ * Please give relevant credit to the original author (Nicolas Bonneel) if
+ * you use this code for a publication.
+ *
+ */
+
+
+#ifndef EMD_H
+#define EMD_H
+
+#include <iostream>
+#include <vector>
+#include "network_simplex_simple.h"
+
+using namespace lemon;
+typedef unsigned int node_id_type;
+
+enum ProblemType {  // status codes; the Python wrapper's check_result() maps them to warnings
+    INFEASIBLE,  // EMD_wrap returns this as soon as it meets a negative weight
+    OPTIMAL,  // the only code for which check_result() issues no warning
+    UNBOUNDED,  // reported to the user as "Problem unbounded"
+	MAX_ITER_REACHED  // iteration cap hit before optimality; results are still written
+};
+
+int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter);
+
+#endif
diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp
new file mode 100644
index 0000000..fc7ca63
--- /dev/null
+++ b/ot/lp/EMD_wrapper.cpp
@@ -0,0 +1,107 @@
+/* This file is a c++ wrapper function for computing the transportation cost
+ * between two vectors given a cost matrix.
+ *
+ * It was written by Antoine Rolet (2014) and mainly consists of a wrapper
+ * of the code written by Nicolas Bonneel available on this page
+ *          http://people.seas.harvard.edu/~nbonneel/FastTransport/
+ *
+ * It was then modified to make it more amenable to python inline calling
+ *
+ * Please give relevant credit to the original author (Nicolas Bonneel) if
+ * you use this code for a publication.
+ *
+ */
+
+#include "EMD.h"
+
+
+int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G,
+                double* alpha, double* beta, double *cost, int maxIter)  {
+    // Transport between histograms X (n1 weights) and Y (n2 weights). D (costs)
+    // and G (flows) are n1 x n2 matrices stored in row-major C order. Outputs are
+    // written only if the solver reports OPTIMAL or MAX_ITER_REACHED; G, alpha
+    // and beta only at entries whose weight is > 0. Returns the solver status.
+    typedef FullBipartiteDigraph Digraph;
+    DIGRAPH_TYPEDEFS(FullBipartiteDigraph);
+    // Count the strictly positive entries of X (n) and Y (m)
+    int n = 0;
+    for (int i=0; i<n1; i++) {
+        const double val = X[i];
+        if (val>0) {
+            n++;
+        } else if (val<0) {
+            return INFEASIBLE;  // a negative weight aborts before any solve
+        }
+    }
+    int m = 0;
+    for (int i=0; i<n2; i++) {
+        const double val = Y[i];
+        if (val>0) {
+            m++;
+        } else if (val<0) {
+            return INFEASIBLE;
+        }
+    }
+
+    // Define the graph
+
+    std::vector<int> indI(n), indJ(m);
+    std::vector<double> weights1(n), weights2(m);
+    Digraph di(n, m);
+    NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m, maxIter);
+
+    // Set supply and demand, don't account for 0 values (faster)
+
+    int cur = 0;
+    for (int i=0; i<n1; i++) {
+        const double val = X[i];
+        if (val>0) {
+            weights1[ cur ] = val;
+            indI[cur++]=i;
+        }
+    }
+
+    // Demand is actually negative supply...
+
+    cur=0;
+    for (int i=0; i<n2; i++) {
+        const double val = Y[i];
+        if (val>0) {
+            weights2[ cur ] = -val;
+            indJ[cur++]=i;
+        }
+    }
+
+    // data() stays valid for empty vectors, unlike &v[0] (n or m may be 0)
+    net.supplyMap(weights1.data(), n, weights2.data(), m);
+
+    // Set the cost of each edge
+    for (int i=0; i<n; i++) {
+        for (int j=0; j<m; j++) {
+            const double val = D[indI[i]*n2+indJ[j]];
+            net.setCost(di.arcFromId(i*m+j), val);
+        }
+    }
+
+
+    // Solve the problem with the network simplex algorithm
+
+    int ret=net.run();
+    if (ret==(int)net.OPTIMAL || ret==(int)net.MAX_ITER_REACHED) {
+        *cost = 0;
+        Arc a; di.first(a);
+        for (; a != INVALID; di.next(a)) {
+            int i = di.source(a);
+            int j = di.target(a);  // indJ is indexed with j-n below
+            double flow = net.flow(a);
+            *cost += flow * (*(D+indI[i]*n2+indJ[j-n]));
+            *(G+indI[i]*n2+indJ[j-n]) = flow;
+            *(alpha + indI[i]) = -net.potential(i);
+            *(beta + indJ[j-n]) = net.potential(j);
+        }
+
+    }
+
+
+    return ret;
+}
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
new file mode 100644
index 0000000..0c92810
--- /dev/null
+++ b/ot/lp/__init__.py
@@ -0,0 +1,618 @@
+# -*- coding: utf-8 -*-
+"""
+Solvers for the original linear program OT problem
+
+
+
+"""
+
+# Author: Remi Flamary <remi.flamary@unice.fr>
+#
+# License: MIT License
+
+import multiprocessing
+import sys
+import numpy as np
+from scipy.sparse import coo_matrix
+
+from .import cvx
+
+# import compiled emd
+from .emd_wrap import emd_c, check_result, emd_1d_sorted
+from ..utils import parmap
+from .cvx import barycenter
+from ..utils import dist
+
+__all__=['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx',
+         'emd_1d', 'emd2_1d', 'wasserstein_1d']
+
+
+def emd(a, b, M, numItermax=100000, log=False):
+    r"""Solves the Earth Movers distance problem and returns the OT matrix
+
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F
+
+        s.t. \gamma 1 = a
+             \gamma^T 1= b
+             \gamma\geq 0
+    where :
+
+    - M is the metric cost matrix
+    - a and b are the sample weights
+
+    .. warning::
+        Note that the M matrix needs to be a C-order numpy.array in float64 
+        format.
+
+    Uses the algorithm proposed in [1]_
+
+    Parameters
+    ----------
+    a : (ns,) numpy.ndarray, float64
+        Source histogram (uniform weight if empty list)
+    b : (nt,) numpy.ndarray, float64
+        Target histogram (uniform weight if empty list)
+    M : (ns,nt) numpy.ndarray, float64
+        Loss matrix (c-order array with type float64)
+    numItermax : int, optional (default=100000)
+        The maximum number of iterations before stopping the optimization
+        algorithm if it has not converged.
+    log: bool, optional (default=False)
+        If True, returns a dictionary containing the cost and dual
+        variables. Otherwise returns only the optimal transportation matrix.
+
+    Returns
+    -------
+    gamma: (ns x nt) numpy.ndarray
+        Optimal transportation matrix for the given parameters
+    log: dict
+        If input log is true, a dictionary containing the cost and dual
+        variables and exit status
+
+
+    Examples
+    --------
+
+    Simple example with obvious solution. The function emd accepts lists and
+    perform automatic conversion to numpy arrays
+
+    >>> import ot
+    >>> a=[.5,.5]
+    >>> b=[.5,.5]
+    >>> M=[[0.,1.],[1.,0.]]
+    >>> ot.emd(a,b,M)
+    array([[0.5, 0. ],
+           [0. , 0.5]])
+
+    References
+    ----------
+
+    .. [1] Bonneel, N., Van De Panne, M., Paris, S., & Heidrich, W.
+        (2011, December).  Displacement interpolation using Lagrangian mass
+        transport. In ACM Transactions on Graphics (TOG) (Vol. 30, No. 6, p.
+        158). ACM.
+
+    See Also
+    --------
+    ot.bregman.sinkhorn : Entropic regularized OT
+    ot.optim.cg : General regularized OT"""
+
+    a = np.asarray(a, dtype=np.float64)
+    b = np.asarray(b, dtype=np.float64)
+    M = np.asarray(M, dtype=np.float64)
+
+    # if empty array given then use uniform distributions
+    if len(a) == 0:
+        a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
+    if len(b) == 0:
+        b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
+
+    G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
+    result_code_string = check_result(result_code)
+    if log:
+        log = {}
+        log['cost'] = cost
+        log['u'] = u
+        log['v'] = v
+        log['warning'] = result_code_string
+        log['result_code'] = result_code
+        return G, log
+    return G
+
+
+def emd2(a, b, M, processes=multiprocessing.cpu_count(),
+         numItermax=100000, log=False, return_matrix=False):
+    r"""Solves the Earth Movers distance problem and returns the loss
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F
+
+        s.t. \gamma 1 = a
+             \gamma^T 1= b
+             \gamma\geq 0
+    where :
+
+    - M is the metric cost matrix
+    - a and b are the sample weights
+
+    .. warning::
+        Note that the M matrix needs to be a C-order numpy.array in float64 
+        format.
+
+    Uses the algorithm proposed in [1]_
+
+    Parameters
+    ----------
+    a : (ns,) numpy.ndarray, float64
+        Source histogram (uniform weight if empty list)
+    b : (nt,) numpy.ndarray, float64
+        Target histogram (uniform weight if empty list)
+    M : (ns,nt) numpy.ndarray, float64
+        Loss matrix (c-order array with type float64)
+    processes : int, optional (default=nb cpu)
+        Nb of processes used for multiple emd computation (not used on windows)
+    numItermax : int, optional (default=100000)
+        The maximum number of iterations before stopping the optimization
+        algorithm if it has not converged.
+    log: boolean, optional (default=False)
+        If True, returns a dictionary containing the cost and dual
+        variables. Otherwise returns only the optimal transportation cost.
+    return_matrix: boolean, optional (default=False)
+        If True, returns the optimal transportation matrix in the log.
+
+    Returns
+    -------
+    gamma: (ns x nt) ndarray
+        Optimal transportation matrix for the given parameters
+    log: dictnp
+        If input log is true, a dictionary containing the cost and dual
+        variables and exit status
+
+
+    Examples
+    --------
+
+    Simple example with obvious solution. The function emd accepts lists and
+    perform automatic conversion to numpy arrays
+
+
+    >>> import ot
+    >>> a=[.5,.5]
+    >>> b=[.5,.5]
+    >>> M=[[0.,1.],[1.,0.]]
+    >>> ot.emd2(a,b,M)
+    0.0
+
+    References
+    ----------
+
+    .. [1] Bonneel, N., Van De Panne, M., Paris, S., & Heidrich, W.
+        (2011, December).  Displacement interpolation using Lagrangian mass
+        transport. In ACM Transactions on Graphics (TOG) (Vol. 30, No. 6, p.
+        158). ACM.
+
+    See Also
+    --------
+    ot.bregman.sinkhorn : Entropic regularized OT
+    ot.optim.cg : General regularized OT"""
+
+    a = np.asarray(a, dtype=np.float64)
+    b = np.asarray(b, dtype=np.float64)
+    M = np.asarray(M, dtype=np.float64)
+
+    # problem with pikling Forks
+    if sys.platform.endswith('win32'):
+        processes=1
+
+    # if empty array given then use uniform distributions
+    if len(a) == 0:
+        a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
+    if len(b) == 0:
+        b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
+
+    if log or return_matrix:
+        def f(b):
+            G, cost, u, v, resultCode = emd_c(a, b, M, numItermax)
+            result_code_string = check_result(resultCode)
+            log = {}
+            if return_matrix:
+                log['G'] = G
+            log['u'] = u
+            log['v'] = v
+            log['warning'] = result_code_string
+            log['result_code'] = resultCode
+            return [cost, log]
+    else:
+        def f(b):
+            G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
+            check_result(result_code)
+            return cost
+
+    if len(b.shape) == 1:
+        return f(b)
+    nb = b.shape[1]
+
+    if processes>1:
+        res = parmap(f, [b[:, i] for i in range(nb)], processes)
+    else:
+        res = list(map(f, [b[:, i].copy() for i in range(nb)]))
+
+    return res
+
+
+
+def free_support_barycenter(measures_locations, measures_weights, X_init, b=None, weights=None, numItermax=100, stopThr=1e-7, verbose=False, log=None):
+    """
+    Solves the free support (locations of the barycenters are optimized, not the weights) Wasserstein barycenter problem (i.e. the weighted Frechet mean for the 2-Wasserstein distance)
+
+    The function solves the Wasserstein barycenter problem when the barycenter measure is constrained to be supported on k atoms.
+    This problem is considered in [1] (Algorithm 2). There are two differences with the following codes:
+    - we do not optimize over the weights
+    - we do not do line search for the locations updates, we use i.e. theta = 1 in [1] (Algorithm 2). This can be seen as a discrete implementation of the fixed-point algorithm of [2] proposed in the continuous setting.
+
+    Parameters
+    ----------
+    measures_locations : list of (k_i,d) numpy.ndarray
+        The discrete support of a measure supported on k_i locations of a d-dimensional space (k_i can be different for each element of the list)
+    measures_weights : list of (k_i,) numpy.ndarray
+        Numpy arrays where each numpy array has k_i non-negatives values summing to one representing the weights of each discrete input measure
+
+    X_init : (k,d) np.ndarray
+        Initialization of the support locations (on k atoms) of the barycenter
+    b : (k,) np.ndarray
+        Initialization of the weights of the barycenter (non-negatives, sum to 1)
+    weights : (k,) np.ndarray
+        Initialization of the coefficients of the barycenter (non-negatives, sum to 1)
+
+    numItermax : int, optional
+        Max number of iterations
+    stopThr : float, optional
+        Stop threshold on error (>0)
+    verbose : bool, optional
+        Print information along iterations
+    log : bool, optional
+        record log if True
+
+    Returns
+    -------
+    X : (k,d) np.ndarray
+        Support locations (on k atoms) of the barycenter
+
+    References
+    ----------
+
+    .. [1] Cuturi, Marco, and Arnaud Doucet. "Fast computation of Wasserstein barycenters." International Conference on Machine Learning. 2014.
+
+    .. [2]  Álvarez-Esteban, Pedro C., et al. "A fixed-point approach to barycenters in Wasserstein space." Journal of Mathematical Analysis and Applications 441.2 (2016): 744-762.
+
+    """
+
+    iter_count = 0
+
+    N = len(measures_locations)
+    k = X_init.shape[0]
+    d = X_init.shape[1]
+    if b is None:
+        b = np.ones((k,))/k
+    if weights is None:
+        weights = np.ones((N,)) / N
+
+    X = X_init
+
+    log_dict = {}
+    displacement_square_norms = []
+
+    displacement_square_norm = stopThr + 1.
+
+    while ( displacement_square_norm > stopThr and iter_count < numItermax ):
+
+        T_sum = np.zeros((k, d))
+
+        for (measure_locations_i, measure_weights_i, weight_i) in zip(measures_locations, measures_weights, weights.tolist()):
+
+            M_i = dist(X, measure_locations_i)
+            T_i = emd(b, measure_weights_i, M_i)
+            T_sum = T_sum + weight_i * np.reshape(1. / b, (-1, 1)) * np.matmul(T_i, measure_locations_i)
+
+        displacement_square_norm = np.sum(np.square(T_sum-X))
+        if log:
+            displacement_square_norms.append(displacement_square_norm)
+
+        X = T_sum
+
+        if verbose:
+            print('iteration %d, displacement_square_norm=%f\n', iter_count, displacement_square_norm)
+
+        iter_count += 1
+
+    if log:
+        log_dict['displacement_square_norms'] = displacement_square_norms
+        return X, log_dict
+    else:
+        return X
+
+
+def emd_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True,
+           log=False):
+    r"""Solves the Earth Movers distance problem between 1d measures and returns
+    the OT matrix
+
+
+    .. math::
+        \gamma = arg\min_\gamma \sum_i \sum_j \gamma_{ij} d(x_a[i], x_b[j])
+
+        s.t. \gamma 1 = a,
+             \gamma^T 1= b,
+             \gamma\geq 0
+    where :
+
+    - d is the metric
+    - x_a and x_b are the samples
+    - a and b are the sample weights
+
+    When 'minkowski' is used as a metric, :math:`d(x, y) = |x - y|^p`.
+
+    Uses the algorithm detailed in [1]_
+
+    Parameters
+    ----------
+    x_a : (ns,) or (ns, 1) ndarray, float64
+        Source dirac locations (on the real line)
+    x_b : (nt,) or (ns, 1) ndarray, float64
+        Target dirac locations (on the real line)
+    a : (ns,) ndarray, float64, optional
+        Source histogram (default is uniform weight)
+    b : (nt,) ndarray, float64, optional
+        Target histogram (default is uniform weight)
+    metric: str, optional (default='sqeuclidean')
+        Metric to be used. Only strings listed in :func:`ot.dist` are accepted.
+        Due to implementation details, this function runs faster when
+        `'sqeuclidean'`, `'cityblock'`,  or `'euclidean'` metrics are used.
+    p: float, optional (default=1.0)
+         The p-norm to apply for if metric='minkowski'
+    dense: boolean, optional (default=True)
+        If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt).
+        Otherwise returns a sparse representation using scipy's `coo_matrix`
+        format. Due to implementation details, this function runs faster when
+        `'sqeuclidean'`, `'minkowski'`, `'cityblock'`,  or `'euclidean'` metrics
+        are used.
+    log: boolean, optional (default=False)
+        If True, returns a dictionary containing the cost.
+        Otherwise returns only the optimal transportation matrix.
+
+    Returns
+    -------
+    gamma: (ns, nt) ndarray
+        Optimal transportation matrix for the given parameters
+    log: dict
+        If input log is True, a dictionary containing the cost
+
+
+    Examples
+    --------
+
+    Simple example with obvious solution. The function emd_1d accepts lists and
+    performs automatic conversion to numpy arrays
+
+    >>> import ot
+    >>> a=[.5, .5]
+    >>> b=[.5, .5]
+    >>> x_a = [2., 0.]
+    >>> x_b = [0., 3.]
+    >>> ot.emd_1d(x_a, x_b, a, b)
+    array([[0. , 0.5],
+           [0.5, 0. ]])
+    >>> ot.emd_1d(x_a, x_b)
+    array([[0. , 0.5],
+           [0.5, 0. ]])
+
+    References
+    ----------
+
+    .. [1]  Peyré, G., & Cuturi, M. (2017). "Computational Optimal
+        Transport", 2018.
+
+    See Also
+    --------
+    ot.lp.emd : EMD for multidimensional distributions
+    ot.lp.emd2_1d : EMD for 1d distributions (returns cost instead of the
+        transportation matrix)
+    """
+    a = np.asarray(a, dtype=np.float64)
+    b = np.asarray(b, dtype=np.float64)
+    x_a = np.asarray(x_a, dtype=np.float64)
+    x_b = np.asarray(x_b, dtype=np.float64)
+
+    assert (x_a.ndim == 1 or x_a.ndim == 2 and x_a.shape[1] == 1), \
+        "emd_1d should only be used with monodimensional data"
+    assert (x_b.ndim == 1 or x_b.ndim == 2 and x_b.shape[1] == 1), \
+        "emd_1d should only be used with monodimensional data"
+
+    # if empty array given then use uniform distributions
+    if a.ndim == 0 or len(a) == 0:
+        a = np.ones((x_a.shape[0],), dtype=np.float64) / x_a.shape[0]
+    if b.ndim == 0 or len(b) == 0:
+        b = np.ones((x_b.shape[0],), dtype=np.float64) / x_b.shape[0]
+
+    x_a_1d = x_a.reshape((-1, ))
+    x_b_1d = x_b.reshape((-1, ))
+    perm_a = np.argsort(x_a_1d)
+    perm_b = np.argsort(x_b_1d)
+
+    G_sorted, indices, cost = emd_1d_sorted(a, b,
+                                            x_a_1d[perm_a], x_b_1d[perm_b],
+                                            metric=metric, p=p)
+    G = coo_matrix((G_sorted, (perm_a[indices[:, 0]], perm_b[indices[:, 1]])),
+                   shape=(a.shape[0], b.shape[0]))
+    if dense:
+        G = G.toarray()
+    if log:
+        log = {'cost': cost}
+        return G, log
+    return G
+
+
+def emd2_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True,
+            log=False):
+    r"""Solves the Earth Movers distance problem between 1d measures and returns
+    the loss
+
+
+    .. math::
+        \gamma = arg\min_\gamma \sum_i \sum_j \gamma_{ij} d(x_a[i], x_b[j])
+
+        s.t. \gamma 1 = a,
+             \gamma^T 1= b,
+             \gamma\geq 0
+    where :
+
+    - d is the metric
+    - x_a and x_b are the samples
+    - a and b are the sample weights
+
+    When 'minkowski' is used as a metric, :math:`d(x, y) = |x - y|^p`.
+
+    Uses the algorithm detailed in [1]_
+
+    Parameters
+    ----------
+    x_a : (ns,) or (ns, 1) ndarray, float64
+        Source dirac locations (on the real line)
+    x_b : (nt,) or (ns, 1) ndarray, float64
+        Target dirac locations (on the real line)
+    a : (ns,) ndarray, float64, optional
+        Source histogram (default is uniform weight)
+    b : (nt,) ndarray, float64, optional
+        Target histogram (default is uniform weight)
+    metric: str, optional (default='sqeuclidean')
+        Metric to be used. Only strings listed in :func:`ot.dist` are accepted.
+        Due to implementation details, this function runs faster when
+        `'sqeuclidean'`, `'minkowski'`, `'cityblock'`,  or `'euclidean'` metrics
+        are used.
+    p: float, optional (default=1.0)
+         The p-norm to apply for if metric='minkowski'
+    dense: boolean, optional (default=True)
+        If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt).
+        Otherwise returns a sparse representation using scipy's `coo_matrix`
+        format. Only used if log is set to True. Due to implementation details,
+        this function runs faster when dense is set to False.
+    log: boolean, optional (default=False)
+        If True, returns a dictionary containing the transportation matrix.
+        Otherwise returns only the loss.
+
+    Returns
+    -------
+    loss: float
+        Cost associated to the optimal transportation
+    log: dict
+        If input log is True, a dictionary containing the Optimal transportation
+        matrix for the given parameters
+
+
+    Examples
+    --------
+
+    Simple example with obvious solution. The function emd2_1d accepts lists and
+    performs automatic conversion to numpy arrays
+
+    >>> import ot
+    >>> a=[.5, .5]
+    >>> b=[.5, .5]
+    >>> x_a = [2., 0.]
+    >>> x_b = [0., 3.]
+    >>> ot.emd2_1d(x_a, x_b, a, b)
+    0.5
+    >>> ot.emd2_1d(x_a, x_b)
+    0.5
+
+    References
+    ----------
+
+    .. [1]  Peyré, G., & Cuturi, M. (2017). "Computational Optimal
+        Transport", 2018.
+
+    See Also
+    --------
+    ot.lp.emd2 : EMD for multidimensional distributions
+    ot.lp.emd_1d : EMD for 1d distributions (returns the transportation matrix
+        instead of the cost)
+    """
+    # If we do not return G (log==False), then we should not to cast it to dense
+    # (useless overhead)
+    G, log_emd = emd_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric=metric, p=p,
+                        dense=dense and log, log=True)
+    cost = log_emd['cost']
+    if log:
+        log_emd = {'G': G}
+        return cost, log_emd
+    return cost
+
+
+def wasserstein_1d(x_a, x_b, a=None, b=None, p=1.):
+    r"""Solves the p-Wasserstein distance problem between 1d measures and returns
+    the distance
+
+    .. math::
+        \min_\gamma \left( \sum_i \sum_j \gamma_{ij} \|x_a[i] - x_b[j]\|^p \right)^{1/p}
+
+        s.t. \gamma 1 = a,
+             \gamma^T 1= b,
+             \gamma\geq 0
+
+    where :
+
+    - x_a and x_b are the samples
+    - a and b are the sample weights
+
+    Uses the algorithm detailed in [1]_
+
+    Parameters
+    ----------
+    x_a : (ns,) or (ns, 1) ndarray, float64
+        Source dirac locations (on the real line)
+    x_b : (nt,) or (ns, 1) ndarray, float64
+        Target dirac locations (on the real line)
+    a : (ns,) ndarray, float64, optional
+        Source histogram (default is uniform weight)
+    b : (nt,) ndarray, float64, optional
+        Target histogram (default is uniform weight)
+    p: float, optional (default=1.0)
+         The order of the p-Wasserstein distance to be computed
+
+    Returns
+    -------
+    dist: float
+        p-Wasserstein distance
+
+
+    Examples
+    --------
+
+    Simple example with obvious solution. The function wasserstein_1d accepts
+    lists and performs automatic conversion to numpy arrays
+
+    >>> import ot
+    >>> a=[.5, .5]
+    >>> b=[.5, .5]
+    >>> x_a = [2., 0.]
+    >>> x_b = [0., 3.]
+    >>> ot.wasserstein_1d(x_a, x_b, a, b)
+    0.5
+    >>> ot.wasserstein_1d(x_a, x_b)
+    0.5
+
+    References
+    ----------
+
+    .. [1]  Peyré, G., & Cuturi, M. (2017). "Computational Optimal
+        Transport", 2018.
+
+    See Also
+    --------
+    ot.lp.emd_1d : EMD for 1d distributions
+    """
+    cost_emd = emd2_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p,
+                       dense=False, log=False)
+    return np.power(cost_emd, 1. / p)
diff --git a/ot/lp/core.h b/ot/lp/core.h
new file mode 100644
index 0000000..04dddf7
--- /dev/null
+++ b/ot/lp/core.h
@@ -0,0 +1,103 @@
+/* -*- mode: C++; indent-tabs-mode: nil; -*-
+ *
+ * This file has been adapted by Nicolas Bonneel (2013), 
+ * from full_graph.h from LEMON, a generic C++ optimization library,
+ * to make the other files independent from the rest of
+ * the original library.
+ * 
+ *
+ **** Original file Copyright Notice :
+ * Copyright (C) 2003-2010
+ * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport
+ * (Egervary Research Group on Combinatorial Optimization, EGRES).
+ *
+ * Permission to use, modify and distribute this software is granted
+ * provided that this copyright notice appears in all copies. For
+ * precise terms see the accompanying LICENSE file.
+ *
+ * This software is provided "AS IS" with no warranty of any kind,
+ * express or implied, and with no claim as to its suitability for any
+ * purpose.
+ *
+ */
+
+#ifndef LEMON_CORE_H
+#define LEMON_CORE_H
+
+#include <vector>
+#include <algorithm>
+
+
+// Disable the following warnings when compiling with MSVC:
+// C4250: 'class1' : inherits 'class2::member' via dominance
+// C4355: 'this' : used in base member initializer list
+// C4503: 'function' : decorated name length exceeded, name was truncated
+// C4800: 'type' : forcing value to bool 'true' or 'false' (performance warning)
+// C4996: 'function': was declared deprecated
+#ifdef _MSC_VER
+#pragma warning( disable : 4250 4355 4503 4800 4996 )
+#endif
+
+///\file
+///\brief LEMON core utilities.
+///
+///This header file contains core utilities for LEMON.
+///It is automatically included by all graph types, therefore it usually
+///does not have to be included directly.
+
+namespace lemon {
+
+  /// \brief Dummy type to make it easier to create invalid iterators.
+  ///
+  /// Dummy type to make it easier to create invalid iterators.
+  /// See \ref INVALID for the usage.
+  struct Invalid {
+    // Operators are const so the shared `const Invalid INVALID` can be an operand
+    bool operator==(Invalid) const { return true;  }
+    bool operator!=(Invalid) const { return false; }
+    bool operator< (Invalid) const { return false; }
+  };
+
+  /// \brief Invalid iterators.
+  ///
+  /// \ref Invalid is a global type that converts to each iterator
+  /// in such a way that the value of the target iterator will be invalid.
+#ifdef LEMON_ONLY_TEMPLATES
+  const Invalid INVALID = Invalid();
+#else
+  extern const Invalid INVALID;
+#endif
+
+  /// \addtogroup gutils
+  /// @{
+
+  ///Create convenience typedefs for the digraph types and iterators
+
+  ///This \c \#define creates convenient type definitions for the following
+  ///types of \c Digraph: \c Node,  \c NodeIt, \c Arc, \c ArcIt, \c InArcIt,
+  ///\c OutArcIt, \c BoolNodeMap, \c IntNodeMap, \c DoubleNodeMap,
+  ///\c BoolArcMap, \c IntArcMap, \c DoubleArcMap.
+  ///
+  ///\note If the graph type is a dependent type, i.e. the graph type depends
+  ///on a template parameter, then use the \c TEMPLATE_DIGRAPH_TYPEDEFS()
+  ///macro.
+#define DIGRAPH_TYPEDEFS(Digraph)                                       \
+  typedef Digraph::Node Node;                                           \
+  typedef Digraph::Arc Arc;                                             \
+
+
+  ///Create convenience typedefs for the digraph types and iterators
+
+  ///\see DIGRAPH_TYPEDEFS
+  ///
+  ///\note Use this macro, if the graph type is a dependent type,
+  ///ie. the graph type depend on a template parameter.
+#define TEMPLATE_DIGRAPH_TYPEDEFS(Digraph)                              \
+  typedef typename Digraph::Node Node;                                  \
+  typedef typename Digraph::Arc Arc;                                    \
+
+ 
+
+} //namespace lemon
+
+#endif
diff --git a/ot/lp/cvx.py b/ot/lp/cvx.py
new file mode 100644
index 0000000..8e763be
--- /dev/null
+++ b/ot/lp/cvx.py
@@ -0,0 +1,147 @@
+# -*- coding: utf-8 -*-
+"""
+LP solvers for optimal transport using cvxopt
+"""
+
+# Author: Remi Flamary <remi.flamary@unice.fr>
+#
+# License: MIT License
+
+import numpy as np
+import scipy as sp
+import scipy.sparse as sps
+
+
+try:
+    import cvxopt
+    from cvxopt import solvers, matrix, spmatrix
+except ImportError:
+    cvxopt = False
+
+
+def scipy_sparse_to_spmatrix(A):
+    """Efficient conversion from scipy sparse matrix to cvxopt sparse matrix"""
+    coo = A.tocoo()
+    SP = spmatrix(coo.data.tolist(), coo.row.tolist(), coo.col.tolist(), size=A.shape)
+    return SP
+
+
+def barycenter(A, M, weights=None, verbose=False, log=False, solver='interior-point'):
+    """Compute the Wasserstein barycenter of distributions A
+
+     The function solves the following optimization problem [16]:
+
+    .. math::
+       \mathbf{a} = arg\min_\mathbf{a} \sum_i W_{1}(\mathbf{a},\mathbf{a}_i)
+
+    where :
+
+    - :math:`W_1(\cdot,\cdot)` is the Wasserstein distance (see ot.emd.sinkhorn)
+    - :math:`\mathbf{a}_i` are training distributions in the columns of matrix :math:`\mathbf{A}`
+
+    The linear program is solved using the interior point solver from scipy.optimize.
+    If cvxopt solver if installed it can use cvxopt
+
+    Note that this problem do not scale well (both in memory and computational time).
+
+    Parameters
+    ----------
+    A : np.ndarray (d,n)
+        n training distributions a_i of size d
+    M : np.ndarray (d,d)
+        loss matrix   for OT
+    reg : float
+        Regularization term >0
+    weights : np.ndarray (n,)
+        Weights of each histogram a_i on the simplex (barycentric coodinates)
+    verbose : bool, optional
+        Print information along iterations
+    log : bool, optional
+        record log if True
+    solver : string, optional
+        the solver used, default 'interior-point' use the lp solver from
+        scipy.optimize. None, or 'glpk' or 'mosek' use the solver from cvxopt.
+
+    Returns
+    -------
+    a : (d,) ndarray
+        Wasserstein barycenter
+    log : dict
+        log dictionary return only if log==True in parameters
+
+
+    References
+    ----------
+
+    .. [16] Agueh, M., & Carlier, G. (2011). Barycenters in the Wasserstein space. SIAM Journal on Mathematical Analysis, 43(2), 904-924.
+
+
+
+    """
+
+    if weights is None:
+        weights = np.ones(A.shape[1]) / A.shape[1]
+    else:
+        assert(len(weights) == A.shape[1])
+
+    n_distributions = A.shape[1]
+    n = A.shape[0]
+
+    n2 = n * n
+    c = np.zeros((0))
+    b_eq1 = np.zeros((0))
+    for i in range(n_distributions):
+        c = np.concatenate((c, M.ravel() * weights[i]))
+        b_eq1 = np.concatenate((b_eq1, A[:, i]))
+    c = np.concatenate((c, np.zeros(n)))
+
+    lst_idiag1 = [sps.kron(sps.eye(n), np.ones((1, n))) for i in range(n_distributions)]
+    #  row constraints
+    A_eq1 = sps.hstack((sps.block_diag(lst_idiag1), sps.coo_matrix((n_distributions * n, n))))
+
+    # columns constraints
+    lst_idiag2 = []
+    lst_eye = []
+    for i in range(n_distributions):
+        if i == 0:
+            lst_idiag2.append(sps.kron(np.ones((1, n)), sps.eye(n)))
+            lst_eye.append(-sps.eye(n))
+        else:
+            lst_idiag2.append(sps.kron(np.ones((1, n)), sps.eye(n - 1, n)))
+            lst_eye.append(-sps.eye(n - 1, n))
+
+    A_eq2 = sps.hstack((sps.block_diag(lst_idiag2), sps.vstack(lst_eye)))
+    b_eq2 = np.zeros((A_eq2.shape[0]))
+
+    # full problem
+    A_eq = sps.vstack((A_eq1, A_eq2))
+    b_eq = np.concatenate((b_eq1, b_eq2))
+
+    if not cvxopt or solver in ['interior-point']:
+        # cvxopt not installed or interior point
+
+        if solver is None:
+            solver = 'interior-point'
+
+        options = {'sparse': True, 'disp': verbose}
+        sol = sp.optimize.linprog(c, A_eq=A_eq, b_eq=b_eq, method=solver,
+                                  options=options)
+        x = sol.x
+        b = x[-n:]
+
+    else:
+
+        h = np.zeros((n_distributions * n2 + n))
+        G = -sps.eye(n_distributions * n2 + n)
+
+        sol = solvers.lp(matrix(c), scipy_sparse_to_spmatrix(G), matrix(h),
+                         A=scipy_sparse_to_spmatrix(A_eq), b=matrix(b_eq),
+                         solver=solver)
+
+        x = np.array(sol['x'])
+        b = x[-n:].ravel()
+
+    if log:
+        return b, sol
+    else:
+        return b
diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
new file mode 100644
index 0000000..2b6c495
--- /dev/null
+++ b/ot/lp/emd_wrap.pyx
@@ -0,0 +1,187 @@
+# -*- coding: utf-8 -*-
+"""
+Cython linker with C solver
+"""
+
+# Author: Remi Flamary <remi.flamary@unice.fr>
+#
+# License: MIT License
+
+import numpy as np
+cimport numpy as np
+
+from ..utils import dist
+
+cimport cython
+cimport libc.math as math
+
+import warnings
+
+
+cdef extern from "EMD.h":
+    int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter)
+    cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED
+
+
+def check_result(result_code):
+    """Turn a solver status code into a warning.
+
+    Parameters
+    ----------
+    result_code : int
+        Status returned by ``EMD_wrap`` (one of the ``ProblemType``
+        enumerators declared in ``EMD.h``).
+
+    Returns
+    -------
+    message : str or None
+        ``None`` when the code is ``OPTIMAL``; otherwise the text of the
+        warning that was emitted through :func:`warnings.warn`.
+    """
+    if result_code == OPTIMAL:
+        return None
+
+    if result_code == INFEASIBLE:
+        message = "Problem infeasible. Check that a and b are in the simplex"
+    elif result_code == UNBOUNDED:
+        message = "Problem unbounded"
+    elif result_code == MAX_ITER_REACHED:
+        message = "numItermax reached before optimality. Try to increase numItermax."
+    else:
+        # Without this branch an unexpected code left `message` unbound and
+        # the function died with UnboundLocalError instead of warning.
+        message = "Unknown solver result code: %s" % (result_code,)
+    warnings.warn(message)
+    return message
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"]  b, np.ndarray[double, ndim=2, mode="c"]  M, int max_iter):
+    r"""
+        Solves the Earth Movers distance problem and returns the optimal transport matrix
+
+        gamm=emd(a,b,M)
+
+    .. math::
+        \gamma = arg\min_\gamma <\gamma,M>_F
+
+        s.t. \gamma 1 = a
+
+             \gamma^T 1= b
+
+             \gamma\geq 0
+    where :
+
+    - M is the metric cost matrix
+    - a and b are the sample weights
+
+    .. warning::
+        Note that the M matrix needs to be a C-order :py:class:`numpy.ndarray`
+
+    .. note::
+        The lengths of ``a`` and ``b`` are not checked against the shape of
+        ``M`` here; the raw buffers are handed to the C solver as they are.
+        An empty ``a`` (resp. ``b``) is replaced by uniform weights
+        ``1 / M.shape[0]`` (resp. ``1 / M.shape[1]``).
+
+    Parameters
+    ----------
+    a : (ns,) numpy.ndarray, float64
+        source histogram
+    b : (nt,) numpy.ndarray, float64
+        target histogram
+    M : (ns,nt) numpy.ndarray, float64
+        loss matrix
+    max_iter : int
+        The maximum number of iterations before stopping the optimization
+        algorithm if it has not converged.
+
+
+    Returns
+    -------
+    gamma: (ns x nt) numpy.ndarray
+        Optimal transportation matrix for the given parameters
+    cost : float
+        Value written by the solver through its ``cost`` output argument
+    alpha : (ns,) numpy.ndarray
+        Array filled by the solver through its ``alpha`` argument
+        (presumably the dual variables of the source constraints)
+    beta : (nt,) numpy.ndarray
+        Array filled by the solver through its ``beta`` argument
+        (presumably the dual variables of the target constraints)
+    result_code : int
+        Status returned by ``EMD_wrap``; see :func:`check_result`
+
+    """
+    cdef int n1= M.shape[0]
+    cdef int n2= M.shape[1]
+
+    cdef double cost=0
+    cdef np.ndarray[double, ndim=2, mode="c"] G=np.zeros([n1, n2])
+    cdef np.ndarray[double, ndim=1, mode="c"] alpha=np.zeros(n1)
+    cdef np.ndarray[double, ndim=1, mode="c"] beta=np.zeros(n2)
+
+
+    # An empty histogram means "uniform weights"
+    if not len(a):
+        a=np.ones((n1,))/n1
+
+    if not len(b):
+        b=np.ones((n2,))/n2
+
+    # calling the function
+    cdef int result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
+
+    return G, cost, alpha, beta, result_code
+
+
+@cython.boundscheck(False)
+@cython.wraparound(False)
+def emd_1d_sorted(np.ndarray[double, ndim=1, mode="c"] u_weights,
+                  np.ndarray[double, ndim=1, mode="c"] v_weights,
+                  np.ndarray[double, ndim=1, mode="c"] u,
+                  np.ndarray[double, ndim=1, mode="c"] v,
+                  str metric='sqeuclidean',
+                  double p=1.):
+    r"""
+    Solves the Earth Movers distance problem between sorted 1d measures and
+    returns the OT matrix and the associated cost
+
+    Parameters
+    ----------
+    u_weights : (ns,) ndarray, float64
+        Source histogram
+    v_weights : (nt,) ndarray, float64
+        Target histogram
+    u : (ns,) ndarray, float64
+        Source dirac locations (on the real line)
+    v : (nt,) ndarray, float64
+        Target dirac locations (on the real line)
+    metric: str, optional (default='sqeuclidean')
+        Metric to be used. Only strings listed in :func:`ot.dist` are accepted.
+        Due to implementation details, this function runs faster when
+        `'sqeuclidean'`, `'minkowski'`, `'cityblock'`,  or `'euclidean'` metrics
+        are used.
+    p: float, optional (default=1.0)
+         The p-norm to apply for if metric='minkowski'
+
+    Returns
+    -------
+    gamma: (n, ) ndarray, float64
+        Values in the Optimal transportation matrix
+        (at most ``ns + nt - 1`` entries; empty if either input is empty)
+    indices: (n, 2) ndarray, int64
+        Indices of the values stored in gamma for the Optimal transportation
+        matrix
+    cost
+        cost associated to the optimal transportation
+    """
+    cdef double cost = 0.
+    cdef int n = u_weights.shape[0]
+    cdef int m = v_weights.shape[0]
+
+    cdef int i = 0
+    cdef double w_i
+    cdef int j = 0
+    cdef double w_j
+
+    cdef double m_ij = 0.
+
+    cdef np.ndarray[double, ndim=1, mode="c"] G
+    cdef np.ndarray[long long, ndim=2, mode="c"] indices
+    cdef int cur_idx = 0
+
+    # Bounds checking is disabled, so never touch index 0 of an empty input
+    # (and never ask numpy for an array of negative length).
+    if n == 0 or m == 0:
+        return (np.zeros((0, ), dtype=np.float64),
+                np.zeros((0, 2), dtype=np.int64),
+                cost)
+
+    w_i = u_weights[0]
+    w_j = v_weights[0]
+    G = np.zeros((n + m - 1, ), dtype=np.float64)
+    # np.int64 / long long: `np.int` no longer exists in recent NumPy and
+    # C `long` is only 32 bits wide on some platforms.
+    indices = np.zeros((n + m - 1, 2), dtype=np.int64)
+
+    while i < n and j < m:
+        if metric == 'sqeuclidean':
+            m_ij = (u[i] - v[j]) * (u[i] - v[j])
+        elif metric == 'cityblock' or metric == 'euclidean':
+            m_ij = math.fabs(u[i] - v[j])
+        elif metric == 'minkowski':
+            m_ij = math.pow(math.fabs(u[i] - v[j]), p)
+        else:
+            m_ij = dist(u[i].reshape((1, 1)), v[j].reshape((1, 1)),
+                        metric=metric)[0, 0]
+        if w_i < w_j or j == m - 1:
+            # Source atom i is exhausted first (or v has no atom left after
+            # j): ship all of its remaining mass to target atom j.
+            cost += m_ij * w_i
+            G[cur_idx] = w_i
+            indices[cur_idx, 0] = i
+            indices[cur_idx, 1] = j
+            cur_idx += 1
+            i += 1
+            if i == n:
+                # Stop before reading u_weights[n], which is out of bounds.
+                break
+            w_j -= w_i
+            w_i = u_weights[i]
+        else:
+            # Target atom j is filled first: ship what it still needs.
+            cost += m_ij * w_j
+            G[cur_idx] = w_j
+            indices[cur_idx, 0] = i
+            indices[cur_idx, 1] = j
+            cur_idx += 1
+            j += 1
+            if j == m:
+                # Stop before reading v_weights[m], which is out of bounds.
+                break
+            w_i -= w_j
+            w_j = v_weights[j]
+    return G[:cur_idx], indices[:cur_idx], cost
diff --git a/ot/lp/full_bipartitegraph.h b/ot/lp/full_bipartitegraph.h
new file mode 100644
index 0000000..87a1bec
--- /dev/null
+++ b/ot/lp/full_bipartitegraph.h
@@ -0,0 +1,215 @@
+/* -*- mode: C++; indent-tabs-mode: nil; -*-
+ *
+ * This file has been adapted by Nicolas Bonneel (2013), 
+ * from full_graph.h from LEMON, a generic C++ optimization library,
+ * to implement a lightweight fully connected bipartite graph. A previous
+ * version of this file is used as part of the Displacement Interpolation 
+ * project, 
+ * Web: http://www.cs.ubc.ca/labs/imager/tr/2011/DisplacementInterpolation/
+ * 
+ *
+ **** Original file Copyright Notice :
+ * Copyright (C) 2003-2010
+ * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport
+ * (Egervary Research Group on Combinatorial Optimization, EGRES).
+ *
+ * Permission to use, modify and distribute this software is granted
+ * provided that this copyright notice appears in all copies. For
+ * precise terms see the accompanying LICENSE file.
+ *
+ * This software is provided "AS IS" with no warranty of any kind,
+ * express or implied, and with no claim as to its suitability for any
+ * purpose.
+ *
+ */
+
+#ifndef LEMON_FULL_BIPARTITE_GRAPH_H
+#define LEMON_FULL_BIPARTITE_GRAPH_H
+
+#include "core.h"
+
+///\ingroup graphs
+///\file
+///\brief FullBipartiteDigraph and FullBipartiteGraph classes.
+
+
+namespace lemon {
+
+
+  /// \brief Storage-free base of a complete bipartite digraph.
+  ///
+  /// Nodes are the integers <tt>[0, n1+n2)</tt>; ids below \c n1 are the
+  /// source side, the others the target side. Arcs only run from a source
+  /// node \c s to a target node \c t and are identified by the 64-bit id
+  /// <tt>s * n2 + (t - n1)</tt>, so nothing is stored per arc.
+  /// Iteration (first()/next() and the In/Out variants) walks ids
+  /// downwards and ends on \c -1.
+  class FullBipartiteDigraphBase {
+  public:
+
+    typedef FullBipartiteDigraphBase Digraph;
+
+    /// Node handle: the plain node id.
+    typedef int Node;
+    /// Arc handle: the plain arc id (64 bits, n1*n2 may exceed an int).
+    typedef long long Arc;
+
+  protected:
+
+    int _node_num;
+    long long _arc_num;
+
+    /// Builds an empty graph (all counters zero).
+    FullBipartiteDigraphBase() : _node_num(0), _arc_num(0), _n1(0), _n2(0) {}
+
+    /// (Re)defines the graph as n1 source nodes fully connected to
+    /// n2 target nodes.
+    void construct(int n1, int n2) {
+      _node_num = n1 + n2;
+      // Widen before multiplying: n1 * n2 evaluated in int overflows as
+      // soon as the product exceeds INT_MAX.
+      _arc_num = static_cast<long long>(n1) * n2;
+      _n1 = n1;
+      _n2 = n2;
+    }
+
+  public:
+
+    /// Number of source (_n1) and target (_n2) nodes.
+    int _n1, _n2;
+
+
+    Node operator()(int ix) const { return Node(ix); }
+    static int index(const Node& node) { return node; }
+
+    /// Arc from source node \c s to target node \c t, or -1 when
+    /// \c s is not below \c _n1 or \c t is below \c _n1.
+    Arc arc(const Node& s, const Node& t) const {
+      if (s < _n1 && t >= _n1)
+        return Arc(static_cast<long long>(s) * _n2 + (t - _n1));
+      else
+        return Arc(-1);
+    }
+
+    int nodeNum() const { return _node_num; }
+    long long arcNum() const { return _arc_num; }
+
+    int maxNodeId() const { return _node_num - 1; }
+    long long maxArcId() const { return _arc_num - 1; }
+
+    /// Source endpoint of \c arc (an id in [0, _n1)).
+    Node source(Arc arc) const { return static_cast<Node>(arc / _n2); }
+    /// Target endpoint of \c arc (an id in [_n1, _n1+_n2)).
+    Node target(Arc arc) const { return static_cast<Node>(arc % _n2) + _n1; }
+
+    static int id(Node node) { return node; }
+    static long long id(Arc arc) { return arc; }
+
+    static Node nodeFromId(int id) { return Node(id);}
+    // Takes the full 64-bit id; an int parameter silently truncated
+    // large arc ids.
+    static Arc arcFromId(long long id) { return Arc(id);}
+
+
+    /// There is at most one arc between two nodes, so any search that
+    /// continues after a previous hit returns -1.
+    Arc findArc(Node s, Node t, Arc prev = -1) const {
+      return prev == -1 ? arc(s, t) : -1;
+    }
+
+    void first(Node& node) const {
+      node = _node_num - 1;
+    }
+
+    static void next(Node& node) {
+      --node;
+    }
+
+    void first(Arc& arc) const {
+      arc = _arc_num - 1;
+    }
+
+    static void next(Arc& arc) {
+      --arc;
+    }
+
+    /// Last outgoing arc of \c node, or -1 for a target-side node.
+    void firstOut(Arc& arc, const Node& node) const {
+      if (node >= _n1)
+        arc = -1;
+      else
+        arc = (static_cast<long long>(node) + 1) * _n2 - 1;
+    }
+
+    /// Steps to the previous outgoing arc of the same source node;
+    /// yields -1 after the first arc of that node's row.
+    void nextOut(Arc& arc) const {
+      if (arc % _n2 == 0) arc = 0;
+      --arc;
+    }
+
+    /// Last incoming arc of \c node, or -1 for a source-side node.
+    void firstIn(Arc& arc, const Node& node) const {
+      if (node < _n1)
+        arc = -1;
+      else
+        arc = _arc_num + node - _node_num;
+    }
+
+    /// Steps to the incoming arc of the same target node that comes from
+    /// the previous source node; yields -1 when there is none left.
+    void nextIn(Arc& arc) const {
+      arc -= _n2;
+      if (arc < 0) arc = -1;
+    }
+
+  };
+
+  /// \ingroup graphs
+  ///
+  /// \brief A directed complete bipartite graph.
+  ///
+  /// FullBipartiteDigraph is the public face of FullBipartiteDigraphBase.
+  /// It is built from two sizes, \c n1 and \c n2, and only forwards to the
+  /// base class: node handles are plain integers in
+  /// <tt>[0..nodeNum()-1]</tt> and the arc count is whatever the base
+  /// computed in \c construct().
+  /// The class stores no per-node or per-arc data, so nodes and arcs can
+  /// be neither added nor removed, and counting them takes constant time.
+  class FullBipartiteDigraph : public FullBipartiteDigraphBase {
+    typedef FullBipartiteDigraphBase Parent;
+
+  public:
+
+    /// \brief Default constructor.
+    ///
+    /// Builds the graph with both sides empty (n1 = n2 = 0).
+    FullBipartiteDigraph() {
+      this->construct(0, 0);
+    }
+
+    /// \brief Constructor
+    ///
+    /// \param n1 Size forwarded as first argument of \c construct().
+    /// \param n2 Size forwarded as second argument of \c construct().
+    FullBipartiteDigraph(int n1, int n2) {
+      this->construct(n1, n2);
+    }
+
+
+    /// \brief Returns the node with the given index.
+    ///
+    /// Nodes are indexed with integers from <tt>[0..nodeNum()-1]</tt>;
+    /// the index of a node is the same as its ID.
+    /// \sa index()
+    Node operator()(int ix) const {
+      const Parent& base = *this;
+      return base(ix);
+    }
+
+    /// \brief Returns the index of the given node.
+    ///
+    /// Inverse of operator()(): the index of a node is the same as its ID.
+    /// \sa operator()()
+    static int index(const Node& node) {
+      return Parent::index(node);
+    }
+
+    // arc(u, v) is not re-declared here; the version of the base class is
+    // used directly.
+
+    /// \brief Number of nodes.
+    int nodeNum() const {
+      const Parent& base = *this;
+      return base.nodeNum();
+    }
+
+    /// \brief Number of arcs.
+    long long arcNum() const {
+      const Parent& base = *this;
+      return base.arcNum();
+    }
+  };
+
+
+
+
+} //namespace lemon
+
+
+#endif //LEMON_FULL_BIPARTITE_GRAPH_H
diff --git a/ot/lp/network_simplex_simple.h b/ot/lp/network_simplex_simple.h
new file mode 100644
index 0000000..7c6a4ce
--- /dev/null
+++ b/ot/lp/network_simplex_simple.h
@@ -0,0 +1,1553 @@
+/* -*- mode: C++; indent-tabs-mode: nil; -*-
+ *
+ *
+ * This file has been adapted by Nicolas Bonneel (2013),
+ * from network_simplex.h from LEMON, a generic C++ optimization library,
+ * to implement a lightweight network simplex for mass transport, more
+ * memory efficient than the original file. A previous version of this file
+ * is used as part of the Displacement Interpolation project,
+ * Web: http://www.cs.ubc.ca/labs/imager/tr/2011/DisplacementInterpolation/
+ *
+ *
+ **** Original file Copyright Notice :
+ *
+ * Copyright (C) 2003-2010
+ * Egervary Jeno Kombinatorikus Optimalizalasi Kutatocsoport
+ * (Egervary Research Group on Combinatorial Optimization, EGRES).
+ *
+ * Permission to use, modify and distribute this software is granted
+ * provided that this copyright notice appears in all copies. For
+ * precise terms see the accompanying LICENSE file.
+ *
+ * This software is provided "AS IS" with no warranty of any kind,
+ * express or implied, and with no claim as to its suitability for any
+ * purpose.
+ *
+ */
+
+#ifndef LEMON_NETWORK_SIMPLEX_SIMPLE_H
+#define LEMON_NETWORK_SIMPLEX_SIMPLE_H
+#define DEBUG_LVL 0
+
+#if DEBUG_LVL>0
+#include <iomanip>
+#endif
+
+
+#define EPSILON 2.2204460492503131e-15
+#define _EPSILON 1e-8
+#define MAX_DEBUG_ITER 100000
+
+
+/// \ingroup min_cost_flow_algs
+///
+/// \file
+/// \brief Network Simplex algorithm for finding a minimum cost flow.
+
+// if your compiler has troubles with stdext or hashmaps, just comment the following line to use a slower std::map instead
+//#define HASHMAP
+
+#include <vector>
+#include <limits>
+#include <algorithm>
+#include <cstdio>
+#ifdef HASHMAP
+#include <hash_map>
+#else
+#include <map>
+#endif
+#include <cmath>
+//#include "core.h"
+//#include "lmath.h"
+
+//#include "sparse_array_n.h"
+#include "full_bipartitegraph.h"
+
+#define INVALIDNODE -1
+#define INVALID (-1)
+
+namespace lemon {
+
+
+    template <typename T>
+    class ProxyObject;
+
+    /// \brief Sparse vector of values indexed by int.
+    ///
+    /// Only non-zero entries are kept (in \c data); reading an index that
+    /// is not stored yields 0. Non-const indexing returns a ProxyObject so
+    /// that reads do not create entries and writes of 0 do not store one.
+    /// The size arguments of the constructor and of resize() are accepted
+    /// for interface compatibility with std::vector and ignored.
+    template<typename T>
+    class SparseValueVector
+    {
+    public:
+        /// Container behind the vector: hash map when HASHMAP is defined,
+        /// std::map otherwise.
+#ifdef HASHMAP
+        typedef stdext::hash_map<int,T> MapType;
+#else
+        typedef std::map<int,T> MapType;
+#endif
+
+        SparseValueVector(int n=0)
+        {
+            (void)n;
+        }
+        void resize(int n=0)
+        {
+            (void)n;
+        }
+
+        /// Read-only access: stored value, or 0 when \c id has no entry.
+        T operator[](const int id) const
+        {
+            typename MapType::const_iterator it = data.find(id);
+            if (it==data.end())
+                return 0;
+            else
+                return it->second;
+        }
+
+        /// Read/write access through a proxy bound to (this, id).
+        ProxyObject<T> operator[](const int id)
+        {
+            return ProxyObject<T>( this, id );
+        }
+
+        //private:
+        MapType data;
+
+    };
+
+    /// \brief Reference-like handle to one slot of a SparseValueVector.
+    ///
+    /// Converts to \c T for reads and implements =, += and -= for writes,
+    /// keeping the invariant that \c data never holds an explicit zero
+    /// written through the proxy.
+    template <typename T>
+    class ProxyObject {
+    public:
+        ProxyObject( SparseValueVector<T> *v, int idx ){_v=v; _idx=idx;};
+
+        ProxyObject<T> & operator=( const T &v ) {
+            // If we get here, we know that operator[] was called to perform a write access,
+            // so we can insert an item in the vector if needed
+            if (v!=0)
+                _v->data[_idx]=v;
+            else
+                // Assigning zero must also drop an entry that is already
+                // stored, otherwise the old non-zero value stays readable.
+                _v->data.erase(_idx);
+            return *this;
+        }
+
+        operator T() const {
+            // If we get here, we know that operator[] was called to perform a read access,
+            // so we can simply return the existing object
+            typename SparseValueVector<T>::MapType::const_iterator it = _v->data.find(_idx);
+            if (it==_v->data.end())
+                return 0;
+            else
+                return it->second;
+        }
+
+        /// Adds \c val to the slot; the entry is erased if the sum is 0.
+        void operator+=(T val)
+        {
+            if (val==0) return;
+            typename SparseValueVector<T>::MapType::iterator it = _v->data.find(_idx);
+            if (it==_v->data.end())
+                _v->data[_idx] = val;
+            else
+            {
+                T sum = it->second + val;
+                if (sum==0)
+                    _v->data.erase(it);
+                else
+                    it->second = sum;
+            }
+        }
+
+        /// Subtracts \c val from the slot; the entry is erased if the
+        /// difference is 0.
+        void operator-=(T val)
+        {
+            if (val==0) return;
+            typename SparseValueVector<T>::MapType::iterator it = _v->data.find(_idx);
+            if (it==_v->data.end())
+                _v->data[_idx] = -val;
+            else
+            {
+                T sum = it->second - val;
+                if (sum==0)
+                    _v->data.erase(it);
+                else
+                    it->second = sum;
+            }
+        }
+
+        SparseValueVector<T> *_v;
+        int _idx;
+    };
+
+
+
+    /// \addtogroup min_cost_flow_algs
+    /// @{
+
+    /// \brief Implementation of the primal Network Simplex algorithm
+    /// for finding a \ref min_cost_flow "minimum cost flow".
+    ///
+    /// \ref NetworkSimplexSimple implements the primal Network Simplex algorithm
+    /// for finding a \ref min_cost_flow "minimum cost flow"
+    /// \ref amo93networkflows, \ref dantzig63linearprog,
+    /// \ref kellyoneill91netsimplex.
+    /// This algorithm is a highly efficient specialized version of the
+    /// linear programming simplex method directly for the minimum cost
+    /// flow problem.
+    ///
+    /// In general, %NetworkSimplexSimple is the fastest implementation available
+    /// in LEMON for this problem.
+    /// Moreover, it supports both directions of the supply/demand inequality
+    /// constraints. For more information, see \ref SupplyType.
+    ///
+    /// Most of the parameters of the problem (except for the digraph)
+    /// can be given using separate functions, and the algorithm can be
+    /// executed using the \ref run() function. If some parameters are not
+    /// specified, then default values will be used.
+    ///
+    /// \tparam GR The digraph type the algorithm runs on.
+    /// \tparam V The number type used for flow amounts, capacity bounds
+    /// and supply values in the algorithm. By default, it is \c int.
+    /// \tparam C The number type used for costs and potentials in the
+    /// algorithm. By default, it is the same as \c V.
+    ///
+    /// \warning Both number types must be signed and all input data must
+    /// be integer.
+    ///
+    /// \note %NetworkSimplexSimple provides five different pivot rule
+    /// implementations, from which the most efficient one is used
+    /// by default. For more information, see \ref PivotRule.
+    template <typename GR, typename V = int, typename C = V, typename NodesType = unsigned short int>
+    class NetworkSimplexSimple
+    {
+    public:
+
+        /// \brief Constructor.
+        ///
+        /// Binds the solver to \c graph, records the sizes and flags given
+        /// by the caller, calls reset() and finally stores the iteration
+        /// limit.
+        ///
+        /// \param graph The digraph the algorithm runs on. Only a reference
+        /// is kept, so the graph must outlive the solver.
+        /// \param arc_mixing Indicate if the arcs have to be stored in a
+        /// mixed order in the internal data structure.
+        /// In special cases, it could lead to better overall performance,
+        /// but it is usually slower. The parameter has no default value
+        /// here; the caller must choose explicitly.
+        /// \param nbnodes Stored in \c _init_nb_nodes (presumably the node
+        /// count used by reset() to size the internal arrays - confirm).
+        /// \param nb_arcs Stored in \c _init_nb_arcs (presumably the arc
+        /// count used by reset() to size the internal arrays - confirm).
+        /// \param maxiters Stored in \c max_iter after reset() has run
+        /// (presumably the pivot limit behind \c MAX_ITER_REACHED - confirm).
+        NetworkSimplexSimple(const GR& graph, bool arc_mixing, int nbnodes, long long nb_arcs,int maxiters) :
+        _graph(graph),  //_arc_id(graph),
+        _arc_mixing(arc_mixing), _init_nb_nodes(nbnodes), _init_nb_arcs(nb_arcs),
+        MAX(std::numeric_limits<Value>::max()),
+        // INF is a true infinity when Value has one, MAX otherwise
+        INF(std::numeric_limits<Value>::has_infinity ?
+            std::numeric_limits<Value>::infinity() : MAX)
+        {
+            // Reset data structures
+            reset();
+            // Assigned after reset(), not in the initializer list
+            max_iter=maxiters;
+        }
+
+        /// The type of the flow amounts, capacity bounds and supply values
+        typedef V Value;
+        /// The type of the arc costs
+        typedef C Cost;
+
+    public:
+
+        /// \brief Problem type constants for the \c run() function.
+        ///
+        /// Enum type containing the problem type constants that can be
+        /// returned by the \ref run() function of the algorithm.
+        enum ProblemType {
+            /// The problem has no feasible solution (flow).
+            INFEASIBLE,
+            /// The problem has optimal solution (i.e. it is feasible and
+            /// bounded), and the algorithm has found optimal flow and node
+            /// potentials (primal and dual solutions).
+            OPTIMAL,
+            /// The objective function of the problem is unbounded, i.e.
+            /// there is a directed cycle having negative total cost and
+            /// infinite upper bound.
+            UNBOUNDED,
+			/// The maximum number of iterations has been reached
+			MAX_ITER_REACHED
+        };
+
+        /// \brief Constants for selecting the type of the supply constraints.
+        ///
+        /// Enum type containing constants for selecting the supply type,
+        /// i.e. the direction of the inequalities in the supply/demand
+        /// constraints of the \ref min_cost_flow "minimum cost flow problem".
+        ///
+        /// The default supply type is \c GEQ, the \c LEQ type can be
+        /// selected using \ref supplyType().
+        /// The equality form is a special case of both supply types.
+        enum SupplyType {
+            /// This option means that there are <em>"greater or equal"</em>
+            /// supply/demand constraints in the definition of the problem.
+            GEQ,
+            /// This option means that there are <em>"less or equal"</em>
+            /// supply/demand constraints in the definition of the problem.
+            LEQ
+        };
+
+
+
+    private:
+
+        int max_iter;
+        TEMPLATE_DIGRAPH_TYPEDEFS(GR);
+
+        typedef std::vector<int> IntVector;
+        typedef std::vector<NodesType> UHalfIntVector;
+        typedef std::vector<Value> ValueVector;
+        typedef std::vector<Cost> CostVector;
+        //	typedef SparseValueVector<Cost> CostVector;
+        typedef std::vector<char> BoolVector;
+        // Note: vector<char> is used instead of vector<bool> for efficiency reasons
+
+        // State constants for arcs
+        enum ArcState {
+            STATE_UPPER = -1,
+            STATE_TREE  =  0,
+            STATE_LOWER =  1
+        };
+
+        typedef std::vector<signed char> StateVector;
+        // Note: vector<signed char> is used instead of vector<ArcState> for
+        // efficiency reasons
+
+    private:
+
+        // Data related to the underlying digraph
+        const GR &_graph;
+        int _node_num;
+        int _arc_num;
+        int _all_arc_num;
+        int _search_arc_num;
+
+        // Parameters of the problem
+        SupplyType _stype;
+        Value _sum_supply;
+
+        // Converts a graph node id into the internal node index; the
+        // numbering is reversed (graph node 0 is internal node _node_num-1).
+        inline int _node_id(int n) const {
+            return _node_num - n - 1;
+        }
+
+	    //IntArcMap _arc_id;
+        UHalfIntVector _source;
+        UHalfIntVector _target;
+        bool _arc_mixing;
+    public:
+        // Node and arc data
+        CostVector _cost;
+        ValueVector _supply;
+        ValueVector _flow;
+        //SparseValueVector<Value> _flow;
+        CostVector _pi;
+
+
+    private:
+        // Data for storing the spanning tree structure
+        IntVector _parent;
+        IntVector _pred;
+        IntVector _thread;
+        IntVector _rev_thread;
+        IntVector _succ_num;
+        IntVector _last_succ;
+        IntVector _dirty_revs;
+        BoolVector _forward;
+        StateVector _state;
+        int _root;
+
+        // Temporary data used in the current pivot iteration
+        int in_arc, join, u_in, v_in, u_out, v_out;
+        int first, second, right, last;
+        int stem, par_stem, new_stem;
+        Value delta;
+
+        const Value MAX;
+
+        int mixingCoeff;
+
+    public:
+
+        /// \brief Constant for infinite upper bounds (capacities).
+        ///
+        /// Constant for infinite upper bounds (capacities).
+        /// It is \c std::numeric_limits<Value>::infinity() if available,
+        /// \c std::numeric_limits<Value>::max() otherwise.
+        const Value INF;
+
+    private:
+
+        // thank you to DVK and MizardX from StackOverflow for this function!
+        // Maps position k of the plain arc enumeration to its slot in the
+        // "mixed" arc order (used by getArcID() when _arc_mixing is set).
+        // Positions above num_total_big_subsequence_numbers belong to the
+        // subsequences that are one element shorter.
+        inline int sequence(int k) const {
+            int length = subsequence_length;
+            int first_subsequence = 0;
+            if (k > num_total_big_subsequence_numbers) {
+                k -= num_total_big_subsequence_numbers;
+                length -= 1;
+                first_subsequence = num_big_subseqiences;
+            }
+
+            const int offset = (k % length) * mixingCoeff;
+            const int subsequence = (k / length) + first_subsequence;
+            return offset + subsequence;
+        }
+        int subsequence_length;
+        int num_big_subseqiences;
+        int num_total_big_subsequence_numbers;
+
+        // Internal index of a graph arc: the graph id is reversed
+        // (_arc_num - id - 1) and, when arc mixing is enabled, passed
+        // through sequence().
+        inline int getArcID(const Arc &arc) const
+        {
+            const int reversed = _arc_num-GR::id(arc)-1;
+            return _arc_mixing ? sequence(reversed) : reversed;
+        }
+
+        // finally unused because too slow
+        // Recomputes the source node of internal arc index `arc` instead of
+        // reading it from _source. Indices that map back to a graph arc are
+        // resolved through the graph; the remaining ones (presumably the
+        // artificial arcs added by the solver - confirm) are derived from
+        // _node_num and _graph._n1.
+        inline int getSource(const int arc) const
+        {
+            int pos = _arc_num-arc-1;
+            if (_arc_mixing) {
+                pos = mixingCoeff*(pos%mixingCoeff) + pos/mixingCoeff;
+            }
+
+            if (pos>=0) {
+                return _node_id(_graph.source(GR::arcFromId( pos ) ));
+            }
+
+            const int extra = arc+1-_arc_num;
+            if (extra<=_node_num) {
+                return _node_num;
+            }
+            if (extra>=_graph._n1) {
+                return _graph._n1;
+            }
+            return _graph._n1-extra;
+        }
+
+
+
+        // Implementation of the Block Search pivot rule
+        //
+        // Arcs are scanned cyclically, starting where the previous search
+        // stopped. After every _block_size arcs the best candidate found so
+        // far is accepted if its reduced cost is negative beyond a relative
+        // tolerance; otherwise the scan goes on. The search fails only when
+        // a full cycle yields no acceptable candidate.
+        class BlockSearchPivotRule
+        {
+        private:
+
+            // References to the NetworkSimplexSimple class
+            const UHalfIntVector  &_source;
+            const UHalfIntVector  &_target;
+            const CostVector &_cost;
+            const StateVector &_state;
+            const CostVector &_pi;
+            int &_in_arc;
+            int _search_arc_num;
+
+            // Pivot rule data
+            int _block_size;
+            int _next_arc;
+            NetworkSimplexSimple &_ns;
+
+            // Relative tolerance for the current candidate _in_arc:
+            // EPSILON * max(|pi[source]|, |pi[target]|, |cost|).
+            // Must only be called once _in_arc has been set by this search.
+            double tolerance() const {
+                double a = fabs(_pi[_source[_in_arc]])>fabs(_pi[_target[_in_arc]]) ? fabs(_pi[_source[_in_arc]]):fabs(_pi[_target[_in_arc]]);
+                a = a>fabs(_cost[_in_arc])?a:fabs(_cost[_in_arc]);
+                return EPSILON*a;
+            }
+
+            // Scans arcs [begin, end), updating the best reduced cost `min`
+            // and _in_arc. Returns true when a block boundary is reached
+            // with an acceptable candidate; `e` is then the arc at which the
+            // scan stopped. Returns false (with e == end) otherwise.
+            bool scanRange(int begin, int end, Cost &min, int &cnt, int &e) {
+                for (e = begin; e != end; ++e) {
+                    Cost c = _state[e] * (_cost[e] + _pi[_source[e]] - _pi[_target[e]]);
+                    if (c < min) {
+                        min = c;
+                        _in_arc = e;
+                    }
+                    if (--cnt == 0) {
+                        // `min < 0` guarantees _in_arc was set above; without
+                        // a candidate the tolerance would be computed from a
+                        // stale (possibly uninitialised) arc index.
+                        if (min < 0 && min < -tolerance()) return true;
+                        cnt = _block_size;
+                    }
+                }
+                return false;
+            }
+
+        public:
+
+            // Constructor
+            BlockSearchPivotRule(NetworkSimplexSimple &ns) :
+            _source(ns._source), _target(ns._target),
+            _cost(ns._cost), _state(ns._state), _pi(ns._pi),
+            _in_arc(ns.in_arc), _search_arc_num(ns._search_arc_num),
+            _next_arc(0),_ns(ns)
+            {
+                // The main parameters of the pivot rule
+                const double BLOCK_SIZE_FACTOR = 1.0;
+                const int MIN_BLOCK_SIZE = 10;
+
+                // Block size: sqrt(number of searchable arcs), at least 10
+                _block_size = std::max( int(BLOCK_SIZE_FACTOR *
+                                            std::sqrt(double(_search_arc_num))),
+                                       MIN_BLOCK_SIZE );
+            }
+
+            // Find next entering arc
+            //
+            // Returns true and stores the arc in the solver's in_arc when an
+            // entering arc exists, false when no arc has a reduced cost that
+            // is negative beyond the tolerance.
+            bool findEnteringArc() {
+                Cost min = 0;
+                int e;
+                int cnt = _block_size;
+
+                // First the tail [_next_arc, end), then wrap around to the
+                // head [0, _next_arc).
+                bool found = scanRange(_next_arc, _search_arc_num, min, cnt, e);
+                if (!found)
+                    found = scanRange(0, _next_arc, min, cnt, e);
+
+                if (!found) {
+                    // Whole cycle scanned: accept the best candidate of the
+                    // last, incomplete block only if it is good enough.
+                    if (!(min < 0) || min >= -tolerance()) return false;
+                }
+
+                _next_arc = e;
+                return true;
+            }
+
+        }; //class BlockSearchPivotRule
+
+
+
+    public:
+
+
+
+        int _init_nb_nodes;
+        long long _init_nb_arcs;
+
+        /// \name Parameters
+        /// The parameters of the algorithm can be specified using these
+        /// functions.
+
+        /// @{
+
+
+        /// \brief Set the costs of the arcs.
+        ///
+        /// Walks over every arc of the graph and copies <tt>map[a]</tt>
+        /// into the internal cost array, at the position given by
+        /// getArcID().
+        ///
+        /// \param map An arc map storing the costs.
+        /// Its \c Value type must be convertible to the \c Cost type
+        /// of the algorithm.
+        ///
+        /// \return <tt>(*this)</tt>
+        template<typename CostMap>
+        NetworkSimplexSimple& costMap(const CostMap& map) {
+            Arc a;
+            _graph.first(a);
+            while (a != INVALID) {
+                _cost[getArcID(a)] = map[a];
+                _graph.next(a);
+            }
+            return *this;
+        }
+
+
+        /// \brief Set the cost of one arc.
+        ///
+        /// Writes a single entry of the internal cost array, so that the
+        /// caller does not have to build a complete cost map
+        /// (done for memory reasons).
+        ///
+        /// \param arc An arc.
+        /// \param cost The cost to assign to \c arc.
+        ///
+        /// \return <tt>(*this)</tt>
+        template<typename CostValue>
+        NetworkSimplexSimple& setCost(const Arc& arc, const CostValue cost) {
+            const int slot = getArcID(arc);
+            _cost[slot] = cost;
+            return *this;
+        }
+
+
+        /// \brief Set the supply values of the nodes.
+        ///
+        /// Walks over every node of the graph and copies <tt>map[n]</tt>
+        /// into the internal supply array, at the position given by
+        /// _node_id().
+        ///
+        /// \param map A node map storing the supply values.
+        /// Its \c Value type must be convertible to the \c Value type
+        /// of the algorithm.
+        ///
+        /// \return <tt>(*this)</tt>
+        template<typename SupplyMap>
+        NetworkSimplexSimple& supplyMap(const SupplyMap& map) {
+            Node n;
+            _graph.first(n);
+            while (n != INVALIDNODE) {
+                _supply[_node_id(n)] = map[n];
+                _graph.next(n);
+            }
+            return *this;
+        }
+        /// \brief Set the supply values from two plain arrays.
+        ///
+        /// Nodes with an id below \c n1 read their supply from
+        /// <tt>map1[id]</tt>, all the others from <tt>map2[id - n1]</tt>.
+        ///
+        /// \param map1 Supplies of the first \c n1 nodes.
+        /// \param n1 Number of nodes served by \c map1.
+        /// \param map2 Supplies of the remaining nodes.
+        /// \param n2 Not used by this function.
+        ///
+        /// \return <tt>(*this)</tt>
+        template<typename SupplyMap>
+        NetworkSimplexSimple& supplyMap(const SupplyMap* map1, int n1, const SupplyMap* map2, int n2) {
+            Node n;
+            _graph.first(n);
+            while (n != INVALIDNODE) {
+                _supply[_node_id(n)] = (n<n1) ? map1[n] : map2[n-n1];
+                _graph.next(n);
+            }
+            return *this;
+        }
+        template<typename SupplyMap>
+        NetworkSimplexSimple& supplyMapAll(SupplyMap val1, int n1, SupplyMap val2, int n2) {
+            // Uniform supplies: nodes below n1 all receive val1, every
+            // other node receives val2. n2 is not read here.
+            Node node;
+            _graph.first(node);
+            while (node != INVALIDNODE) {
+                _supply[_node_id(node)] = (node < n1) ? val1 : val2;
+                _graph.next(node);
+            }
+            return *this;
+        }
+
+        /// \brief Set single source and target nodes and a supply value.
+        ///
+        /// This function sets a single source node and a single target node
+        /// and the required flow value.
+        /// If neither this function nor \ref supplyMap() is used before
+        /// calling \ref run(), the supply of each node will be set to zero.
+        ///
+        /// Using this function has the same effect as using \ref supplyMap()
+        /// with such a map in which \c k is assigned to \c s, \c -k is
+        /// assigned to \c t and all other nodes have zero supply value.
+        ///
+        /// \param s The source node.
+        /// \param t The target node.
+        /// \param k The required amount of flow from node \c s to node \c t
+        /// (i.e. the supply of \c s and the demand of \c t).
+        ///
+        /// \return <tt>(*this)</tt>
+        NetworkSimplexSimple& stSupply(const Node& s, const Node& t, Value k) {
+            // Start from an all-zero supply vector ...
+            int idx = 0;
+            while (idx != _node_num) {
+                _supply[idx] = 0;
+                ++idx;
+            }
+            // ... then s offers k units and t requires the same k units.
+            _supply[_node_id(s)] = k;
+            _supply[_node_id(t)] = -k;
+            return *this;
+        }
+
+        /// \brief Set the type of the supply constraints.
+        ///
+        /// This function sets the type of the supply/demand constraints.
+        /// If it is not used before calling \ref run(), the \ref GEQ supply
+        /// type will be used.
+        ///
+        /// For more information, see \ref SupplyType.
+        ///
+        /// \return <tt>(*this)</tt>
+        NetworkSimplexSimple& supplyType(SupplyType supply_type) {
+            // Remember the requested constraint type; it is only stored here.
+            NetworkSimplexSimple& self = *this;
+            self._stype = supply_type;
+            return self;
+        }
+
+        /// @}
+
+        /// \name Execution Control
+        /// The algorithm can be executed using \ref run().
+
+        /// @{
+
+        /// \brief Run the algorithm.
+        ///
+        /// This function runs the algorithm.
+        /// The parameters can be specified using functions \ref lowerMap(),
+        /// \ref upperMap(), \ref costMap(), \ref supplyMap(), \ref stSupply(),
+        /// \ref supplyType().
+        /// For example,
+        /// \code
+        ///   NetworkSimplexSimple<ListDigraph> ns(graph);
+        ///   ns.lowerMap(lower).upperMap(upper).costMap(cost)
+        ///     .supplyMap(sup).run();
+        /// \endcode
+        ///
+        /// This function can be called more than once. All the given parameters
+        /// are kept for the next call, unless \ref resetParams() or \ref reset()
+        /// is used, thus only the modified parameters have to be set again.
+        /// If the underlying digraph was also modified after the construction
+        /// of the class (or the last \ref reset() call), then the \ref reset()
+        /// function must be called.
+        ///
+        /// This function takes no argument: the pivot rule cannot be chosen
+        /// here, the block search pivot rule is always used.
+        ///
+        /// \return \c INFEASIBLE if no feasible flow exists,
+        /// \n \c OPTIMAL if the problem has optimal solution
+        /// (i.e. it is feasible and bounded), and the algorithm has found
+        /// optimal flow and node potentials (primal and dual solutions),
+        /// \n \c UNBOUNDED if the objective function of the problem is
+        /// unbounded, i.e. there is a directed cycle having negative total
+        /// cost and infinite upper bound,
+        /// \n \c MAX_ITER_REACHED if the iteration limit was hit before the
+        /// search for entering arcs finished.
+        ///
+        /// \see ProblemType, PivotRule
+        /// \see resetParams(), reset()
+        ProblemType run() {
+#if DEBUG_LVL>0
+            // Print the numeric value of every status code so that the
+            // result of the run can be decoded from the debug log.
+            std::cout << "OPTIMAL = " << OPTIMAL << "\nINFEASIBLE = " << INFEASIBLE << "\nUNBOUNDED = " << UNBOUNDED << "\nMAX_ITER_REACHED = " << MAX_ITER_REACHED << "\n";
+#endif
+
+            // init() refuses an empty graph and supplies that do not balance
+            if (!init()) return INFEASIBLE;
+#if DEBUG_LVL>0
+            std::cout << "Init done, starting iterations\n";
+#endif
+            return start();
+        }
+
+        /// \brief Reset all the parameters that have been given before.
+        ///
+        /// This function resets all the parameters that have been given
+        /// before using functions \ref lowerMap(), \ref upperMap(),
+        /// \ref costMap(), \ref supplyMap(), \ref stSupply(), \ref supplyType().
+        ///
+        /// It is useful for multiple \ref run() calls. Basically, all the given
+        /// parameters are kept for the next \ref run() call, unless
+        /// \ref resetParams() or \ref reset() is used.
+        /// If the underlying digraph was also modified after the construction
+        /// of the class or the last \ref reset() call, then the \ref reset()
+        /// function must be used, otherwise \ref resetParams() is sufficient.
+        ///
+        /// For example,
+        /// \code
+        ///   NetworkSimplexSimple<ListDigraph> ns(graph);
+        ///
+        ///   // First run
+        ///   ns.lowerMap(lower).upperMap(upper).costMap(cost)
+        ///     .supplyMap(sup).run();
+        ///
+        ///   // Run again with modified cost map (resetParams() is not called,
+        ///   // so only the cost map have to be set again)
+        ///   cost[e] += 100;
+        ///   ns.costMap(cost).run();
+        ///
+        ///   // Run again from scratch using resetParams()
+        ///   // (the lower bounds will be set to zero on all arcs)
+        ///   ns.resetParams();
+        ///   ns.upperMap(capacity).costMap(cost)
+        ///     .supplyMap(sup).run();
+        /// \endcode
+        ///
+        /// \return <tt>(*this)</tt>
+        ///
+        /// \see reset(), run()
+        NetworkSimplexSimple& resetParams() {
+            // Every node goes back to a zero supply
+            int node_idx = 0;
+            while (node_idx != _node_num) {
+                _supply[node_idx] = 0;
+                ++node_idx;
+            }
+            // Every real arc goes back to a unit cost
+            int arc_idx = 0;
+            while (arc_idx != _arc_num) {
+                _cost[arc_idx] = 1;
+                ++arc_idx;
+            }
+            // GEQ is the supply type restored by a parameter reset
+            _stype = GEQ;
+            return *this;
+        }
+
+
+
+        int divid (int x, int y)
+        {
+            // Integer quotient of x by y: remove the remainder first so that
+            // the division that follows is exact.
+            const int remainder = x % y;
+            const int exact_multiple = x - remainder;
+            return exact_multiple / y;
+        }
+
+        /// \brief Reset the internal data structures and all the parameters
+        /// that have been given before.
+        ///
+        /// This function resets the internal data structures and all the
+        /// parameters that have been given before using functions \ref lowerMap(),
+        /// \ref upperMap(), \ref costMap(), \ref supplyMap(), \ref stSupply(),
+        /// \ref supplyType().
+        ///
+        /// It is useful for multiple \ref run() calls. Basically, all the given
+        /// parameters are kept for the next \ref run() call, unless
+        /// \ref resetParams() or \ref reset() is used.
+        /// If the underlying digraph was also modified after the construction
+        /// of the class or the last \ref reset() call, then the \ref reset()
+        /// function must be used, otherwise \ref resetParams() is sufficient.
+        ///
+        /// See \ref resetParams() for examples.
+        ///
+        /// \return <tt>(*this)</tt>
+        ///
+        /// \see resetParams(), run()
+        NetworkSimplexSimple& reset() {
+            // Go back to the problem size recorded at construction time
+            _node_num = _init_nb_nodes;
+            _arc_num = _init_nb_arcs;
+
+            // One extra node slot is reserved beyond the real nodes, and two
+            // extra arc slots per node beyond the real arcs
+            const int node_slots = _node_num + 1;
+            const int arc_slots = _arc_num + 2 * _node_num;
+
+            // Arc-indexed storage
+            _source.resize(arc_slots);
+            _target.resize(arc_slots);
+            _cost.resize(arc_slots);
+            _flow.resize(arc_slots);
+            _state.resize(arc_slots);
+
+            // Node-indexed storage
+            _supply.resize(node_slots);
+            _pi.resize(node_slots);
+            _parent.resize(node_slots);
+            _pred.resize(node_slots);
+            _forward.resize(node_slots);
+            _thread.resize(node_slots);
+            _rev_thread.resize(node_slots);
+            _succ_num.resize(node_slots);
+            _last_succ.resize(node_slots);
+
+            if (_arc_mixing) {
+                // Mixed order: consecutive arcs of the graph are written
+                // `stride` slots apart; when the slot index runs past the
+                // last real arc, writing restarts one slot after the
+                // previous starting slot.
+                const int stride = std::max(int(std::sqrt(double(_arc_num))), 10);
+                mixingCoeff = stride;
+                subsequence_length = _arc_num / mixingCoeff + 1;
+                num_big_subseqiences = _arc_num % mixingCoeff;
+                num_total_big_subsequence_numbers = subsequence_length * num_big_subseqiences;
+
+                int slot = 0;
+                int restart = 0;
+                Arc arc;
+                _graph.first(arc);
+                while (arc != INVALID) {
+                    _source[slot] = _node_id(_graph.source(arc));
+                    _target[slot] = _node_id(_graph.target(arc));
+                    slot += stride;
+                    if (slot >= _arc_num) {
+                        ++restart;
+                        slot = restart;
+                    }
+                    _graph.next(arc);
+                }
+            } else {
+                // Original order: the i-th arc visited goes to slot i
+                int slot = 0;
+                Arc arc;
+                _graph.first(arc);
+                while (arc != INVALID) {
+                    _source[slot] = _node_id(_graph.source(arc));
+                    _target[slot] = _node_id(_graph.target(arc));
+                    _graph.next(arc);
+                    ++slot;
+                }
+            }
+
+            // Finally restore the default supplies, costs and supply type
+            resetParams();
+            return *this;
+        }
+
+        /// @}
+
+        /// \name Query Functions
+        /// The results of the algorithm can be obtained using these
+        /// functions.\n
+        /// The \ref run() function must be called before using them.
+
+        /// @{
+
+        /// \brief Return the total cost of the found flow.
+        ///
+        /// This function returns the total cost of the found flow.
+        /// Its complexity is O(e).
+        ///
+        /// \note The return type of the function can be specified as a
+        /// template parameter. For example,
+        /// \code
+        ///   ns.totalCost<double>();
+        /// \endcode
+        /// It is useful if the total cost cannot be stored in the \c Cost
+        /// type of the algorithm, which is the default return type of the
+        /// function.
+        ///
+        /// \pre \ref run() must be called before using this function.
+        /*template <typename Number>
+         Number totalCost() const {
+         Number c = 0;
+         for (ArcIt a(_graph); a != INVALID; ++a) {
+         int i = getArcID(a);
+         c += Number(_flow[i]) * Number(_cost[i]);
+         }
+         return c;
+         }*/
+
+        template <typename Number>
+        Number totalCost() const {
+            // Sum flow * cost over every slot of the flow vector; the cost
+            // is converted to Number before the multiplication.
+            Number total = 0;
+            int idx = 0;
+            while (idx < _flow.size()) {
+                total += _flow[idx] * Number(_cost[idx]);
+                idx++;
+            }
+            return total;
+        }
+
+#ifndef DOXYGEN
+        Cost totalCost() const {
+            // Same computation as the templated overload, accumulated in Cost
+            const Cost total = totalCost<Cost>();
+            return total;
+        }
+#endif
+
+        /// \brief Return the flow on the given arc.
+        ///
+        /// This function returns the flow on the given arc.
+        ///
+        /// \pre \ref run() must be called before using this function.
+        Value flow(const Arc& a) const {
+            // Look the arc up by its internal id in the flow vector
+            const Value arc_flow = _flow[getArcID(a)];
+            return arc_flow;
+        }
+
+        /// \brief Return the flow map (the primal solution).
+        ///
+        /// This function copies the flow value on each arc into the given
+        /// map. The \c Value type of the algorithm must be convertible to
+        /// the \c Value type of the map.
+        ///
+        /// \pre \ref run() must be called before using this function.
+        template <typename FlowMap>
+        void flowMap(FlowMap &map) const {
+            // Hand the flow of every arc of the graph to the caller's map
+            Arc arc;
+            _graph.first(arc);
+            while (arc != INVALID) {
+                map.set(arc, _flow[getArcID(arc)]);
+                _graph.next(arc);
+            }
+        }
+
+        /// \brief Return the potential (dual value) of the given node.
+        ///
+        /// This function returns the potential (dual value) of the
+        /// given node.
+        ///
+        /// \pre \ref run() must be called before using this function.
+        Cost potential(const Node& n) const {
+            // Look the node up by its internal index in the potential vector
+            const Cost node_potential = _pi[_node_id(n)];
+            return node_potential;
+        }
+
+        /// \brief Return the potential map (the dual solution).
+        ///
+        /// This function copies the potential (dual value) of each node
+        /// into the given map.
+        /// The \c Cost type of the algorithm must be convertible to the
+        /// \c Value type of the map.
+        ///
+        /// \pre \ref run() must be called before using this function.
+        template <typename PotentialMap>
+        void potentialMap(PotentialMap &map) const {
+            // Hand the potential of every node of the graph to the caller's
+            // map. The loop ends on INVALIDNODE, the sentinel every other
+            // node iteration of this class compares against (INVALID is the
+            // one used for arcs).
+            Node n; _graph.first(n);
+            for (; n != INVALIDNODE; _graph.next(n)) {
+                map.set(n, _pi[_node_id(n)]);
+            }
+        }
+
+        /// @}
+
+    private:
+
+        /* Initialize internal data structures.
+         *
+         * Checks the supplies, chooses the cost of the artificial arcs and
+         * builds the starting spanning tree, in which every node hangs
+         * directly below an artificial root node of index _node_num.
+         *
+         * Returns false when there is no node or when the supplies do not
+         * sum to zero within _EPSILON, true otherwise.
+         */
+        bool init() {
+            if (_node_num == 0) return false;  // nothing to solve
+            
+            // Check the sum of supply values
+            _sum_supply = 0;
+            for (int i = 0; i != _node_num; ++i) {
+                _sum_supply += _supply[i];
+            }
+            if ( fabs(_sum_supply) > _EPSILON ) return false;  // supplies and demands do not balance
+            
+			_sum_supply = 0;  // drop the residual: from here on the problem is treated as exactly balanced
+
+            // Initialize artificial cost: half of the type's range plus one for exact Cost types, otherwise (largest arc cost + 1) * number of nodes
+            Cost ART_COST;
+            if (std::numeric_limits<Cost>::is_exact) {
+                ART_COST = std::numeric_limits<Cost>::max() / 2 + 1;
+            } else {
+                ART_COST = 0;
+                for (int i = 0; i != _arc_num; ++i) {
+                    if (_cost[i] > ART_COST) ART_COST = _cost[i];
+                }
+                ART_COST = (ART_COST + 1) * _node_num;
+            }
+
+            // Initialize arc maps: every real arc starts in the STATE_LOWER state
+            for (int i = 0; i != _arc_num; ++i) {
+                //_flow[i] = 0; //by default, the sparse matrix is empty
+                _state[i] = STATE_LOWER;
+            }
+
+            // Set data for the artificial root node
+            _root = _node_num;  // the root takes the index right after the last real node
+            _parent[_root] = -1;
+            _pred[_root] = -1;
+            _thread[_root] = 0;
+            _rev_thread[0] = _root;
+            _succ_num[_root] = _node_num + 1;
+            _last_succ[_root] = _root - 1;
+            _supply[_root] = -_sum_supply;
+            _pi[_root] = 0;
+
+            // Add artificial arcs and initialize the spanning tree data structure
+            if (_sum_supply == 0) {  // always taken here: _sum_supply was reset to 0 above
+                // EQ supply constraints
+                _search_arc_num = _arc_num;
+                _all_arc_num = _arc_num + _node_num;
+                for (int u = 0, e = _arc_num; u != _node_num; ++u, ++e) {  // node u gets the artificial arc e = _arc_num + u
+                    _parent[u] = _root;
+                    _pred[u] = e;
+                    _thread[u] = u + 1;
+                    _rev_thread[u + 1] = u;
+                    _succ_num[u] = 1;
+                    _last_succ[u] = u;
+                    _state[e] = STATE_TREE;
+                    if (_supply[u] >= 0) {  // arc u -> root carrying the supply at zero cost
+                        _forward[u] = true;
+                        _pi[u] = 0;
+                        _source[e] = u;
+                        _target[e] = _root;
+                        _flow[e] = _supply[u];
+                        _cost[e] = 0;
+                    } else {  // arc root -> u carrying the demand at cost ART_COST
+                        _forward[u] = false;
+                        _pi[u] = ART_COST;
+                        _source[e] = _root;
+                        _target[e] = u;
+                        _flow[e] = -_supply[u];
+                        _cost[e] = ART_COST;
+                    }
+                }
+            }
+            else if (_sum_supply > 0) {  // cannot be taken as written: _sum_supply is 0 at this point
+                // LEQ supply constraints
+                _search_arc_num = _arc_num + _node_num;
+                int f = _arc_num + _node_num;
+                for (int u = 0, e = _arc_num; u != _node_num; ++u, ++e) {
+                    _parent[u] = _root;
+                    _thread[u] = u + 1;
+                    _rev_thread[u + 1] = u;
+                    _succ_num[u] = 1;
+                    _last_succ[u] = u;
+                    if (_supply[u] >= 0) {
+                        _forward[u] = true;
+                        _pi[u] = 0;
+                        _pred[u] = e;
+                        _source[e] = u;
+                        _target[e] = _root;
+                        _flow[e] = _supply[u];
+                        _cost[e] = 0;
+                        _state[e] = STATE_TREE;
+                    } else {  // demand node: tree arc f (root -> u) plus a zero-cost non-tree arc e (u -> root)
+                        _forward[u] = false;
+                        _pi[u] = ART_COST;
+                        _pred[u] = f;
+                        _source[f] = _root;
+                        _target[f] = u;
+                        _flow[f] = -_supply[u];
+                        _cost[f] = ART_COST;
+                        _state[f] = STATE_TREE;
+                        _source[e] = u;
+                        _target[e] = _root;
+                        //_flow[e] = 0;  //by default, the sparse matrix is empty
+                        _cost[e] = 0;
+                        _state[e] = STATE_LOWER;
+                        ++f;
+                    }
+                }
+                _all_arc_num = f;
+            }
+            else {  // cannot be taken as written: _sum_supply is 0 at this point
+                // GEQ supply constraints
+                _search_arc_num = _arc_num + _node_num;
+                int f = _arc_num + _node_num;
+                for (int u = 0, e = _arc_num; u != _node_num; ++u, ++e) {
+                    _parent[u] = _root;
+                    _thread[u] = u + 1;
+                    _rev_thread[u + 1] = u;
+                    _succ_num[u] = 1;
+                    _last_succ[u] = u;
+                    if (_supply[u] <= 0) {
+                        _forward[u] = false;
+                        _pi[u] = 0;
+                        _pred[u] = e;
+                        _source[e] = _root;
+                        _target[e] = u;
+                        _flow[e] = -_supply[u];
+                        _cost[e] = 0;
+                        _state[e] = STATE_TREE;
+                    } else {  // supply node: tree arc f (u -> root) plus a zero-cost non-tree arc e (root -> u)
+                        _forward[u] = true;
+                        _pi[u] = -ART_COST;
+                        _pred[u] = f;
+                        _source[f] = u;
+                        _target[f] = _root;
+                        _flow[f] = _supply[u];
+                        _state[f] = STATE_TREE;
+                        _cost[f] = ART_COST;
+                        _source[e] = _root;
+                        _target[e] = u;
+                        //_flow[e] = 0; //by default, the sparse matrix is empty
+                        _cost[e] = 0;
+                        _state[e] = STATE_LOWER;
+                        ++f;
+                    }
+                }
+                _all_arc_num = f;
+            }
+
+            return true;
+        }
+
+        // Find the join node: the node where the tree paths that start at
+        // the two end nodes of the entering arc meet. The result is stored
+        // in `join`.
+        void findJoinNode() {
+            int from_source = _source[in_arc];
+            int from_target = _target[in_arc];
+            // Always climb from the side whose subtree count is smaller;
+            // on a tie the target side climbs.
+            while (from_source != from_target) {
+                if (_succ_num[from_source] >= _succ_num[from_target]) {
+                    from_target = _parent[from_target];
+                } else {
+                    from_source = _parent[from_source];
+                }
+            }
+            join = from_source;
+        }
+
+        // Find the leaving arc of the cycle closed by the entering arc.
+        // Sets delta, u_out, u_in and v_in, and returns true if the
+        // leaving arc is not the same as the entering arc.
+        bool findLeavingArc() {
+            // The state of the entering arc decides the direction in which
+            // the cycle is walked, i.e. which end node comes first
+            const bool entering_at_lower = (_state[in_arc] == STATE_LOWER);
+            first  = entering_at_lower ? _source[in_arc] : _target[in_arc];
+            second = entering_at_lower ? _target[in_arc] : _source[in_arc];
+
+            delta = INF;
+            int side = 0;   // 0: nothing found, 1: first path, 2: second path
+            Value candidate;
+            int arc_idx;
+
+            // Walk from the first node up to the join node; only a strictly
+            // smaller value replaces the current minimum here
+            int node = first;
+            while (node != join) {
+                arc_idx = _pred[node];
+                candidate = _forward[node] ? _flow[arc_idx] : INF ;
+                if (candidate < delta) {
+                    delta = candidate;
+                    u_out = node;
+                    side = 1;
+                }
+                node = _parent[node];
+            }
+
+            // Walk from the second node up to the join node; here a tie
+            // also replaces the current minimum
+            node = second;
+            while (node != join) {
+                arc_idx = _pred[node];
+                candidate = _forward[node] ? INF  : _flow[arc_idx];
+                if (candidate <= delta) {
+                    delta = candidate;
+                    u_out = node;
+                    side = 2;
+                }
+                node = _parent[node];
+            }
+
+            // u_in is the end node on the side where the leaving arc was found
+            const bool found_on_first = (side == 1);
+            u_in = found_on_first ? first : second;
+            v_in = found_on_first ? second : first;
+            return side != 0;
+        }
+
+        // Push delta units of flow around the cycle and update the states
+        // of the entering and leaving arcs (_flow and _state vectors).
+        void changeFlow(bool change) {
+            // Augment along the cycle; nothing moves when delta is not positive
+            if (delta > 0) {
+                Value val = _state[in_arc] * delta;
+                const Value neg_val = -val;
+                _flow[in_arc] += val;
+                int node = _source[in_arc];
+                while (node != join) {
+                    _flow[_pred[node]] += _forward[node] ? neg_val : val;
+                    node = _parent[node];
+                }
+                node = _target[in_arc];
+                while (node != join) {
+                    _flow[_pred[node]] += _forward[node] ? val : neg_val;
+                    node = _parent[node];
+                }
+            }
+
+            // Without a basis change the entering arc only flips its state
+            if (!change) {
+                _state[in_arc] = -_state[in_arc];
+                return;
+            }
+
+            // Otherwise the entering arc joins the tree and the leaving arc
+            // gets a state that depends on whether its flow dropped to zero
+            _state[in_arc] = STATE_TREE;
+            if (_flow[_pred[u_out]] == 0) {
+                _state[_pred[u_out]] = STATE_LOWER;
+            } else {
+                _state[_pred[u_out]] = STATE_UPPER;
+            }
+        }
+
+        /* Update the tree structure after a pivot.
+         *
+         * The subtree that used to be rooted at u_out becomes rooted at u_in
+         * and is attached below v_in through the entering arc in_arc. The
+         * arrays _parent, _pred, _forward, _thread, _rev_thread, _succ_num
+         * and _last_succ are repaired accordingly.
+         *
+         * Reads in_arc, join, u_in, v_in and u_out, which are not set here;
+         * sets v_out and uses several scratch variables declared outside
+         * this function (right, last, stem, par_stem, new_stem, _dirty_revs).
+         */
+        void updateTreeStructure() {
+            int u, w;
+            int old_rev_thread = _rev_thread[u_out];  // values of u_out saved before the lists are rewired
+            int old_succ_num = _succ_num[u_out];
+            int old_last_succ = _last_succ[u_out];
+            v_out = _parent[u_out];
+
+            u = _last_succ[u_in];  // the last successor of u_in
+            right = _thread[u];    // the node after it
+
+            // Handle the case when old_rev_thread equals to v_in
+            // (it also means that join and v_out coincide)
+            if (old_rev_thread == v_in) {
+                last = _thread[_last_succ[u_out]];
+            } else {
+                last = _thread[v_in];
+            }
+
+            // Update _thread and _parent along the stem nodes (i.e. the nodes
+            // between u_in and u_out, whose parent have to be changed)
+            _thread[v_in] = stem = u_in;
+            _dirty_revs.clear();  // nodes whose _thread entry changed; their _rev_thread is fixed further down
+            _dirty_revs.push_back(v_in);
+            par_stem = v_in;
+            while (stem != u_out) {
+                // Insert the next stem node into the thread list
+                new_stem = _parent[stem];
+                _thread[u] = new_stem;
+                _dirty_revs.push_back(u);
+
+                // Remove the subtree of stem from the thread list
+                w = _rev_thread[stem];
+                _thread[w] = right;
+                _rev_thread[right] = w;
+
+                // Change the parent node and shift stem nodes
+                _parent[stem] = par_stem;
+                par_stem = stem;
+                stem = new_stem;
+
+                // Update u and right
+                u = _last_succ[stem] == _last_succ[par_stem] ?
+                _rev_thread[par_stem] : _last_succ[stem];
+                right = _thread[u];
+            }
+            _parent[u_out] = par_stem;
+            _thread[u] = last;
+            _rev_thread[last] = u;
+            _last_succ[u_out] = u;
+
+            // Remove the subtree of u_out from the thread list except for
+            // the case when old_rev_thread equals to v_in
+            // (it also means that join and v_out coincide)
+            if (old_rev_thread != v_in) {
+                _thread[old_rev_thread] = right;
+                _rev_thread[right] = old_rev_thread;
+            }
+
+            // Update _rev_thread using the new _thread values
+            for (int i = 0; i != int(_dirty_revs.size()); ++i) {
+                u = _dirty_revs[i];
+                _rev_thread[_thread[u]] = u;
+            }
+
+            // Update _pred, _forward, _last_succ and _succ_num for the
+            // stem nodes from u_out to u_in
+            int tmp_sc = 0, tmp_ls = _last_succ[u_out];
+            u = u_out;
+            while (u != u_in) {
+                w = _parent[u];  // new parent of u, i.e. the next stem node towards u_in
+                _pred[u] = _pred[w];
+                _forward[u] = !_forward[w];
+                tmp_sc += _succ_num[u] - _succ_num[w];
+                _succ_num[u] = tmp_sc;
+                _last_succ[w] = tmp_ls;
+                u = w;
+            }
+            _pred[u_in] = in_arc;
+            _forward[u_in] = (u_in == _source[in_arc]);
+            _succ_num[u_in] = old_succ_num;  // u_in now heads the whole subtree that u_out used to head
+
+            // Set limits for updating _last_succ from v_in and v_out
+            // towards the root
+            int up_limit_in = -1;
+            int up_limit_out = -1;
+            if (_last_succ[join] == v_in) {
+                up_limit_out = join;
+            } else {
+                up_limit_in = join;
+            }
+
+            // Update _last_succ from v_in towards the root
+            for (u = v_in; u != up_limit_in && _last_succ[u] == v_in;
+                 u = _parent[u]) {
+                _last_succ[u] = _last_succ[u_out];
+            }
+            // Update _last_succ from v_out towards the root
+            if (join != old_rev_thread && v_in != old_rev_thread) {
+                for (u = v_out; u != up_limit_out && _last_succ[u] == old_last_succ;
+                     u = _parent[u]) {
+                    _last_succ[u] = old_rev_thread;
+                }
+            } else {
+                for (u = v_out; u != up_limit_out && _last_succ[u] == old_last_succ;
+                     u = _parent[u]) {
+                    _last_succ[u] = _last_succ[u_out];
+                }
+            }
+
+            // Update _succ_num from v_in to join
+            for (u = v_in; u != join; u = _parent[u]) {
+                _succ_num[u] += old_succ_num;
+            }
+            // Update _succ_num from v_out to join
+            for (u = v_out; u != join; u = _parent[u]) {
+                _succ_num[u] -= old_succ_num;
+            }
+        }
+
+        // Shift the potentials of the subtree that was moved below v_in by
+        // one common amount.
+        void updatePotential() {
+            // The sign of the arc cost depends on the direction of the arc
+            // that now links u_in to its parent
+            Cost sigma;
+            if (_forward[u_in]) {
+                sigma = _pi[v_in] - _pi[u_in] - _cost[_pred[u_in]];
+            } else {
+                sigma = _pi[v_in] - _pi[u_in] + _cost[_pred[u_in]];
+            }
+
+            // Follow the thread list from u_in up to, but not including, the
+            // node that comes right after the last successor of u_in
+            int stop = _thread[_last_succ[u_in]];
+            int node = u_in;
+            while (node != stop) {
+                _pi[node] += sigma;
+                node = _thread[node];
+            }
+        }
+
+        /* Heuristic initial pivots.
+         *
+         * Collects the nodes with positive supply and the nodes with negative
+         * supply, selects a list of candidate arcs from them and pivots on
+         * every candidate for which
+         * _state * (cost + pi[source] - pi[target]) is negative.
+         *
+         * Returns false when a pivot finds delta >= MAX, true otherwise
+         * (including when there is no positive supply to route).
+         */
+        bool initialPivots() {
+            Value curr, total = 0;
+            std::vector<Node> supply_nodes, demand_nodes;
+            Node u; _graph.first(u);
+            for (; u != INVALIDNODE; _graph.next(u)) {
+                curr = _supply[_node_id(u)];
+                if (curr > 0) {
+                    total += curr;  // total accumulates the positive supplies only
+                    supply_nodes.push_back(u);
+                }
+                else if (curr < 0) {
+                    demand_nodes.push_back(u);
+                }
+            }
+            if (_sum_supply > 0) total -= _sum_supply;
+            if (total <= 0) return true;  // nothing to route, no pivot needed
+
+            IntVector arc_vector;  // ids of the candidate entering arcs
+            if (_sum_supply >= 0) {
+                if (supply_nodes.size() == 1 && demand_nodes.size() == 1) {
+                    // Perform a reverse graph search from the sink to the source
+                    //typename GR::template NodeMap<bool> reached(_graph, false);
+                    BoolVector reached(_node_num, false);
+                    Node s = supply_nodes[0], t = demand_nodes[0];
+                    std::vector<Node> stack;
+                    reached[t] = true;
+                    stack.push_back(t);
+                    while (!stack.empty()) {
+                        Node u, v = stack.back();
+                        stack.pop_back();
+                        if (v == s) break;  // the source has been reached
+                        Arc a; _graph.firstIn(a, v);
+                        for (; a != INVALID; _graph.nextIn(a)) {
+                            if (reached[u = _graph.source(a)]) continue;
+                            int j = getArcID(a);
+                            if (INF >= total) {  // NOTE(review): does not depend on the arc; presumably a leftover of a per-arc capacity test - confirm
+                                arc_vector.push_back(j);
+                                reached[u] = true;
+                                stack.push_back(u);
+                            }
+                        }
+                    }
+                } else {
+                    // Find the min. cost incoming arc for each demand node
+                    for (int i = 0; i != int(demand_nodes.size()); ++i) {
+                        Node v = demand_nodes[i];
+                        Cost c, min_cost = std::numeric_limits<Cost>::max();
+                        Arc min_arc = INVALID;
+                        Arc a; _graph.firstIn(a, v);
+                        for (; a != INVALID; _graph.nextIn(a)) {
+                            c = _cost[getArcID(a)];
+                            if (c < min_cost) {
+                                min_cost = c;
+                                min_arc = a;
+                            }
+                        }
+                        if (min_arc != INVALID) {
+                            arc_vector.push_back(getArcID(min_arc));
+                        }
+                    }
+                }
+            } else {  // NOTE(review): init() leaves _sum_supply at 0, so this branch looks unreachable through run() - confirm
+                // Find the min. cost outgoing arc for each supply node
+                for (int i = 0; i != int(supply_nodes.size()); ++i) {
+                    Node u = supply_nodes[i];
+                    Cost c, min_cost = std::numeric_limits<Cost>::max();
+                    Arc min_arc = INVALID;
+                    Arc a; _graph.firstOut(a, u);
+                    for (; a != INVALID; _graph.nextOut(a)) {
+                        c = _cost[getArcID(a)];
+                        if (c < min_cost) {
+                            min_cost = c;
+                            min_arc = a;
+                        }
+                    }
+                    if (min_arc != INVALID) {
+                        arc_vector.push_back(getArcID(min_arc));
+                    }
+                }
+            }
+
+            // Perform heuristic initial pivots
+            for (int i = 0; i != int(arc_vector.size()); ++i) {
+                in_arc = arc_vector[i];
+                // the error is probably here...
+                if (_state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -
+                                      _pi[_target[in_arc]]) >= 0) continue;
+                findJoinNode();
+                bool change = findLeavingArc();
+                if (delta >= MAX) return false;  // the step found on the cycle is not finite
+                changeFlow(change);
+                if (change) {
+                    updateTreeStructure();
+                    updatePotential();
+                }
+            }
+            return true;
+        }
+
+        // Run the solver, forwarding to start<>() with BlockSearchPivotRule
+        ProblemType start() {
+            return this->template start<BlockSearchPivotRule>();
+        }
+
+        /// \brief Run the network simplex iterations with the given pivot rule.
+        ///
+        /// \tparam PivotRuleImpl pivot rule type; it is constructed from *this
+        /// and asked for the next entering arc through findEnteringArc().
+        /// \return UNBOUNDED if initialPivots() fails or delta >= MAX after
+        /// findLeavingArc(); MAX_ITER_REACHED if max_iter > 0 and that many
+        /// iterations were counted; INFEASIBLE if flow beyond EPSILON remains on
+        /// an arc in [_search_arc_num, _all_arc_num); OPTIMAL otherwise.
+        template <typename PivotRuleImpl>
+        ProblemType start() {
+            PivotRuleImpl pivot(*this);
+            ProblemType retVal = OPTIMAL;
+
+            // Perform heuristic initial pivots
+            if (!initialPivots()) return UNBOUNDED;
+
+            int iter_number = 0;
+            //pivot.setDantzig(true);
+            // Execute the Network Simplex algorithm
+            while (pivot.findEnteringArc()) {
+                // iter_number only advances when a positive max_iter is set
+                if (max_iter > 0 && ++iter_number >= max_iter) {
+                    // Streamed directly: no fixed-size buffer, no "\%d" escape
+                    std::cerr << "RESULT MIGHT BE INACURATE\nMax number of iteration reached, currently " << iter_number
+                              << ". Sometimes iterations go on in cycle even though the solution has been reached, to check if it's the case here have a look at the minimal reduced cost. If it is very close to machine precision, you might actually have the correct solution, if not try setting the maximum number of iterations a bit higher\n";
+                    retVal = MAX_ITER_REACHED;
+                    break;
+                }
+#if DEBUG_LVL>0
+                if(iter_number>MAX_DEBUG_ITER)
+                    break;
+                if(iter_number%1000==0||iter_number%1000==1){
+                    double curCost=totalCost();
+                    double sumFlow=0;
+                    double a;
+                    a= (fabs(_pi[_source[in_arc]])>=fabs(_pi[_target[in_arc]])) ? fabs(_pi[_source[in_arc]]) : fabs(_pi[_target[in_arc]]);
+                    a=a>=fabs(_cost[in_arc])?a:fabs(_cost[in_arc]);
+                    for (int i=0; i<_flow.size(); i++) {
+                        sumFlow+=_state[i]*_flow[i];
+                    }
+                    std::cout << "Sum of the flow " << std::setprecision(20) << sumFlow << "\n" << iter_number << " iterations, current cost=" << curCost << "\nReduced cost=" << _state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -_pi[_target[in_arc]]) << "\nPrecision = "<< -EPSILON*(a) << "\n";
+                    std::cout << "Arc in = (" << _node_id(_source[in_arc]) << ", " << _node_id(_target[in_arc]) <<")\n";
+                    std::cout << "Supplies = (" << _supply[_source[in_arc]] << ", " << _supply[_target[in_arc]] << ")\n";
+                    std::cout << _cost[in_arc] << "\n";
+                    std::cout << _pi[_source[in_arc]] << "\n";
+                    std::cout << _pi[_target[in_arc]] << "\n";
+                    std::cout << a << "\n";
+                }
+#endif
+                findJoinNode();
+                bool change = findLeavingArc();
+                if (delta >= MAX) return UNBOUNDED;
+                changeFlow(change);
+                if (change) {
+                    updateTreeStructure();
+                    updatePotential();
+                }
+#if DEBUG_LVL>0
+                else{
+                    std::cout << "No change\n";
+                }
+#endif
+#if DEBUG_LVL>1
+                std::cout << "Arc in = (" << _source[in_arc] << ", " << _target[in_arc] << ")\n";
+#endif
+            }
+
+#if DEBUG_LVL>0
+                double curCost=totalCost();
+                double sumFlow=0;
+                double a;
+                a= (fabs(_pi[_source[in_arc]])>=fabs(_pi[_target[in_arc]])) ? fabs(_pi[_source[in_arc]]) : fabs(_pi[_target[in_arc]]);
+                a=a>=fabs(_cost[in_arc])?a:fabs(_cost[in_arc]);
+                for (int i=0; i<_flow.size(); i++) {
+                    sumFlow+=_state[i]*_flow[i];
+                }
+                std::cout << "Sum of the flow " << std::setprecision(20) << sumFlow << "\n" << iter_number << " iterations, current cost=" << curCost << "\nReduced cost=" << _state[in_arc] * (_cost[in_arc] + _pi[_source[in_arc]] -_pi[_target[in_arc]]) << "\nPrecision = "<< -EPSILON*(a) << "\n";
+                std::cout << "Arc in = (" << _node_id(_source[in_arc]) << ", " << _node_id(_target[in_arc]) <<")\n";
+                std::cout << "Supplies = (" << _supply[_source[in_arc]] << ", " << _supply[_target[in_arc]] << ")\n";
+#endif
+
+#if DEBUG_LVL>1
+            sumFlow=0;
+            for (int i=0; i<_flow.size(); i++) {
+                sumFlow+=_state[i]*_flow[i];
+                if (_state[i]==STATE_TREE) {
+                    std::cout << "Non zero value at (" << _node_num+1-_source[i] << ", " << _node_num+1-_target[i] << ")\n";
+                }
+            }
+            std::cout << "Sum of the flow " << sumFlow << "\n"<< iter_number <<" iterations, current cost=" << totalCost() << "\n";
+#endif
+            // Check feasibility (skipped when the iteration budget ran out)
+            if (retVal == OPTIMAL) {
+                for (int e = _search_arc_num; e != _all_arc_num; ++e) {
+                    // Explicit +/-EPSILON test: an unqualified abs() can bind to
+                    // the int overload and truncate a fractional flow to zero
+                    if (_flow[e] > EPSILON || _flow[e] < -EPSILON)
+                        return INFEASIBLE;
+                    _flow[e] = 0;  // within tolerance: drop the residue
+                }
+            }
+
+            // Shift potentials to meet the requirements of the GEQ/LEQ type
+            // optimality conditions
+            if (_sum_supply == 0) {
+                if (_stype == GEQ) {
+                    Cost max_pot = -std::numeric_limits<Cost>::max();
+                    for (int i = 0; i != _node_num; ++i) {
+                        if (_pi[i] > max_pot) max_pot = _pi[i];
+                    }
+                    if (max_pot > 0) {
+                        for (int i = 0; i != _node_num; ++i)
+                            _pi[i] -= max_pot;
+                    }
+                } else {
+                    Cost min_pot = std::numeric_limits<Cost>::max();
+                    for (int i = 0; i != _node_num; ++i) {
+                        if (_pi[i] < min_pot) min_pot = _pi[i];
+                    }
+                    if (min_pot < 0) {
+                        for (int i = 0; i != _node_num; ++i)
+                            _pi[i] -= min_pot;
+                    }
+                }
+            }
+
+            return retVal;
+        }
+
+    }; //class NetworkSimplexSimple
+
+    ///@}
+
+} //namespace lemon
+
+#endif //LEMON_NETWORK_SIMPLEX_H