From 4a6883e0ce2fd9f3edd374d54c4c219d876ceb76 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Mon, 2 Dec 2019 09:37:54 +0100
Subject: nothing explodes and it compiles

---
 ot/lp/__init__.py | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

(limited to 'ot/lp/__init__.py')

diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 0c92810..4fec7d9 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -27,7 +27,7 @@ __all__=['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx',
          'emd_1d', 'emd2_1d', 'wasserstein_1d']
 
 
-def emd(a, b, M, numItermax=100000, log=False):
+def emd(a, b, M, numItermax=100000, log=False, sparse=False):
     r"""Solves the Earth Movers distance problem and returns the OT matrix
 
 
@@ -109,7 +109,12 @@ def emd(a, b, M, numItermax=100000, log=False):
     if len(b) == 0:
         b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
 
-    G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
+    if sparse:
+        Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax,sparse)
+        G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
+    else:
+        G, cost, u, v, result_code = emd_c(a, b, M, numItermax,sparse)
+
     result_code_string = check_result(result_code)
     if log:
         log = {}
@@ -123,7 +128,7 @@ def emd(a, b, M, numItermax=100000, log=False):
 
 
 def emd2(a, b, M, processes=multiprocessing.cpu_count(),
-         numItermax=100000, log=False, return_matrix=False):
+         numItermax=100000, log=False, sparse=False, return_matrix=False):
     r"""Solves the Earth Movers distance problem and returns the loss
 
     .. math::
@@ -214,19 +219,29 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
 
     if log or return_matrix:
         def f(b):
-            G, cost, u, v, resultCode = emd_c(a, b, M, numItermax)
-            result_code_string = check_result(resultCode)
+
+            if sparse:
+                Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax,sparse)
+                G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
+            else:
+                G, cost, u, v, result_code = emd_c(a, b, M, numItermax,sparse)
+
+            result_code_string = check_result(result_code)
             log = {}
             if return_matrix:
                 log['G'] = G
             log['u'] = u
             log['v'] = v
             log['warning'] = result_code_string
-            log['result_code'] = resultCode
+            log['result_code'] = result_code
             return [cost, log]
     else:
         def f(b):
-            G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
+            if sparse:
+                Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax,sparse)
+                G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
+            else:
+                G, cost, u, v, result_code = emd_c(a, b, M, numItermax,sparse)
             check_result(result_code)
             return cost
 
-- 
cgit v1.2.3


From a6a654de5e78dd388a793fbd26f60045b05d519c Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Mon, 2 Dec 2019 11:31:32 +0100
Subject: proper documentation and parameter

---
 ot/lp/EMD.h           |  2 +-
 ot/lp/EMD_wrapper.cpp |  3 ++-
 ot/lp/__init__.py     | 16 ++++++++++++++--
 ot/lp/emd_wrap.pyx    | 10 ++++++----
 test/test_ot.py       |  2 +-
 5 files changed, 24 insertions(+), 9 deletions(-)

(limited to 'ot/lp/__init__.py')

diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h
index bc513d2..9896091 100644
--- a/ot/lp/EMD.h
+++ b/ot/lp/EMD.h
@@ -33,7 +33,7 @@ enum ProblemType {
 int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter);
 
 int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D, 
-                    long *iG, long *jG, double *G,
+                    long *iG, long *jG, double *G, long * nG,
                     double* alpha, double* beta, double *cost, int maxIter);
 
 #endif
diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp
index 2aa44c1..9be2cdc 100644
--- a/ot/lp/EMD_wrapper.cpp
+++ b/ot/lp/EMD_wrapper.cpp
@@ -108,7 +108,7 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G,
 
 
 int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D, 
-                    long *iG, long *jG, double *G,
+                    long *iG, long *jG, double *G, long * nG,
                     double* alpha, double* beta, double *cost, int maxIter)  {
     // beware M and C anre strored in row major C style!!!
 
@@ -202,6 +202,7 @@ int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D,
                 cur++;
             }
         }
+        *nG=cur; // number of entries written (last index + 1), used as the slice bound on the numpy side
 
     }
 
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 4fec7d9..d476071 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -27,7 +27,7 @@ __all__=['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx',
          'emd_1d', 'emd2_1d', 'wasserstein_1d']
 
 
-def emd(a, b, M, numItermax=100000, log=False, sparse=False):
+def emd(a, b, M, numItermax=100000, log=False, dense=True):
     r"""Solves the Earth Movers distance problem and returns the OT matrix
 
 
@@ -62,6 +62,10 @@ def emd(a, b, M, numItermax=100000, log=False, sparse=False):
     log: bool, optional (default=False)
         If True, returns a dictionary containing the cost and dual
         variables. Otherwise returns only the optimal transportation matrix.
+    dense: boolean, optional (default=True)
+        If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt).
+        Otherwise returns a sparse representation using scipy's `coo_matrix`
+        format.
 
     Returns
     -------
@@ -103,6 +107,8 @@ def emd(a, b, M, numItermax=100000, log=False, sparse=False):
     b = np.asarray(b, dtype=np.float64)
     M = np.asarray(M, dtype=np.float64)
 
+    sparse= not dense
+
     # if empty array given then use uniform distributions
     if len(a) == 0:
         a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
@@ -128,7 +134,7 @@ def emd(a, b, M, numItermax=100000, log=False, sparse=False):
 
 
 def emd2(a, b, M, processes=multiprocessing.cpu_count(),
-         numItermax=100000, log=False, sparse=False, return_matrix=False):
+         numItermax=100000, log=False, dense=True, return_matrix=False):
     r"""Solves the Earth Movers distance problem and returns the loss
 
     .. math::
@@ -166,6 +172,10 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
         variables. Otherwise returns only the optimal transportation cost.
     return_matrix: boolean, optional (default=False)
         If True, returns the optimal transportation matrix in the log.
+    dense: boolean, optional (default=True)
+        If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt).
+        Otherwise returns a sparse representation using scipy's `coo_matrix`
+        format.       
 
     Returns
     -------
@@ -207,6 +217,8 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
     b = np.asarray(b, dtype=np.float64)
     M = np.asarray(M, dtype=np.float64)
 
+    sparse=not dense
+
     # problem with pikling Forks
     if sys.platform.endswith('win32'):
         processes=1
diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
index f183995..4b6cdce 100644
--- a/ot/lp/emd_wrap.pyx
+++ b/ot/lp/emd_wrap.pyx
@@ -21,7 +21,7 @@ import warnings
 cdef extern from "EMD.h":
     int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter)
     int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D, 
-                    long *iG, long *jG, double *G,
+                    long *iG, long *jG, double *G, long * nG,
                     double* alpha, double* beta, double *cost, int maxIter)
     cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED
 
@@ -75,7 +75,8 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
     max_iter : int
         The maximum number of iterations before stopping the optimization
         algorithm if it has not converged.
-
+    sparse : bool
+        Returning a sparse transport matrix if set to True
 
     Returns
     -------
@@ -87,6 +88,7 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
     cdef int n2= M.shape[1]
     cdef int nmax=n1+n2-1
     cdef int result_code = 0
+    cdef int nG=0
 
     cdef double cost=0
     cdef np.ndarray[double, ndim=1, mode="c"] alpha=np.zeros(n1)
@@ -111,10 +113,10 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
         jG=np.zeros(nmax,dtype=np.int)
 
 
-        result_code = EMD_wrap_return_sparse(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <long*> iG.data, <long*> jG.data, <double*> Gv.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
+        result_code = EMD_wrap_return_sparse(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <long*> iG.data, <long*> jG.data, <double*> Gv.data, <long*> &nG, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
 
 
-        return Gv, iG, jG, cost, alpha, beta, result_code
+        return Gv[:nG], iG[:nG], jG[:nG], cost, alpha, beta, result_code
 
 
     else:
diff --git a/test/test_ot.py b/test/test_ot.py
index 4d59e12..7b44fd1 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -131,7 +131,7 @@ def test_emd_sparse():
 
     G = ot.emd([], [], M)
 
-    Gs = ot.emd([], [], M, sparse=True)
+    Gs = ot.emd([], [], M, dense=False)
 
     # check G is the same
     np.testing.assert_allclose(G, Gs.todense())
-- 
cgit v1.2.3


From 92233f79e098f1930248d815e66c0a929508af59 Mon Sep 17 00:00:00 2001
From: Kilian <kilian.fatras@gmail.com>
Date: Mon, 9 Dec 2019 15:56:48 +0100
Subject: add assert for emd dimension mismatch

---
 ot/lp/__init__.py |  6 ++++++
 test/test_ot.py   | 16 ++++++++++++++++
 2 files changed, 22 insertions(+)

(limited to 'ot/lp/__init__.py')

diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 0c92810..f77c3d7 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -109,6 +109,9 @@ def emd(a, b, M, numItermax=100000, log=False):
     if len(b) == 0:
         b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
 
+    assert (a.shape[0] == M.shape[0] or b.shape[0] == M.shape[1]), \
+        "Dimension mismatch, check dimensions of M with a and b"
+
     G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
     result_code_string = check_result(result_code)
     if log:
@@ -212,6 +215,9 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
     if len(b) == 0:
         b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
 
+    assert (a.shape[0] == M.shape[0] or b.shape[0] == M.shape[1]), \
+        "Dimension mismatch, check dimensions of M with a and b"
+
     if log or return_matrix:
         def f(b):
             G, cost, u, v, resultCode = emd_c(a, b, M, numItermax)
diff --git a/test/test_ot.py b/test/test_ot.py
index dacae0a..1343604 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -14,6 +14,22 @@ from ot.datasets import make_1D_gauss as gauss
 import pytest
 
 
+def test_emd_dimension_mismatch():
+    # test that emd and emd2 raise when a and b do not match the shape of M
+    n_samples = 100
+    n_features = 2
+    rng = np.random.RandomState(0)
+
+    x = rng.randn(n_samples, n_features)
+    a = ot.utils.unif(n_samples + 1)
+
+    M = ot.dist(x, x)
+
+    np.testing.assert_raises(AssertionError, emd, a, a, M)
+
+    np.testing.assert_raises(AssertionError, emd2, a, a, M)
+
+
 def test_emd_emd2():
     # test emd and emd2 for simple identity
     n = 100
-- 
cgit v1.2.3


From a9bbc2cfdffd22ceee3256102e470df6c25338f3 Mon Sep 17 00:00:00 2001
From: Kilian <kilian.fatras@gmail.com>
Date: Tue, 10 Dec 2019 11:23:50 +0100
Subject: change or in assert by and

---
 ot/lp/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'ot/lp/__init__.py')

diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index f77c3d7..4cce41c 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -109,7 +109,7 @@ def emd(a, b, M, numItermax=100000, log=False):
     if len(b) == 0:
         b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
 
-    assert (a.shape[0] == M.shape[0] or b.shape[0] == M.shape[1]), \
+    assert (a.shape[0] == M.shape[0] and b.shape[0] == M.shape[1]), \
         "Dimension mismatch, check dimensions of M with a and b"
 
     G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
@@ -215,7 +215,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
     if len(b) == 0:
         b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
 
-    assert (a.shape[0] == M.shape[0] or b.shape[0] == M.shape[1]), \
+    assert (a.shape[0] == M.shape[0] and b.shape[0] == M.shape[1]), \
         "Dimension mismatch, check dimensions of M with a and b"
 
     if log or return_matrix:
-- 
cgit v1.2.3


From 3cb03158c42dde141d6f33973ea6e3394b9dc3d4 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Wed, 18 Dec 2019 10:15:30 +0100
Subject: cleanup variable name dense

---
 ot/lp/__init__.py  | 30 ++++++++++++++----------------
 ot/lp/emd_wrap.pyx | 26 +++++++++++++-------------
 2 files changed, 27 insertions(+), 29 deletions(-)

(limited to 'ot/lp/__init__.py')

diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index d476071..bb9829a 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -107,7 +107,6 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True):
     b = np.asarray(b, dtype=np.float64)
     M = np.asarray(M, dtype=np.float64)
 
-    sparse= not dense
 
     # if empty array given then use uniform distributions
     if len(a) == 0:
@@ -115,11 +114,11 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True):
     if len(b) == 0:
         b = np.ones((M.shape[1],), dtype=np.float64) / M.shape[1]
 
-    if sparse:
-        Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax,sparse)
-        G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
+    if dense:
+        G, cost, u, v, result_code = emd_c(a, b, M, numItermax,dense)
     else:
-        G, cost, u, v, result_code = emd_c(a, b, M, numItermax,sparse)
+        Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax,dense)
+        G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))        
 
     result_code_string = check_result(result_code)
     if log:
@@ -217,8 +216,6 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
     b = np.asarray(b, dtype=np.float64)
     M = np.asarray(M, dtype=np.float64)
 
-    sparse=not dense
-
     # problem with pikling Forks
     if sys.platform.endswith('win32'):
         processes=1
@@ -231,12 +228,11 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
 
     if log or return_matrix:
         def f(b):
-
-            if sparse:
-                Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax,sparse)
-                G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
+            if dense:
+                G, cost, u, v, result_code = emd_c(a, b, M, numItermax,dense)
             else:
-                G, cost, u, v, result_code = emd_c(a, b, M, numItermax,sparse)
+                Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax,dense)
+                G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))                
 
             result_code_string = check_result(result_code)
             log = {}
@@ -249,11 +245,13 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
             return [cost, log]
     else:
         def f(b):
-            if sparse:
-                Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax,sparse)
-                G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
+            if dense:
+                G, cost, u, v, result_code = emd_c(a, b, M, numItermax,dense)
             else:
-                G, cost, u, v, result_code = emd_c(a, b, M, numItermax,sparse)
+                Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax,dense)
+                G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))                
+
+            result_code_string = check_result(result_code)
             check_result(result_code)
             return cost
 
diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
index 4e3586d..636a9e3 100644
--- a/ot/lp/emd_wrap.pyx
+++ b/ot/lp/emd_wrap.pyx
@@ -46,7 +46,7 @@ def check_result(result_code):
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"]  b, np.ndarray[double, ndim=2, mode="c"]  M, int max_iter, bint sparse):
+def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"]  b, np.ndarray[double, ndim=2, mode="c"]  M, int max_iter, bint dense):
     """
         Solves the Earth Movers distance problem and returns the optimal transport matrix
 
@@ -110,8 +110,19 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
     if not len(b):
         b=np.ones((n2,))/n2
 
-    if sparse:
+    if dense:
+        # init OT matrix
+        G=np.zeros([n1, n2])
+
+        # calling the function
+        result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
+
+        return G, cost, alpha, beta, result_code
+
 
+    else:
+        
+        # init sparse OT matrix
         Gv=np.zeros(nmax)
         iG=np.zeros(nmax,dtype=np.int)
         jG=np.zeros(nmax,dtype=np.int)
@@ -123,17 +134,6 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
         return Gv[:nG], iG[:nG], jG[:nG], cost, alpha, beta, result_code
 
 
-    else:
-
-
-        G=np.zeros([n1, n2])
-
-
-        # calling the function
-        result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
-
-        return G, cost, alpha, beta, result_code
-
 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-- 
cgit v1.2.3


From e5196fa7a8c493b831fd5dac52a89bbf29e7b0e6 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Mon, 27 Jan 2020 10:40:05 +0100
Subject: correct bug in emd emd2 still todo

---
 ot/lp/__init__.py  | 194 +++++++++++++++++++++++++++++++++++++++++++++++------
 ot/lp/emd_wrap.pyx |   2 +
 2 files changed, 174 insertions(+), 22 deletions(-)

(limited to 'ot/lp/__init__.py')

diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index eabdd3a..a771ce4 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -23,11 +23,150 @@ from ..utils import parmap
 from .cvx import barycenter
 from ..utils import dist
 
-__all__=['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx',
-         'emd_1d', 'emd2_1d', 'wasserstein_1d']
+__all__ = ['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx',
+           'emd_1d', 'emd2_1d', 'wasserstein_1d']
 
 
-def emd(a, b, M, numItermax=100000, log=False, dense=True):
+def center_ot_dual(alpha0, beta0, a=None, b=None):
+    r"""Center dual OT potentials w.r.t. their weights
+
+    The main idea of this function is to find unique dual potentials
+    that ensure some kind of centering/fairness. This helps keep the
+    potentials stable across repeated OT solver calls with small changes.
+
+    Basically we add another constraint to the potentials that will not
+    change the objective value but will ensure uniqueness. The constraint
+    is the following:
+
+    .. math::
+        \alpha^T a= \beta^T b
+
+    in addition to the OT problem constraints.
+
+    Since :math:`\sum_i a_i=\sum_j b_j`, this can be solved by adding a
+    constant to :math:`\alpha_0` and removing it from :math:`\beta_0`.
+
+    .. math::
+        c=\frac{\beta_0^T b-\alpha_0^T a}{1^Tb+1^Ta}
+
+        \alpha=\alpha_0+c
+
+        \beta=\beta_0-c
+
+    Parameters
+    ----------
+    alpha0 : (ns,) numpy.ndarray, float64
+        Source dual potential
+    beta0 : (nt,) numpy.ndarray, float64
+        Target dual potential
+    a : (ns,) numpy.ndarray, float64
+        Source histogram (uniform weights if None)
+    b : (nt,) numpy.ndarray, float64
+        Target histogram (uniform weights if None)
+
+    Returns
+    -------
+    alpha : (ns,) numpy.ndarray, float64
+        Source centered dual potential
+    beta : (nt,) numpy.ndarray, float64
+        Target centered dual potential
+
+    """
+    # if no weights are provided, use uniform
+    if a is None:
+        a = np.ones(alpha0.shape[0]) / alpha0.shape[0]
+    if b is None:
+        b = np.ones(beta0.shape[0]) / beta0.shape[0]
+
+    # compute constant that balances the weighted sums of the duals
+    c = (b.dot(beta0) - a.dot(alpha0)) / (a.sum() + b.sum())
+
+    # update duals
+    alpha = alpha0 + c
+    beta = beta0 - c
+
+    return alpha, beta
+
+
+def estimate_dual_null_weights(alpha0, beta0, a, b, M):
+    r"""Estimate feasible values for 0-weighted dual potentials
+
+    The feasible values are computed efficiently but rather coarsely.
+    First we compute the constraints violations:
+
+    .. math::
+        V=\alpha+\beta^T-M
+
+    Next we compute the max amount of violation per row (alpha) and
+    columns (beta)
+
+    .. math::
+        v^a_i=\max_j V_{i,j}
+
+        v^b_j=\max_i V_{i,j}
+
+    Finally we update the dual potential with 0 weights if a
+    constraint is violated
+
+    .. math::
+        \alpha_i = \alpha_i -v^a_i \quad \text{ if } a_i=0 \text{ and } v^a_i>0
+
+        \beta_j = \beta_j -v^b_j \quad \text{ if } b_j=0 \text{ and } v^b_j>0
+
+    In the end the dual potentials are centered using function
+    :ref:`center_ot_dual`.
+
+    Note that all those updates do not change the objective value of the
+    solution but provide dual potentials that do not violate the constraints.
+
+    Parameters
+    ----------
+    alpha0 : (ns,) numpy.ndarray, float64
+        Source dual potential
+    beta0 : (nt,) numpy.ndarray, float64
+        Target dual potential
+    alpha0 : (ns,) numpy.ndarray, float64
+        Source dual potential
+    beta0 : (nt,) numpy.ndarray, float64
+        Target dual potential
+    a : (ns,) numpy.ndarray, float64
+        Source histogram (entries equal to 0 mark null weights)
+    b : (nt,) numpy.ndarray, float64
+        Target histogram (entries equal to 0 mark null weights)
+    M : (ns,nt) numpy.ndarray, float64
+        Loss matrix (c-order array with type float64)
+
+    Returns
+    -------
+    alpha : (ns,) numpy.ndarray, float64
+        Source corrected dual potential
+    beta : (nt,) numpy.ndarray, float64
+        Target corrected dual potential
+
+    """
+
+    # binary indexing of non-zeros weights
+    asel = a != 0
+    bsel = b != 0
+
+    # compute dual constraints violation
+    Viol = alpha0[:, None] + beta0[None, :] - M
+
+    # compute the worst violation per row (alpha) and per column (beta)
+    aviol = np.max(Viol, 1)
+    bviol = np.max(Viol, 0)
+
+    # correct positive violations, only for potentials with zero weight
+    alpha_up = -1 * ~asel * np.maximum(aviol, 0)
+    beta_up = -1 * ~bsel * np.maximum(bviol, 0)
+
+    alpha = alpha0 + alpha_up
+    beta = beta0 + beta_up
+
+    return center_ot_dual(alpha, beta, a, b)
+
+
+def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True):
     r"""Solves the Earth Movers distance problem and returns the OT matrix
 
 
@@ -43,7 +182,7 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True):
     - a and b are the sample weights
 
     .. warning::
-        Note that the M matrix needs to be a C-order numpy.array in float64 
+        Note that the M matrix needs to be a C-order numpy.array in float64
         format.
 
     Uses the algorithm proposed in [1]_
@@ -66,6 +205,9 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True):
         If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt).
         Otherwise returns a sparse representation using scipy's `coo_matrix`
         format.
+    center_dual: boolean, optional (default=True)
+        If True, centers the dual potential using function
+        :ref:`center_ot_dual`.
 
     Returns
     -------
@@ -107,7 +249,6 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True):
     b = np.asarray(b, dtype=np.float64)
     M = np.asarray(M, dtype=np.float64)
 
-
     # if empty array given then use uniform distributions
     if len(a) == 0:
         a = np.ones((M.shape[0],), dtype=np.float64) / M.shape[0]
@@ -117,11 +258,21 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True):
     assert (a.shape[0] == M.shape[0] and b.shape[0] == M.shape[1]), \
         "Dimension mismatch, check dimensions of M with a and b"
 
+    asel = a != 0
+    bsel = b != 0
+
     if dense:
-        G, cost, u, v, result_code = emd_c(a, b, M, numItermax,dense)
+        G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
+
+        if np.any(~asel) or np.any(~bsel):
+            u, v = estimate_dual_null_weights(u, v, a, b, M)
+
     else:
-        Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax,dense)
-        G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))        
+        Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
+        G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
+
+        if np.any(~asel) or np.any(~bsel):
+            u, v = estimate_dual_null_weights(u, v, a, b, M)
 
     result_code_string = check_result(result_code)
     if log:
@@ -151,7 +302,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
     - a and b are the sample weights
 
     .. warning::
-        Note that the M matrix needs to be a C-order numpy.array in float64 
+        Note that the M matrix needs to be a C-order numpy.array in float64
         format.
 
     Uses the algorithm proposed in [1]_
@@ -177,7 +328,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
     dense: boolean, optional (default=True)
         If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt).
         Otherwise returns a sparse representation using scipy's `coo_matrix`
-        format.       
+        format.
 
     Returns
     -------
@@ -221,7 +372,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
 
     # problem with pikling Forks
     if sys.platform.endswith('win32'):
-        processes=1
+        processes = 1
 
     # if empty array given then use uniform distributions
     if len(a) == 0:
@@ -235,10 +386,10 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
     if log or return_matrix:
         def f(b):
             if dense:
-                G, cost, u, v, result_code = emd_c(a, b, M, numItermax,dense)
+                G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
             else:
-                Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax,dense)
-                G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))                
+                Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
+                G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
 
             result_code_string = check_result(result_code)
             log = {}
@@ -252,10 +403,10 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
     else:
         def f(b):
             if dense:
-                G, cost, u, v, result_code = emd_c(a, b, M, numItermax,dense)
+                G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
             else:
-                Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax,dense)
-                G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))                
+                Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
+                G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
 
             result_code_string = check_result(result_code)
             check_result(result_code)
@@ -265,7 +416,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
         return f(b)
     nb = b.shape[1]
 
-    if processes>1:
+    if processes > 1:
         res = parmap(f, [b[:, i] for i in range(nb)], processes)
     else:
         res = list(map(f, [b[:, i].copy() for i in range(nb)]))
@@ -273,7 +424,6 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
     return res
 
 
-
 def free_support_barycenter(measures_locations, measures_weights, X_init, b=None, weights=None, numItermax=100, stopThr=1e-7, verbose=False, log=None):
     """
     Solves the free support (locations of the barycenters are optimized, not the weights) Wasserstein barycenter problem (i.e. the weighted Frechet mean for the 2-Wasserstein distance)
@@ -326,7 +476,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
     k = X_init.shape[0]
     d = X_init.shape[1]
     if b is None:
-        b = np.ones((k,))/k
+        b = np.ones((k,)) / k
     if weights is None:
         weights = np.ones((N,)) / N
 
@@ -337,7 +487,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
 
     displacement_square_norm = stopThr + 1.
 
-    while ( displacement_square_norm > stopThr and iter_count < numItermax ):
+    while (displacement_square_norm > stopThr and iter_count < numItermax):
 
         T_sum = np.zeros((k, d))
 
@@ -347,7 +497,7 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
             T_i = emd(b, measure_weights_i, M_i)
             T_sum = T_sum + weight_i * np.reshape(1. / b, (-1, 1)) * np.matmul(T_i, measure_locations_i)
 
-        displacement_square_norm = np.sum(np.square(T_sum-X))
+        displacement_square_norm = np.sum(np.square(T_sum - X))
         if log:
             displacement_square_norms.append(displacement_square_norm)
 
diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
index c0d7128..a4987f4 100644
--- a/ot/lp/emd_wrap.pyx
+++ b/ot/lp/emd_wrap.pyx
@@ -40,6 +40,8 @@ def check_result(result_code):
     return message
 
 
+
+
 @cython.boundscheck(False)
 @cython.wraparound(False)
 def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"]  b, np.ndarray[double, ndim=2, mode="c"]  M, int max_iter, bint dense):
-- 
cgit v1.2.3


From 9a9b3547837eac56349ce8df92bb5b0565daa2d6 Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Mon, 27 Jan 2020 10:59:58 +0100
Subject: correct emd2 and add centering for dual potentials

---
 ot/lp/__init__.py | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

(limited to 'ot/lp/__init__.py')

diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index a771ce4..aa3166f 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -264,6 +264,9 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True):
     if dense:
         G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
 
+        if center_dual:
+            u, v = center_ot_dual(u, v, a, b)
+
         if np.any(~asel) or np.any(~bsel):
             u, v = estimate_dual_null_weights(u, v, a, b, M)
 
@@ -271,6 +274,9 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True):
         Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
         G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
 
+        if center_dual:
+            u, v = center_ot_dual(u, v, a, b)
+
         if np.any(~asel) or np.any(~bsel):
             u, v = estimate_dual_null_weights(u, v, a, b, M)
 
@@ -287,7 +293,8 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True):
 
 
 def emd2(a, b, M, processes=multiprocessing.cpu_count(),
-         numItermax=100000, log=False, dense=True, return_matrix=False):
+         numItermax=100000, log=False, dense=True, return_matrix=False,
+         center_dual=True):
     r"""Solves the Earth Movers distance problem and returns the loss
 
     .. math::
@@ -329,6 +336,9 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
         If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt).
         Otherwise returns a sparse representation using scipy's `coo_matrix`
         format.
+    center_dual: boolean, optional (default=True)
+        If True, centers the dual potential using function
+        :ref:`center_ot_dual`.
 
     Returns
     -------
@@ -383,14 +393,23 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
     assert (a.shape[0] == M.shape[0] and b.shape[0] == M.shape[1]), \
         "Dimension mismatch, check dimensions of M with a and b"
 
+    asel = a != 0
+
     if log or return_matrix:
         def f(b):
+            bsel = b != 0
             if dense:
                 G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
             else:
                 Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
                 G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
 
+            if center_dual:
+                u, v = center_ot_dual(u, v, a, b)
+
+            if np.any(~asel) or np.any(~bsel):
+                u, v = estimate_dual_null_weights(u, v, a, b, M)
+
             result_code_string = check_result(result_code)
             log = {}
             if return_matrix:
@@ -402,12 +421,19 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
             return [cost, log]
     else:
         def f(b):
+            bsel = b != 0
             if dense:
                 G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
             else:
                 Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
                 G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
 
+            if center_dual:
+                u, v = center_ot_dual(u, v, a, b)
+
+            if np.any(~asel) or np.any(~bsel):
+                u, v = estimate_dual_null_weights(u, v, a, b, M)
+
             result_code_string = check_result(result_code)
             check_result(result_code)
             return cost
-- 
cgit v1.2.3


From f65073faa73b36280a19ff8b9c383e66f8bdbd2b Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Thu, 30 Jan 2020 08:04:36 +0100
Subject: complete documentation

---
 ot/lp/__init__.py  | 30 +++++++++++++++++++-----------
 ot/lp/emd_wrap.pyx |  6 ++++++
 test/test_ot.py    |  4 ++--
 3 files changed, 27 insertions(+), 13 deletions(-)

(limited to 'ot/lp/__init__.py')

diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index aa3166f..cdd505d 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -28,10 +28,10 @@ __all__ = ['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx',
 
 
 def center_ot_dual(alpha0, beta0, a=None, b=None):
-    r"""Center dual OT potentials wrt theirs weights
+    r"""Center dual OT potentials w.r.t. theirs weights
 
     The main idea of this function is to find unique dual potentials
-    that ensure some kind of centering/fairness. It will help have
+    that ensure some kind of centering/fairness. The main idea is to find dual potentials that lead to the same final objective value for both source and targets (see below for more details). It will help having
     stability when multiple calling of the OT solver with small changes.
 
     Basically we add another constraint to the potential that will not
@@ -91,7 +91,15 @@ def center_ot_dual(alpha0, beta0, a=None, b=None):
 def estimate_dual_null_weights(alpha0, beta0, a, b, M):
     r"""Estimate feasible values for 0-weighted dual potentials
 
-    The feasible values are computed efficiently bjt rather coarsely.
+    The feasible values are computed efficiently but rather coarsely.
+
+    .. warning::
+        This function is necessary because the C++ solver in emd_c
+        discards all samples in the distributions with 
+        zeros weights. This means that while the primal variable (transport 
+        matrix) is exact, the solver only returns feasible dual potentials
+        on the samples with weights different from zero. 
+
     First we compute the constraints violations:
 
     .. math::
@@ -113,11 +121,11 @@ def estimate_dual_null_weights(alpha0, beta0, a, b, M):
 
         \beta_j = \beta_j -v^b_j \quad \text{ if } b_j=0 \text{ and } v^b_j>0
 
-    In the end the dual potential are centred using function
+    In the end the dual potentials are centered using function
     :ref:`center_ot_dual`.
 
     Note that all those updates do not change the objective value of the
-    solution but provide dual potential that do not violate the constraints.
+    solution but provide dual potentials that do not violate the constraints.
 
     Parameters
     ----------
@@ -130,9 +138,9 @@ def estimate_dual_null_weights(alpha0, beta0, a, b, M):
     beta0 : (nt,) numpy.ndarray, float64
         Target dual potential
     a : (ns,) numpy.ndarray, float64
-        Source histogram (uniform weight if empty list)
+        Source distribution (uniform weights if empty list)
     b : (nt,) numpy.ndarray, float64
-        Target histogram (uniform weight if empty list)
+        Target distribution (uniform weights if empty list)
     M : (ns,nt) numpy.ndarray, float64
         Loss matrix (c-order array with type float64)
 
@@ -150,11 +158,11 @@ def estimate_dual_null_weights(alpha0, beta0, a, b, M):
     bsel = b != 0
 
     # compute dual constraints violation
-    Viol = alpha0[:, None] + beta0[None, :] - M
+    constraint_violation = alpha0[:, None] + beta0[None, :] - M
 
-    # Compute worst violation per line and columns
-    aviol = np.max(Viol, 1)
-    bviol = np.max(Viol, 0)
+    # Compute largest violation per line and columns
+    aviol = np.max(constraint_violation, 1)
+    bviol = np.max(constraint_violation, 0)
 
     # update corrects violation of
     alpha_up = -1 * ~asel * np.maximum(aviol, 0)
diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
index a4987f4..d345fd4 100644
--- a/ot/lp/emd_wrap.pyx
+++ b/ot/lp/emd_wrap.pyx
@@ -66,6 +66,12 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
     .. warning::
         Note that the M matrix needs to be a C-order :py.cls:`numpy.array`
 
+    .. warning::
+        The C++ solver discards all samples in the distributions with 
+        zeros weights. This means that while the primal variable (transport 
+        matrix) is exact, the solver only returns feasible dual potentials
+        on the samples with weights different from zero. 
+
     Parameters
     ----------
     a : (ns,) numpy.ndarray, float64
diff --git a/test/test_ot.py b/test/test_ot.py
index 245a107..47df946 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -338,9 +338,9 @@ def test_dual_variables():
     np.testing.assert_almost_equal(cost1, log['cost'])
     check_duality_gap(a, b, M, G, log['u'], log['v'], log['cost'])
 
-    viol = log['u'][:, None] + log['v'][None, :] - M
+    constraint_violation = log['u'][:, None] + log['v'][None, :] - M
 
-    assert viol.max() < 1e-8
+    assert constraint_violation.max() < 1e-8
 
 
 def check_duality_gap(a, b, M, G, u, v, cost):
-- 
cgit v1.2.3


From 592f933085d5b521a440eb91eccc283c43732170 Mon Sep 17 00:00:00 2001
From: AdrienCorenflos <adrien.corenflos@gmail.com>
Date: Wed, 1 Apr 2020 12:14:42 +0100
Subject: Fix ordering

---
 ot/lp/__init__.py |  2 +-
 test/test_ot.py   | 38 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 39 insertions(+), 1 deletion(-)

(limited to 'ot/lp/__init__.py')

diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index cdd505d..4c968ca 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -656,7 +656,7 @@ def emd_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True,
     perm_a = np.argsort(x_a_1d)
     perm_b = np.argsort(x_b_1d)
 
-    G_sorted, indices, cost = emd_1d_sorted(a, b,
+    G_sorted, indices, cost = emd_1d_sorted(a[perm_a.flatten()], b[perm_b.flatten()],
                                             x_a_1d[perm_a], x_b_1d[perm_b],
                                             metric=metric, p=p)
     G = coo_matrix((G_sorted, (perm_a[indices[:, 0]], perm_b[indices[:, 1]])),
diff --git a/test/test_ot.py b/test/test_ot.py
index 47df946..7afdae3 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -91,6 +91,44 @@ def test_emd_1d_emd2_1d():
     with pytest.raises(AssertionError):
         ot.emd_1d(u, v, [], [])
 
+def test_emd_1d_emd2_1d_with_weights():
+
+    # test emd1d gives similar results as emd
+    n = 20
+    m = 30
+    rng = np.random.RandomState(0)
+    u = rng.randn(n, 1)
+    v = rng.randn(m, 1)
+
+    w_u = rng.uniform(0., 1., n)
+    w_u = w_u / w_u.sum()
+
+    w_v = rng.uniform(0., 1., m)
+    w_v = w_v / w_v.sum()
+
+    M = ot.dist(u, v, metric='sqeuclidean')
+
+    G, log = ot.emd(w_u, w_v, M, log=True)
+    wass = log["cost"]
+    G_1d, log = ot.emd_1d(u, v, w_u, w_v, metric='sqeuclidean', log=True)
+    wass1d = log["cost"]
+    wass1d_emd2 = ot.emd2_1d(u, v, w_u, w_v, metric='sqeuclidean', log=False)
+    wass1d_euc = ot.emd2_1d(u, v, w_u, w_v, metric='euclidean', log=False)
+
+    # check loss is similar
+    np.testing.assert_allclose(wass, wass1d)
+    np.testing.assert_allclose(wass, wass1d_emd2)
+
+    # check loss is similar to scipy's implementation for Euclidean metric
+    wass_sp = wasserstein_distance(u.reshape((-1,)), v.reshape((-1,)))
+    np.testing.assert_allclose(wass_sp, wass1d_euc)
+
+    # check constraints
+    np.testing.assert_allclose(w_u, G.sum(1))
+    np.testing.assert_allclose(w_v, G.sum(0))
+
+
+
 
 def test_wass_1d():
     # test emd1d gives similar results as emd
-- 
cgit v1.2.3


From 60943d00bab1682d6fac22b1e1ba5e64569b4e78 Mon Sep 17 00:00:00 2001
From: AdrienCorenflos <adrien.corenflos@gmail.com>
Date: Thu, 2 Apr 2020 10:41:24 +0100
Subject: Auto PEP8

---
 ot/lp/__init__.py | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

(limited to 'ot/lp/__init__.py')

diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 4c968ca..1922785 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -12,16 +12,16 @@ Solvers for the original linear program OT problem
 
 import multiprocessing
 import sys
+
 import numpy as np
 from scipy.sparse import coo_matrix
 
-from .import cvx
-
+from . import cvx
+from .cvx import barycenter
 # import compiled emd
 from .emd_wrap import emd_c, check_result, emd_1d_sorted
-from ..utils import parmap
-from .cvx import barycenter
 from ..utils import dist
+from ..utils import parmap
 
 __all__ = ['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx',
            'emd_1d', 'emd2_1d', 'wasserstein_1d']
@@ -458,7 +458,8 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
     return res
 
 
-def free_support_barycenter(measures_locations, measures_weights, X_init, b=None, weights=None, numItermax=100, stopThr=1e-7, verbose=False, log=None):
+def free_support_barycenter(measures_locations, measures_weights, X_init, b=None, weights=None, numItermax=100,
+                            stopThr=1e-7, verbose=False, log=None):
     """
     Solves the free support (locations of the barycenters are optimized, not the weights) Wasserstein barycenter problem (i.e. the weighted Frechet mean for the 2-Wasserstein distance)
 
@@ -525,8 +526,8 @@ def free_support_barycenter(measures_locations, measures_weights, X_init, b=None
 
         T_sum = np.zeros((k, d))
 
-        for (measure_locations_i, measure_weights_i, weight_i) in zip(measures_locations, measures_weights, weights.tolist()):
-
+        for (measure_locations_i, measure_weights_i, weight_i) in zip(measures_locations, measures_weights,
+                                                                      weights.tolist()):
             M_i = dist(X, measure_locations_i)
             T_i = emd(b, measure_weights_i, M_i)
             T_sum = T_sum + weight_i * np.reshape(1. / b, (-1, 1)) * np.matmul(T_i, measure_locations_i)
@@ -651,8 +652,8 @@ def emd_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True,
     if b.ndim == 0 or len(b) == 0:
         b = np.ones((x_b.shape[0],), dtype=np.float64) / x_b.shape[0]
 
-    x_a_1d = x_a.reshape((-1, ))
-    x_b_1d = x_b.reshape((-1, ))
+    x_a_1d = x_a.reshape((-1,))
+    x_b_1d = x_b.reshape((-1,))
     perm_a = np.argsort(x_a_1d)
     perm_b = np.argsort(x_b_1d)
 
-- 
cgit v1.2.3


From a9e69509412338920142c0615a50bc00739144d0 Mon Sep 17 00:00:00 2001
From: AdrienCorenflos <adrien.corenflos@gmail.com>
Date: Thu, 2 Apr 2020 11:11:16 +0100
Subject: Remove flatten, it's not useful.

---
 ot/lp/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'ot/lp/__init__.py')

diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 1922785..f4f6861 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -657,7 +657,7 @@ def emd_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True,
     perm_a = np.argsort(x_a_1d)
     perm_b = np.argsort(x_b_1d)
 
-    G_sorted, indices, cost = emd_1d_sorted(a[perm_a.flatten()], b[perm_b.flatten()],
+    G_sorted, indices, cost = emd_1d_sorted(a[perm_a], b[perm_b],
                                             x_a_1d[perm_a], x_b_1d[perm_b],
                                             metric=metric, p=p)
     G = coo_matrix((G_sorted, (perm_a[indices[:, 0]], perm_b[indices[:, 1]])),
-- 
cgit v1.2.3


From 9200af5d795517b0772c10bb3d16022dd1a12791 Mon Sep 17 00:00:00 2001
From: ievred <ievgen.redko@univ-st-etienne.fr>
Date: Thu, 2 Apr 2020 15:29:12 +0200
Subject: laplace v1

---
 ot/bregman.py     | 72 +++++++++++++++++++++++++++++++++----------------------
 ot/datasets.py    |  4 ++--
 ot/lp/__init__.py |  4 +---
 ot/plot.py        |  3 ++-
 4 files changed, 49 insertions(+), 34 deletions(-)

(limited to 'ot/lp/__init__.py')

diff --git a/ot/bregman.py b/ot/bregman.py
index fb959e9..951d3ce 100644
--- a/ot/bregman.py
+++ b/ot/bregman.py
@@ -19,6 +19,7 @@ import warnings
 from .utils import unif, dist
 from scipy.optimize import fmin_l_bfgs_b
 
+
 def sinkhorn(a, b, M, reg, method='sinkhorn', numItermax=1000,
              stopThr=1e-9, verbose=False, log=False, **kwargs):
     r"""
@@ -539,12 +540,12 @@ def greenkhorn(a, b, M, reg, numItermax=10000, stopThr=1e-9, verbose=False,
             old_v = v[i_2]
             v[i_2] = b[i_2] / (K[:, i_2].T.dot(u))
             G[:, i_2] = u * K[:, i_2] * v[i_2]
-            #aviol = (G@one_m - a)
-            #aviol_2 = (G.T@one_n - b)
+            # aviol = (G@one_m - a)
+            # aviol_2 = (G.T@one_n - b)
             viol += (-old_v + v[i_2]) * K[:, i_2] * u
             viol_2[i_2] = v[i_2] * K[:, i_2].dot(u) - b[i_2]
 
-            #print('b',np.max(abs(aviol -viol)),np.max(abs(aviol_2 - viol_2)))
+            # print('b',np.max(abs(aviol -viol)),np.max(abs(aviol_2 - viol_2)))
 
         if stopThr_val <= stopThr:
             break
@@ -715,7 +716,7 @@ def sinkhorn_stabilized(a, b, M, reg, numItermax=1000, tau=1e3, stopThr=1e-9,
         if np.abs(u).max() > tau or np.abs(v).max() > tau:
             if n_hists:
                 alpha, beta = alpha + reg * \
-                    np.max(np.log(u), 1), beta + reg * np.max(np.log(v))
+                              np.max(np.log(u), 1), beta + reg * np.max(np.log(v))
             else:
                 alpha, beta = alpha + reg * np.log(u), beta + reg * np.log(v)
                 if n_hists:
@@ -940,7 +941,7 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4,
             # the 10th iterations
             transp = G
             err = np.linalg.norm(
-                (np.sum(transp, axis=0) - b))**2 + np.linalg.norm((np.sum(transp, axis=1) - a))**2
+                (np.sum(transp, axis=0) - b)) ** 2 + np.linalg.norm((np.sum(transp, axis=1) - a)) ** 2
             if log:
                 log['err'].append(err)
 
@@ -966,7 +967,7 @@ def sinkhorn_epsilon_scaling(a, b, M, reg, numItermax=100, epsilon0=1e4,
 
 def geometricBar(weights, alldistribT):
     """return the weighted geometric mean of distributions"""
-    assert(len(weights) == alldistribT.shape[1])
+    assert (len(weights) == alldistribT.shape[1])
     return np.exp(np.dot(np.log(alldistribT), weights.T))
 
 
@@ -1108,7 +1109,7 @@ def barycenter_sinkhorn(A, M, reg, weights=None, numItermax=1000,
     if weights is None:
         weights = np.ones(A.shape[1]) / A.shape[1]
     else:
-        assert(len(weights) == A.shape[1])
+        assert (len(weights) == A.shape[1])
 
     if log:
         log = {'err': []}
@@ -1206,7 +1207,7 @@ def barycenter_stabilized(A, M, reg, tau=1e10, weights=None, numItermax=1000,
     if weights is None:
         weights = np.ones(n_hists) / n_hists
     else:
-        assert(len(weights) == A.shape[1])
+        assert (len(weights) == A.shape[1])
 
     if log:
         log = {'err': []}
@@ -1334,7 +1335,7 @@ def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000,
     if weights is None:
         weights = np.ones(A.shape[0]) / A.shape[0]
     else:
-        assert(len(weights) == A.shape[0])
+        assert (len(weights) == A.shape[0])
 
     if log:
         log = {'err': []}
@@ -1350,11 +1351,11 @@ def convolutional_barycenter2d(A, reg, weights=None, numItermax=10000,
     # this is equivalent to blurring on horizontal then vertical directions
     t = np.linspace(0, 1, A.shape[1])
     [Y, X] = np.meshgrid(t, t)
-    xi1 = np.exp(-(X - Y)**2 / reg)
+    xi1 = np.exp(-(X - Y) ** 2 / reg)
 
     t = np.linspace(0, 1, A.shape[2])
     [Y, X] = np.meshgrid(t, t)
-    xi2 = np.exp(-(X - Y)**2 / reg)
+    xi2 = np.exp(-(X - Y) ** 2 / reg)
 
     def K(x):
         return np.dot(np.dot(xi1, x), xi2)
@@ -1501,6 +1502,7 @@ def unmix(a, D, M, M0, h0, reg, reg0, alpha, numItermax=1000,
     else:
         return np.sum(K0, axis=1)
 
+
 def jcpot_barycenter(Xs, Ys, Xt, reg, metric='sqeuclidean', numItermax=100,
                      stopThr=1e-6, verbose=False, log=False, **kwargs):
     r'''Joint OT and proportion estimation for multi-source target shift as proposed in [27]
@@ -1658,6 +1660,7 @@ def jcpot_barycenter(Xs, Ys, Xt, reg, metric='sqeuclidean', numItermax=100,
     else:
         return couplings, bary
 
+
 def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean',
                        numIterMax=10000, stopThr=1e-9, verbose=False,
                        log=False, **kwargs):
@@ -1749,7 +1752,8 @@ def empirical_sinkhorn(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean',
         return pi
 
 
-def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs):
+def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9,
+                        verbose=False, log=False, **kwargs):
     r'''
     Solve the entropic regularization optimal transport problem from empirical
     data and return the OT loss
@@ -1831,14 +1835,17 @@ def empirical_sinkhorn2(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', num
     M = dist(X_s, X_t, metric=metric)
 
     if log:
-        sinkhorn_loss, log = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+        sinkhorn_loss, log = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log,
+                                       **kwargs)
         return sinkhorn_loss, log
     else:
-        sinkhorn_loss = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log, **kwargs)
+        sinkhorn_loss = sinkhorn2(a, b, M, reg, numItermax=numIterMax, stopThr=stopThr, verbose=verbose, log=log,
+                                  **kwargs)
         return sinkhorn_loss
 
 
-def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9, verbose=False, log=False, **kwargs):
+def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeuclidean', numIterMax=10000, stopThr=1e-9,
+                                  verbose=False, log=False, **kwargs):
     r'''
     Compute the sinkhorn divergence loss from empirical data
 
@@ -1924,11 +1931,14 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli
     .. [23] Aude Genevay, Gabriel Peyré, Marco Cuturi, Learning Generative Models with Sinkhorn Divergences,  Proceedings of the Twenty-First International Conference on Artficial Intelligence and Statistics, (AISTATS) 21, 2018
     '''
     if log:
-        sinkhorn_loss_ab, log_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+        sinkhorn_loss_ab, log_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax,
+                                                       stopThr=1e-9, verbose=verbose, log=log, **kwargs)
 
-        sinkhorn_loss_a, log_a = empirical_sinkhorn2(X_s, X_s, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+        sinkhorn_loss_a, log_a = empirical_sinkhorn2(X_s, X_s, reg, a, b, metric=metric, numIterMax=numIterMax,
+                                                     stopThr=1e-9, verbose=verbose, log=log, **kwargs)
 
-        sinkhorn_loss_b, log_b = empirical_sinkhorn2(X_t, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+        sinkhorn_loss_b, log_b = empirical_sinkhorn2(X_t, X_t, reg, a, b, metric=metric, numIterMax=numIterMax,
+                                                     stopThr=1e-9, verbose=verbose, log=log, **kwargs)
 
         sinkhorn_div = sinkhorn_loss_ab - 1 / 2 * (sinkhorn_loss_a + sinkhorn_loss_b)
 
@@ -1943,11 +1953,14 @@ def empirical_sinkhorn_divergence(X_s, X_t, reg, a=None, b=None, metric='sqeucli
         return max(0, sinkhorn_div), log
 
     else:
-        sinkhorn_loss_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+        sinkhorn_loss_ab = empirical_sinkhorn2(X_s, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9,
+                                               verbose=verbose, log=log, **kwargs)
 
-        sinkhorn_loss_a = empirical_sinkhorn2(X_s, X_s, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+        sinkhorn_loss_a = empirical_sinkhorn2(X_s, X_s, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9,
+                                              verbose=verbose, log=log, **kwargs)
 
-        sinkhorn_loss_b = empirical_sinkhorn2(X_t, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9, verbose=verbose, log=log, **kwargs)
+        sinkhorn_loss_b = empirical_sinkhorn2(X_t, X_t, reg, a, b, metric=metric, numIterMax=numIterMax, stopThr=1e-9,
+                                              verbose=verbose, log=log, **kwargs)
 
         sinkhorn_div = sinkhorn_loss_ab - 1 / 2 * (sinkhorn_loss_a + sinkhorn_loss_b)
         return max(0, sinkhorn_div)
@@ -2039,7 +2052,8 @@ def screenkhorn(a, b, M, reg, ns_budget=None, nt_budget=None, uniform=False, res
     try:
         import bottleneck
     except ImportError:
-        warnings.warn("Bottleneck module is not installed. Install it from https://pypi.org/project/Bottleneck/ for better performance.")
+        warnings.warn(
+            "Bottleneck module is not installed. Install it from https://pypi.org/project/Bottleneck/ for better performance.")
         bottleneck = np
 
     a = np.asarray(a, dtype=np.float64)
@@ -2173,10 +2187,11 @@ def screenkhorn(a, b, M, reg, ns_budget=None, nt_budget=None, uniform=False, res
 
         # box constraints in L-BFGS-B (see Proposition 1 in [26])
         bounds_u = [(max(a_I_min / ((nt - nt_budget) * epsilon + nt_budget * (b_J_max / (
-            ns * epsilon * kappa * K_min))), epsilon / kappa), a_I_max / (nt * epsilon * K_min))] * ns_budget
+                ns * epsilon * kappa * K_min))), epsilon / kappa), a_I_max / (nt * epsilon * K_min))] * ns_budget
 
-        bounds_v = [(max(b_J_min / ((ns - ns_budget) * epsilon + ns_budget * (kappa * a_I_max / (nt * epsilon * K_min))),
-                         epsilon * kappa), b_J_max / (ns * epsilon * K_min))] * nt_budget
+        bounds_v = [(
+                    max(b_J_min / ((ns - ns_budget) * epsilon + ns_budget * (kappa * a_I_max / (nt * epsilon * K_min))),
+                        epsilon * kappa), b_J_max / (ns * epsilon * K_min))] * nt_budget
 
         # pre-calculated constants for the objective
         vec_eps_IJc = epsilon * kappa * (K_IJc * np.ones(nt - nt_budget).reshape((1, -1))).sum(axis=1)
@@ -2225,7 +2240,8 @@ def screenkhorn(a, b, M, reg, ns_budget=None, nt_budget=None, uniform=False, res
         return usc, vsc
 
     def screened_obj(usc, vsc):
-        part_IJ = np.dot(np.dot(usc, K_IJ), vsc) - kappa * np.dot(a_I, np.log(usc)) - (1. / kappa) * np.dot(b_J, np.log(vsc))
+        part_IJ = np.dot(np.dot(usc, K_IJ), vsc) - kappa * np.dot(a_I, np.log(usc)) - (1. / kappa) * np.dot(b_J,
+                                                                                                            np.log(vsc))
         part_IJc = np.dot(usc, vec_eps_IJc)
         part_IcJ = np.dot(vec_eps_IcJ, vsc)
         psi_epsilon = part_IJ + part_IJc + part_IcJ
@@ -2247,9 +2263,9 @@ def screenkhorn(a, b, M, reg, ns_budget=None, nt_budget=None, uniform=False, res
         g = np.hstack([g_u, g_v])
         return f, g
 
-    #----------------------------------------------------------------------------------------------------------------#
+    # ----------------------------------------------------------------------------------------------------------------#
     #                                           Step 2: L-BFGS-B solver                                              #
-    #----------------------------------------------------------------------------------------------------------------#
+    # ----------------------------------------------------------------------------------------------------------------#
 
     u0, v0 = restricted_sinkhorn(u0, v0)
     theta0 = np.hstack([u0, v0])
diff --git a/ot/datasets.py b/ot/datasets.py
index eea9f37..a1ca7b6 100644
--- a/ot/datasets.py
+++ b/ot/datasets.py
@@ -30,7 +30,7 @@ def make_1D_gauss(n, m, s):
         1D histogram for a gaussian distribution
     """
     x = np.arange(n, dtype=np.float64)
-    h = np.exp(-(x - m)**2 / (2 * s**2))
+    h = np.exp(-(x - m) ** 2 / (2 * s ** 2))
     return h / h.sum()
 
 
@@ -80,7 +80,7 @@ def get_2D_samples_gauss(n, m, sigma, random_state=None):
     return make_2D_samples_gauss(n, m, sigma, random_state=None)
 
 
-def make_data_classif(dataset, n, nz=.5, theta=0, p = .5, random_state=None, **kwargs):
+def make_data_classif(dataset, n, nz=.5, theta=0, p=.5, random_state=None, **kwargs):
     """Dataset generation for classification problems
 
     Parameters
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index cdd505d..7eaa44a 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -2,8 +2,6 @@
 """
 Solvers for the original linear program OT problem
 
-
-
 """
 
 # Author: Remi Flamary <remi.flamary@unice.fr>
@@ -18,7 +16,7 @@ from scipy.sparse import coo_matrix
 from .import cvx
 
 # import compiled emd
-from .emd_wrap import emd_c, check_result, emd_1d_sorted
+#from .emd_wrap import emd_c, check_result, emd_1d_sorted
 from ..utils import parmap
 from .cvx import barycenter
 from ..utils import dist
diff --git a/ot/plot.py b/ot/plot.py
index f403e98..ad436b4 100644
--- a/ot/plot.py
+++ b/ot/plot.py
@@ -78,9 +78,10 @@ def plot2D_samples_mat(xs, xt, G, thr=1e-8, **kwargs):
     thr : float, optional
         threshold above which the line is drawn
     **kwargs : dict
-        paameters given to the plot functions (default color is black if
+        parameters given to the plot functions (default color is black if
         nothing given)
     """
+
     if ('color' not in kwargs) and ('c' not in kwargs):
         kwargs['color'] = 'k'
     mx = G.max()
-- 
cgit v1.2.3


From b32c81542c99cc48944fbeb13e4648f9947ac19d Mon Sep 17 00:00:00 2001
From: ievred <ievgen.redko@univ-st-etienne.fr>
Date: Fri, 3 Apr 2020 17:32:07 +0200
Subject: remove commented line

---
 ot/lp/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'ot/lp/__init__.py')

diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 7eaa44a..c4b5834 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -16,7 +16,7 @@ from scipy.sparse import coo_matrix
 from .import cvx
 
 # import compiled emd
-#from .emd_wrap import emd_c, check_result, emd_1d_sorted
+from .emd_wrap import emd_c, check_result, emd_1d_sorted
 from ..utils import parmap
 from .cvx import barycenter
 from ..utils import dist
-- 
cgit v1.2.3


From ef12867f1425ee86b3cfddef4287b52d46114e83 Mon Sep 17 00:00:00 2001
From: Nicolas Courty <ncourty@irisa.fr>
Date: Thu, 23 Apr 2020 13:03:28 +0200
Subject: [WIP] Issue with sparse emd and adding tests on macos (#158)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* First commit-warning removal

* remove dense feature

* pep8

* pep8

* EMD.h

* pep8 again

* tic toc tolerance

Co-authored-by: Rémi Flamary <remi.flamary@gmail.com>
---
 .github/workflows/pythonpackage.yml |  48 +++++-----
 ot/lp/EMD.h                         |   3 -
 ot/lp/EMD_wrapper.cpp               | 182 ------------------------------------
 ot/lp/__init__.py                   |  45 +++------
 ot/lp/emd_wrap.pyx                  |  38 ++------
 ot/lp/network_simplex_simple.h      |   5 +-
 test/test_ot.py                     |  26 ------
 test/test_utils.py                  |   4 +-
 8 files changed, 46 insertions(+), 305 deletions(-)

(limited to 'ot/lp/__init__.py')

diff --git a/.github/workflows/pythonpackage.yml b/.github/workflows/pythonpackage.yml
index 394f453..9c35afa 100644
--- a/.github/workflows/pythonpackage.yml
+++ b/.github/workflows/pythonpackage.yml
@@ -47,31 +47,31 @@ jobs:
       run: |
         codecov
 
-  # macos:
-  #   runs-on: macOS-latest
-  #   strategy:
-  #     max-parallel: 4
-  #     matrix:
-  #       python-version: [3.7]
+  macos:
+     runs-on: macOS-latest
+     strategy:
+       max-parallel: 4
+       matrix:
+         python-version: [3.7]
 
-  #   steps:
-  #   - uses: actions/checkout@v1
-  #   - name: Set up Python ${{ matrix.python-version }}
-  #     uses: actions/setup-python@v1
-  #     with:
-  #       python-version: ${{ matrix.python-version }}
-  #   - name: Install dependencies
-  #     run: |
-  #       python -m pip install --upgrade pip
-  #       pip install -r requirements.txt
-  #       pip install pytest "pytest-cov<2.6"
-  #       pip install -U "sklearn"
-  #   - name: Install POT
-  #     run: |
-  #       pip install -e .
-  #   - name: Run tests
-  #     run: |
-  #       python -m pytest -v test/ ot/ --doctest-modules --ignore ot/gpu/ --cov=ot
+     steps:
+     - uses: actions/checkout@v1
+     - name: Set up Python ${{ matrix.python-version }}
+       uses: actions/setup-python@v1
+       with:
+         python-version: ${{ matrix.python-version }}
+     - name: Install dependencies
+       run: |
+         python -m pip install --upgrade pip
+         pip install -r requirements.txt
+         pip install pytest "pytest-cov<2.6"
+         pip install -U "sklearn"
+     - name: Install POT
+       run: |
+         pip install -e .
+     - name: Run tests
+       run: |
+         python -m pytest -v test/ ot/ --doctest-modules --ignore ot/gpu/ --cov=ot
 
 
   windows:
diff --git a/ot/lp/EMD.h b/ot/lp/EMD.h
index 2adaace..c0fe7a3 100644
--- a/ot/lp/EMD.h
+++ b/ot/lp/EMD.h
@@ -32,9 +32,6 @@ enum ProblemType {
 
 int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter);
 
-int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D, 
-                    long *iG, long *jG, double *G, long * nG,
-                    double* alpha, double* beta, double *cost, int maxIter);
 
 
 #endif
diff --git a/ot/lp/EMD_wrapper.cpp b/ot/lp/EMD_wrapper.cpp
index 28e4af2..bc873ed 100644
--- a/ot/lp/EMD_wrapper.cpp
+++ b/ot/lp/EMD_wrapper.cpp
@@ -106,185 +106,3 @@ int EMD_wrap(int n1, int n2, double *X, double *Y, double *D, double *G,
     return ret;
 }
 
-
-int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D, 
-                    long *iG, long *jG, double *G, long * nG,
-                    double* alpha, double* beta, double *cost, int maxIter)  {
-    // beware M and C anre strored in row major C style!!!
-
-    // Get the number of non zero coordinates for r and c and vectors
-    int n, m, i, cur;
-
-    typedef FullBipartiteDigraph Digraph;
-    DIGRAPH_TYPEDEFS(FullBipartiteDigraph);
-
-    // Get the number of non zero coordinates for r and c
-    n=0;
-    for (int i=0; i<n1; i++) {
-        double val=*(X+i);
-        if (val>0) {
-            n++;
-        }else if(val<0){
-			return INFEASIBLE;
-		}
-    }
-    m=0;
-    for (int i=0; i<n2; i++) {
-        double val=*(Y+i);
-        if (val>0) {
-            m++;
-        }else if(val<0){
-			return INFEASIBLE;
-		}
-    }
-
-    // Define the graph
-
-    std::vector<int> indI(n), indJ(m);
-    std::vector<double> weights1(n), weights2(m);
-    Digraph di(n, m);
-    NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m, maxIter);
-
-    // Set supply and demand, don't account for 0 values (faster)
-
-    cur=0;
-    for (int i=0; i<n1; i++) {
-        double val=*(X+i);
-        if (val>0) {
-            weights1[ cur ] = val;
-            indI[cur++]=i;
-        }
-    }
-
-    // Demand is actually negative supply...
-
-    cur=0;
-    for (int i=0; i<n2; i++) {
-        double val=*(Y+i);
-        if (val>0) {
-            weights2[ cur ] = -val;
-            indJ[cur++]=i;
-        }
-    }
-
-    // Define the graph
-    net.supplyMap(&weights1[0], n, &weights2[0], m);
-
-    // Set the cost of each edge
-    for (int i=0; i<n; i++) {
-        for (int j=0; j<m; j++) {
-            double val=*(D+indI[i]*n2+indJ[j]);
-            net.setCost(di.arcFromId(i*m+j), val);
-        }
-    }
-
-
-    // Solve the problem with the network simplex algorithm
-
-    int ret=net.run();
-    if (ret==(int)net.OPTIMAL || ret==(int)net.MAX_ITER_REACHED) {
-        *cost = 0;
-        Arc a; di.first(a);
-        cur=0;
-        for (; a != INVALID; di.next(a)) {
-            int i = di.source(a);
-            int j = di.target(a);
-            double flow = net.flow(a);
-            if (flow>0)
-            {
-                *cost += flow * (*(D+indI[i]*n2+indJ[j-n]));
-
-                *(G+cur) = flow;
-                *(iG+cur) = indI[i];
-                *(jG+cur) = indJ[j-n];
-                *(alpha + indI[i]) = -net.potential(i);
-                *(beta + indJ[j-n]) = net.potential(j);
-                cur++;
-            }
-        }
-        *nG=cur; // nb of value +1 for numpy indexing
-
-    }
-
-
-    return ret;
-}
-
-int EMD_wrap_all_sparse(int n1, int n2, double *X, double *Y, 
-                    long *iD, long *jD, double *D, long  nD,
-                    long *iG, long *jG, double *G, long * nG,
-                    double* alpha, double* beta, double *cost, int maxIter)  {
-    // beware M and C anre strored in row major C style!!!
-
-    // Get the number of non zero coordinates for r and c and vectors
-    int n, m, cur;
-
-    typedef FullBipartiteDigraph Digraph;
-    DIGRAPH_TYPEDEFS(FullBipartiteDigraph);
-
-    n=n1;
-    m=n2;
-
-
-    // Define the graph
-
-
-    std::vector<double>  weights2(m);
-    Digraph di(n, m);
-    NetworkSimplexSimple<Digraph,double,double, node_id_type> net(di, true, n+m, n*m, maxIter);
-
-    // Set supply and demand, don't account for 0 values (faster)
-
-
-    // Demand is actually negative supply...
-
-    cur=0;
-    for (int i=0; i<n2; i++) {
-        double val=*(Y+i);
-        if (val>0) {
-            weights2[ cur ] = -val;
-        }
-    }
-
-    // Define the graph
-    net.supplyMap(X, n, &weights2[0], m);
-
-    // Set the cost of each edge
-    for (int k=0; k<nD; k++) {
-            int i = iD[k];
-            int j = jD[k];
-            net.setCost(di.arcFromId(i*m+j), D[k]);
-        
-    }
-
-
-    // Solve the problem with the network simplex algorithm
-
-    int ret=net.run();
-    if (ret==(int)net.OPTIMAL || ret==(int)net.MAX_ITER_REACHED) {
-        *cost = net.totalCost();
-        Arc a; di.first(a);
-        cur=0;
-        for (; a != INVALID; di.next(a)) {
-            int i = di.source(a);
-            int j = di.target(a);
-            double flow = net.flow(a);
-            if (flow>0)
-            {
-                
-                *(G+cur) = flow;
-                *(iG+cur) = i;
-                *(jG+cur) = j-n;
-                *(alpha + i) = -net.potential(i);
-                *(beta + j-n) = net.potential(j);
-                cur++;
-            }
-        }
-        *nG=cur; // nb of value +1 for numpy indexing
-
-    }
-
-
-    return ret;
-}
-
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 8d1baa0..ad390c5 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -172,7 +172,7 @@ def estimate_dual_null_weights(alpha0, beta0, a, b, M):
     return center_ot_dual(alpha, beta, a, b)
 
 
-def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True):
+def emd(a, b, M, numItermax=100000, log=False, center_dual=True):
     r"""Solves the Earth Movers distance problem and returns the OT matrix
 
 
@@ -207,10 +207,6 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True):
     log: bool, optional (default=False)
         If True, returns a dictionary containing the cost and dual
         variables. Otherwise returns only the optimal transportation matrix.
-    dense: boolean, optional (default=True)
-        If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt).
-        Otherwise returns a sparse representation using scipy's `coo_matrix`
-        format.
     center_dual: boolean, optional (default=True)
         If True, centers the dual potential using function
         :ref:`center_ot_dual`.
@@ -267,25 +263,14 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True):
     asel = a != 0
     bsel = b != 0
 
-    if dense:
-        G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
-
-        if center_dual:
-            u, v = center_ot_dual(u, v, a, b)
+    G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
 
-        if np.any(~asel) or np.any(~bsel):
-            u, v = estimate_dual_null_weights(u, v, a, b, M)
-
-    else:
-        Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
-        G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
-
-        if center_dual:
-            u, v = center_ot_dual(u, v, a, b)
-
-        if np.any(~asel) or np.any(~bsel):
-            u, v = estimate_dual_null_weights(u, v, a, b, M)
+    if center_dual:
+        u, v = center_ot_dual(u, v, a, b)
 
+    if np.any(~asel) or np.any(~bsel):
+        u, v = estimate_dual_null_weights(u, v, a, b, M)
+    
     result_code_string = check_result(result_code)
     if log:
         log = {}
@@ -299,7 +284,7 @@ def emd(a, b, M, numItermax=100000, log=False, dense=True, center_dual=True):
 
 
 def emd2(a, b, M, processes=multiprocessing.cpu_count(),
-         numItermax=100000, log=False, dense=True, return_matrix=False,
+         numItermax=100000, log=False, return_matrix=False,
          center_dual=True):
     r"""Solves the Earth Movers distance problem and returns the loss
 
@@ -404,11 +389,8 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
     if log or return_matrix:
         def f(b):
             bsel = b != 0
-            if dense:
-                G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
-            else:
-                Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
-                G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
+            
+            G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
 
             if center_dual:
                 u, v = center_ot_dual(u, v, a, b)
@@ -428,11 +410,7 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
     else:
         def f(b):
             bsel = b != 0
-            if dense:
-                G, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
-            else:
-                Gv, iG, jG, cost, u, v, result_code = emd_c(a, b, M, numItermax, dense)
-                G = coo_matrix((Gv, (iG, jG)), shape=(a.shape[0], b.shape[0]))
+            G, cost, u, v, result_code = emd_c(a, b, M, numItermax)
 
             if center_dual:
                 u, v = center_ot_dual(u, v, a, b)
@@ -440,7 +418,6 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
             if np.any(~asel) or np.any(~bsel):
                 u, v = estimate_dual_null_weights(u, v, a, b, M)
 
-            result_code_string = check_result(result_code)
             check_result(result_code)
             return cost
 
diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx
index d345fd4..b6bda47 100644
--- a/ot/lp/emd_wrap.pyx
+++ b/ot/lp/emd_wrap.pyx
@@ -20,9 +20,6 @@ import warnings
 
 cdef extern from "EMD.h":
     int EMD_wrap(int n1,int n2, double *X, double *Y,double *D, double *G, double* alpha, double* beta, double *cost, int maxIter)
-    int EMD_wrap_return_sparse(int n1, int n2, double *X, double *Y, double *D, 
-                    long *iG, long *jG, double *G, long * nG,
-                    double* alpha, double* beta, double *cost, int maxIter)
     cdef enum ProblemType: INFEASIBLE, OPTIMAL, UNBOUNDED, MAX_ITER_REACHED
 
 
@@ -38,13 +35,10 @@ def check_result(result_code):
         message = "numItermax reached before optimality. Try to increase numItermax."
     warnings.warn(message)
     return message
-
-
-
-
+ 
 @cython.boundscheck(False)
 @cython.wraparound(False)
-def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"]  b, np.ndarray[double, ndim=2, mode="c"]  M, int max_iter, bint dense):
+def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mode="c"]  b, np.ndarray[double, ndim=2, mode="c"]  M, int max_iter):
     """
         Solves the Earth Movers distance problem and returns the optimal transport matrix
 
@@ -83,8 +77,6 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
     max_iter : int
         The maximum number of iterations before stopping the optimization
         algorithm if it has not converged.
-    dense : bool
-        Return a sparse transport matrix if set to False
 
     Returns
     -------
@@ -114,29 +106,13 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod
     if not len(b):
         b=np.ones((n2,))/n2
 
-    if dense:
-        # init OT matrix
-        G=np.zeros([n1, n2])
-
-        # calling the function
-        result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
-
-        return G, cost, alpha, beta, result_code
-
-
-    else:
-        
-        # init sparse OT matrix
-        Gv=np.zeros(nmax)
-        iG=np.zeros(nmax,dtype=np.int)
-        jG=np.zeros(nmax,dtype=np.int)
-
-
-        result_code = EMD_wrap_return_sparse(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <long*> iG.data, <long*> jG.data, <double*> Gv.data, <long*> &nG, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
-
+    # init OT matrix
+    G=np.zeros([n1, n2])
 
-        return Gv[:nG], iG[:nG], jG[:nG], cost, alpha, beta, result_code
+    # calling the function
+    result_code = EMD_wrap(n1, n2, <double*> a.data, <double*> b.data, <double*> M.data, <double*> G.data, <double*> alpha.data, <double*> beta.data, <double*> &cost, max_iter)
 
+    return G, cost, alpha, beta, result_code
 
 
 @cython.boundscheck(False)
diff --git a/ot/lp/network_simplex_simple.h b/ot/lp/network_simplex_simple.h
index 498e921..5d93040 100644
--- a/ot/lp/network_simplex_simple.h
+++ b/ot/lp/network_simplex_simple.h
@@ -875,7 +875,7 @@ namespace lemon {
              c += Number(it->second) * Number(_cost[it->first]);
              return c;*/
 
-            for (int i=0; i<_flow.size(); i++)
+            for (unsigned long i=0; i<_flow.size(); i++)
                 c += _flow[i] * Number(_cost[i]);
             return c;
 
@@ -1257,7 +1257,7 @@ namespace lemon {
                 u = w;
             }
             _pred[u_in] = in_arc;
-            _forward[u_in] = (u_in == _source[in_arc]);
+            _forward[u_in] = ((unsigned int)u_in == _source[in_arc]);
             _succ_num[u_in] = old_succ_num;
 
             // Set limits for updating _last_succ form v_in and v_out
@@ -1418,7 +1418,6 @@ namespace lemon {
         template <typename PivotRuleImpl>
         ProblemType start() {
             PivotRuleImpl pivot(*this);
-            double prevCost=-1;
 			ProblemType retVal = OPTIMAL;
 
             // Perform heuristic initial pivots
diff --git a/test/test_ot.py b/test/test_ot.py
index 0f1357f..b7306f6 100644
--- a/test/test_ot.py
+++ b/test/test_ot.py
@@ -170,27 +170,6 @@ def test_emd_empty():
     np.testing.assert_allclose(w, 0)
 
 
-def test_emd_sparse():
-    n = 100
-    rng = np.random.RandomState(0)
-
-    x = rng.randn(n, 2)
-    x2 = rng.randn(n, 2)
-
-    M = ot.dist(x, x2)
-
-    G = ot.emd([], [], M, dense=True)
-
-    Gs = ot.emd([], [], M, dense=False)
-
-    ws = ot.emd2([], [], M, dense=False)
-
-    # check G is the same
-    np.testing.assert_allclose(G, Gs.todense())
-    # check value
-    np.testing.assert_allclose(Gs.multiply(M).sum(), ws, rtol=1e-6)
-
-
 def test_emd2_multi():
     n = 500  # nb bins
 
@@ -222,12 +201,7 @@ def test_emd2_multi():
     emdn = ot.emd2(a, b, M)
     ot.toc('multi proc : {} s')
 
-    ot.tic()
-    emdn2 = ot.emd2(a, b, M, dense=False)
-    ot.toc('multi proc : {} s')
-
     np.testing.assert_allclose(emd1, emdn)
-    np.testing.assert_allclose(emd1, emdn2, rtol=1e-6)
 
     # emd loss multipro proc with log
     ot.tic()
diff --git a/test/test_utils.py b/test/test_utils.py
index 640598d..db9cda6 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -36,10 +36,10 @@ def test_tic_toc():
     t2 = ot.toq()
 
     # test timing
-    np.testing.assert_allclose(0.5, t, rtol=1e-2, atol=1e-2)
+    np.testing.assert_allclose(0.5, t, rtol=1e-1, atol=1e-1)
 
     # test toc vs toq
-    np.testing.assert_allclose(t, t2, rtol=1e-2, atol=1e-2)
+    np.testing.assert_allclose(t, t2, rtol=1e-1, atol=1e-1)
 
 
 def test_kernel():
-- 
cgit v1.2.3


From 8599e720d5f438e2aaf5c635883e64deb026f3ce Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Fri, 24 Apr 2020 11:20:02 +0200
Subject: correct doc for emd

---
 docs/source/conf.py | 2 +-
 ot/lp/__init__.py   | 6 +++++-
 2 files changed, 6 insertions(+), 2 deletions(-)

(limited to 'ot/lp/__init__.py')

diff --git a/docs/source/conf.py b/docs/source/conf.py
index 880c71d..f3c61e7 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -67,7 +67,7 @@ extensions = [
     'sphinx.ext.ifconfig',
     'sphinx.ext.viewcode',
     'sphinx.ext.napoleon',
-    'sphinx_gallery.gen_gallery',
+    #'sphinx_gallery.gen_gallery',
 ]
 
 napoleon_numpy_docstring = True
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index ad390c5..50003ed 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -180,7 +180,9 @@ def emd(a, b, M, numItermax=100000, log=False, center_dual=True):
         \gamma = arg\min_\gamma <\gamma,M>_F
 
         s.t. \gamma 1 = a
+
              \gamma^T 1= b
+
              \gamma\geq 0
     where :
 
@@ -289,10 +291,12 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
     r"""Solves the Earth Movers distance problem and returns the loss
 
     .. math::
-        \gamma = arg\min_\gamma <\gamma,M>_F
+        \min_\gamma <\gamma,M>_F
 
         s.t. \gamma 1 = a
+
              \gamma^T 1= b
+
              \gamma\geq 0
     where :
 
-- 
cgit v1.2.3


From 8406caafaef8b3683d6a1d44917c404ba780f82c Mon Sep 17 00:00:00 2001
From: Rémi Flamary <remi.flamary@gmail.com>
Date: Tue, 5 May 2020 07:53:45 +0200
Subject: remove dense from documentation

---
 ot/lp/__init__.py | 4 ----
 1 file changed, 4 deletions(-)

(limited to 'ot/lp/__init__.py')

diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 50003ed..514a607 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -327,10 +327,6 @@ def emd2(a, b, M, processes=multiprocessing.cpu_count(),
         variables. Otherwise returns only the optimal transportation cost.
     return_matrix: boolean, optional (default=False)
         If True, returns the optimal transportation matrix in the log.
-    dense: boolean, optional (default=True)
-        If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt).
-        Otherwise returns a sparse representation using scipy's `coo_matrix`
-        format.
     center_dual: boolean, optional (default=True)
         If True, centers the dual potential using function
         :ref:`center_ot_dual`.
-- 
cgit v1.2.3