From f63f34f8adb6943b6410f8b773b4b4d8f1c7b4ba Mon Sep 17 00:00:00 2001 From: Romain Tavenard Date: Thu, 20 Jun 2019 14:29:56 +0200 Subject: EMD 1d without doc --- ot/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'ot/__init__.py') diff --git a/ot/__init__.py b/ot/__init__.py index b74b924..5d5b700 100644 --- a/ot/__init__.py +++ b/ot/__init__.py @@ -22,7 +22,7 @@ from . import smooth from . import stochastic # OT functions -from .lp import emd, emd2 +from .lp import emd, emd2, emd_1d from .bregman import sinkhorn, sinkhorn2, barycenter from .da import sinkhorn_lpl1_mm @@ -31,6 +31,6 @@ from .utils import dist, unif, tic, toc, toq __version__ = "0.5.1" -__all__ = ["emd", "emd2", "sinkhorn", "sinkhorn2", "utils", 'datasets', +__all__ = ["emd", "emd2", 'emd_1d', "sinkhorn", "sinkhorn2", "utils", 'datasets', 'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov', 'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim'] -- cgit v1.2.3 From 18502d6861a4977cbade957f2e48eeb8dbb55414 Mon Sep 17 00:00:00 2001 From: Romain Tavenard Date: Fri, 21 Jun 2019 11:21:08 +0200 Subject: Sparse G matrix for EMD1d + standard metrics computed without cdist --- ot/__init__.py | 4 ++-- ot/lp/emd_wrap.pyx | 29 +++++++++++++++++++++-------- test/test_ot.py | 23 ++++++++++++++++++----- 3 files changed, 41 insertions(+), 15 deletions(-) (limited to 'ot/__init__.py') diff --git a/ot/__init__.py b/ot/__init__.py index 5d5b700..f0e526c 100644 --- a/ot/__init__.py +++ b/ot/__init__.py @@ -22,7 +22,7 @@ from . import smooth from . import stochastic # OT functions -from .lp import emd, emd2, emd_1d +from .lp import emd, emd2, emd_1d, emd2_1d from .bregman import sinkhorn, sinkhorn2, barycenter from .da import sinkhorn_lpl1_mm @@ -32,5 +32,5 @@ from .utils import dist, unif, tic, toc, toq __version__ = "0.5.1" __all__ = ["emd", "emd2", 'emd_1d', "sinkhorn", "sinkhorn2", "utils", 'datasets', - 'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov', + 'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov', 'emd_1d', 'emd2_1d', 'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim'] diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 2966206..ab88d7f 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -101,8 +101,8 @@ def emd_c(np.ndarray[double, ndim=1, mode="c"] a, np.ndarray[double, ndim=1, mod @cython.wraparound(False) def emd_1d_sorted(np.ndarray[double, ndim=1, mode="c"] u_weights, np.ndarray[double, ndim=1, mode="c"] v_weights, - np.ndarray[double, ndim=2, mode="c"] u, - np.ndarray[double, ndim=2, mode="c"] v, + np.ndarray[double, ndim=1, mode="c"] u, + np.ndarray[double, ndim=1, mode="c"] v, str metric='sqeuclidean'): r""" Roro's stuff @@ -118,21 +118,34 @@ def emd_1d_sorted(np.ndarray[double, ndim=1, mode="c"] u_weights, cdef double m_ij = 0. 
- cdef np.ndarray[double, ndim=2, mode="c"] G = np.zeros((n, m), + cdef np.ndarray[double, ndim=1, mode="c"] G = np.zeros((n + m - 1, ), dtype=np.float64) + cdef np.ndarray[long, ndim=2, mode="c"] indices = np.zeros((n + m - 1, 2), + dtype=np.int) + cdef int cur_idx = 0 while i < n and j < m: - m_ij = dist(u[i].reshape((1, 1)), v[j].reshape((1, 1)), - metric=metric)[0, 0] + if metric == 'sqeuclidean': + m_ij = (u[i] - v[j]) ** 2 + elif metric == 'cityblock' or metric == 'euclidean': + m_ij = np.abs(u[i] - v[j]) + else: + m_ij = dist(u[i].reshape((1, 1)), v[j].reshape((1, 1)), + metric=metric)[0, 0] if w_i < w_j or j == m - 1: cost += m_ij * w_i - G[i, j] = w_i + G[cur_idx] = w_i + indices[cur_idx, 0] = i + indices[cur_idx, 1] = j i += 1 w_j -= w_i w_i = u_weights[i] else: cost += m_ij * w_j - G[i, j] = w_j + G[cur_idx] = w_j + indices[cur_idx, 0] = i + indices[cur_idx, 1] = j j += 1 w_i -= w_j w_j = v_weights[j] - return G, cost \ No newline at end of file + cur_idx += 1 + return G[:cur_idx], indices[:cur_idx], cost diff --git a/test/test_ot.py b/test/test_ot.py index 7008002..2a2e0a5 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -7,6 +7,7 @@ import warnings import numpy as np +from scipy.stats import wasserstein_distance import ot from ot.datasets import make_1D_gauss as gauss @@ -37,7 +38,7 @@ def test_emd_emd2(): # check G is identity np.testing.assert_allclose(G, np.eye(n) / n) - # check constratints + # check constraints np.testing.assert_allclose(u, G.sum(1)) # cf convergence sinkhorn np.testing.assert_allclose(u, G.sum(0)) # cf convergence sinkhorn @@ -46,12 +47,13 @@ def test_emd_emd2(): np.testing.assert_allclose(w, 0) -def test_emd1d(): +def test_emd_1d_emd2_1d(): # test emd1d gives similar results as emd n = 20 m = 30 - u = np.random.randn(n, 1) - v = np.random.randn(m, 1) + rng = np.random.RandomState(0) + u = rng.randn(n, 1) + v = rng.randn(m, 1) M = ot.dist(u, v, metric='sqeuclidean') @@ -59,9 +61,20 @@ def test_emd1d(): wass = log["cost"] G_1d, log = ot.emd_1d([], [], u, v, metric='sqeuclidean', log=True) wass1d = log["cost"] + wass1d_emd2 = ot.emd2_1d([], [], u, v, metric='sqeuclidean', log=False) + wass1d_euc = ot.emd2_1d([], [], u, v, metric='euclidean', log=False) # check loss is similar np.testing.assert_allclose(wass, wass1d) + np.testing.assert_allclose(wass, wass1d_emd2) + + # check loss is similar to scipy's implementation for Euclidean metric + wass_sp = wasserstein_distance(u.reshape((-1, )), v.reshape((-1, ))) + np.testing.assert_allclose(wass_sp, wass1d_euc) + + # check constraints + np.testing.assert_allclose(np.ones((n, )) / n, G.sum(1)) + np.testing.assert_allclose(np.ones((m, )) / m, G.sum(0)) # check G is similar np.testing.assert_allclose(G, G_1d) @@ -86,7 +99,7 @@ def test_emd_empty(): # check G is identity np.testing.assert_allclose(G, np.eye(n) / n) - # check constratints + # check constraints np.testing.assert_allclose(u, G.sum(1)) # cf convergence sinkhorn np.testing.assert_allclose(u, G.sum(0)) # cf convergence sinkhorn -- cgit v1.2.3 From 0d333e004636f5d25edea6bb195e8e4d9a95ba98 Mon Sep 17 00:00:00 2001 From: Romain Tavenard Date: Thu, 27 Jun 2019 10:23:32 +0200 Subject: Improved tests and docs for wasserstein_1d --- ot/__init__.py | 5 +++-- ot/lp/__init__.py | 13 ++++++------- ot/lp/emd_wrap.pyx | 3 ++- test/test_ot.py | 23 +++++++++++++++++++++++ 4 files changed, 34 insertions(+), 10 deletions(-) (limited to 'ot/__init__.py') diff --git a/ot/__init__.py b/ot/__init__.py index f0e526c..5bd9bb3 100644 --- a/ot/__init__.py +++ 
b/ot/__init__.py @@ -22,7 +22,7 @@ from . import smooth from . import stochastic # OT functions -from .lp import emd, emd2, emd_1d, emd2_1d +from .lp import emd, emd2, emd_1d, emd2_1d, wasserstein_1d, wasserstein2_1d from .bregman import sinkhorn, sinkhorn2, barycenter from .da import sinkhorn_lpl1_mm @@ -32,5 +32,6 @@ from .utils import dist, unif, tic, toc, toq __version__ = "0.5.1" __all__ = ["emd", "emd2", 'emd_1d', "sinkhorn", "sinkhorn2", "utils", 'datasets', - 'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov', 'emd_1d', 'emd2_1d', + 'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov', + 'emd_1d', 'emd2_1d', 'wasserstein_1d', 'wasserstein2_1d', 'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim'] diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 719032b..76c9ec0 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -530,13 +530,13 @@ def emd2_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True, def wasserstein_1d(x_a, x_b, a=None, b=None, p=1., dense=True, log=False): - """Solves the Wasserstein distance problem between 1d measures and returns + """Solves the p-Wasserstein distance problem between 1d measures and returns the OT matrix .. math:: - \gamma = arg\min_\gamma \left(\sum_i \sum_j \gamma_{ij} - |x_a[i] - x_b[j]|^p \right)^{1/p} + \gamma = arg\min_\gamma \left( \sum_i \sum_j \gamma_{ij} + |x_a[i] - x_b[j]|^p \\right)^{1/p} s.t. \gamma 1 = a, \gamma^T 1= b, @@ -617,15 +617,14 @@ def wasserstein_1d(x_a, x_b, a=None, b=None, p=1., dense=True, log=False): dense=dense, log=log) -def wasserstein2_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., - dense=True, log=False): - """Solves the Wasserstein distance problem between 1d measures and returns +def wasserstein2_1d(x_a, x_b, a=None, b=None, p=1., dense=True, log=False): + """Solves the p-Wasserstein distance problem between 1d measures and returns the loss .. math:: \gamma = arg\min_\gamma \left( \sum_i \sum_j \gamma_{ij} - |x_a[i] - x_b[j]|^p \right)^{1/p} + |x_a[i] - x_b[j]|^p \\right)^{1/p} s.t. 
\gamma 1 = a, \gamma^T 1= b, diff --git a/ot/lp/emd_wrap.pyx b/ot/lp/emd_wrap.pyx index 7134136..42b848f 100644 --- a/ot/lp/emd_wrap.pyx +++ b/ot/lp/emd_wrap.pyx @@ -13,6 +13,7 @@ cimport numpy as np from ..utils import dist cimport cython +cimport libc.math as math import warnings @@ -159,7 +160,7 @@ def emd_1d_sorted(np.ndarray[double, ndim=1, mode="c"] u_weights, elif metric == 'cityblock' or metric == 'euclidean': m_ij = abs(u[i] - v[j]) elif metric == 'minkowski': - m_ij = abs(u[i] - v[j]) ** p + m_ij = math.pow(abs(u[i] - v[j]), p) else: m_ij = dist(u[i].reshape((1, 1)), v[j].reshape((1, 1)), metric=metric)[0, 0] diff --git a/test/test_ot.py b/test/test_ot.py index 6d6ea26..48423e7 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -85,6 +85,29 @@ def test_emd_1d_emd2_1d(): np.testing.assert_raises(AssertionError, ot.emd_1d, u, v, [], []) +def test_wass_1d(): + # test emd1d gives similar results as emd + n = 20 + m = 30 + rng = np.random.RandomState(0) + u = rng.randn(n, 1) + v = rng.randn(m, 1) + + M = ot.dist(u, v, metric='sqeuclidean') + + G, log = ot.emd([], [], M, log=True) + wass = log["cost"] + + G_1d, log = ot.wasserstein_1d(u, v, [], [], p=2., log=True) + wass1d = log["cost"] + + # check loss is similar + np.testing.assert_allclose(np.sqrt(wass), wass1d) + + # check G is similar + np.testing.assert_allclose(G, G_1d) + + def test_emd_empty(): # test emd and emd2 for simple identity n = 100 -- cgit v1.2.3 From c92e595009ad5e2ae6d4b2c040556cffb6316847 Mon Sep 17 00:00:00 2001 From: Romain Tavenard Date: Thu, 27 Jun 2019 11:08:15 +0200 Subject: Wasserstein defined as the cost itself (do not return transportation matrix) --- ot/__init__.py | 4 +- ot/lp/__init__.py | 125 +++++------------------------------------------------- test/test_ot.py | 6 +-- 3 files changed, 13 insertions(+), 122 deletions(-) (limited to 'ot/__init__.py') diff --git a/ot/__init__.py b/ot/__init__.py index 730aa4f..1b3c2fb 100644 --- a/ot/__init__.py +++ b/ot/__init__.py @@ -23,7 +23,7 @@ from . import stochastic from . import unbalanced # OT functions -from .lp import emd, emd2, emd_1d, emd2_1d, wasserstein_1d, wasserstein2_1d +from .lp import emd, emd2, emd_1d, emd2_1d, wasserstein_1d from .bregman import sinkhorn, sinkhorn2, barycenter from .unbalanced import sinkhorn_unbalanced, barycenter_unbalanced from .da import sinkhorn_lpl1_mm @@ -35,6 +35,6 @@ __version__ = "0.5.1" __all__ = ["emd", "emd2", 'emd_1d', "sinkhorn", "sinkhorn2", "utils", 'datasets', 'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov', - 'emd_1d', 'emd2_1d', 'wasserstein_1d', 'wasserstein2_1d', + 'emd_1d', 'emd2_1d', 'wasserstein_1d', 'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim', 'sinkhorn_unbalanced', "barycenter_unbalanced"] diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index 76c9ec0..a3f5b8d 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -21,7 +21,7 @@ from .cvx import barycenter from ..utils import dist __all__=['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx', - 'emd_1d', 'emd2_1d', 'wasserstein_1d', 'wasserstein2_1d'] + 'emd_1d', 'emd2_1d', 'wasserstein_1d'] def emd(a, b, M, numItermax=100000, log=False): @@ -529,9 +529,9 @@ def emd2_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True, return cost -def wasserstein_1d(x_a, x_b, a=None, b=None, p=1., dense=True, log=False): +def wasserstein_1d(x_a, x_b, a=None, b=None, p=1.): """Solves the p-Wasserstein distance problem between 1d measures and returns - the OT matrix + the distance .. 
math:: @@ -560,22 +560,11 @@ def wasserstein_1d(x_a, x_b, a=None, b=None, p=1., dense=True, log=False): Target histogram (default is uniform weight) p: float, optional (default=1.0) The order of the p-Wasserstein distance to be computed - dense: boolean, optional (default=True) - If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt). - Otherwise returns a sparse representation using scipy's `coo_matrix` - format. Due to implementation details, this function runs faster when - `'sqeuclidean'`, `'minkowski'`, `'cityblock'`, or `'euclidean'` metrics - are used. - log: boolean, optional (default=False) - If True, returns a dictionary containing the cost. - Otherwise returns only the optimal transportation matrix. Returns ------- - gamma: (ns, nt) ndarray - Optimal transportation matrix for the given parameters - log: dict - If input log is True, a dictionary containing the cost + dist: float + p-Wasserstein distance Examples @@ -590,96 +579,8 @@ def wasserstein_1d(x_a, x_b, a=None, b=None, p=1., dense=True, log=False): >>> x_a = [2., 0.] >>> x_b = [0., 3.] >>> ot.wasserstein_1d(x_a, x_b, a, b) - array([[0. , 0.5], - [0.5, 0. ]]) - >>> ot.wasserstein_1d(x_a, x_b) - array([[0. , 0.5], - [0.5, 0. ]]) - - References - ---------- - - .. [1] Peyré, G., & Cuturi, M. (2017). "Computational Optimal - Transport", 2018. - - See Also - -------- - ot.lp.emd_1d : EMD for 1d distributions - ot.lp.wasserstein2_1d : Wasserstein for 1d distributions (returns the cost - instead of the transportation matrix) - """ - if log: - G, log = emd_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p, - dense=dense, log=log) - log['cost'] = np.power(log['cost'], 1. / p) - return G, log - return emd_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p, - dense=dense, log=log) - - -def wasserstein2_1d(x_a, x_b, a=None, b=None, p=1., dense=True, log=False): - """Solves the p-Wasserstein distance problem between 1d measures and returns - the loss - - - .. math:: - \gamma = arg\min_\gamma \left( \sum_i \sum_j \gamma_{ij} - |x_a[i] - x_b[j]|^p \\right)^{1/p} - - s.t. \gamma 1 = a, - \gamma^T 1= b, - \gamma\geq 0 - where : - - - x_a and x_b are the samples - - a and b are the sample weights - - Uses the algorithm detailed in [1]_ - - Parameters - ---------- - x_a : (ns,) or (ns, 1) ndarray, float64 - Source dirac locations (on the real line) - x_b : (nt,) or (ns, 1) ndarray, float64 - Target dirac locations (on the real line) - a : (ns,) ndarray, float64, optional - Source histogram (default is uniform weight) - b : (nt,) ndarray, float64, optional - Target histogram (default is uniform weight) - p: float, optional (default=1.0) - The order of the p-Wasserstein distance to be computed - dense: boolean, optional (default=True) - If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt). - Otherwise returns a sparse representation using scipy's `coo_matrix` - format. Only used if log is set to True. Due to implementation details, - this function runs faster when dense is set to False. - log: boolean, optional (default=False) - If True, returns a dictionary containing the transportation matrix. - Otherwise returns only the loss. - - Returns - ------- - loss: float - Cost associated to the optimal transportation - log: dict - If input log is True, a dictionary containing the Optimal transportation - matrix for the given parameters - - - Examples - -------- - - Simple example with obvious solution. 
The function wasserstein2_1d accepts - lists and performs automatic conversion to numpy arrays - - >>> import ot - >>> a=[.5, .5] - >>> b=[.5, .5] - >>> x_a = [2., 0.] - >>> x_b = [0., 3.] - >>> ot.wasserstein2_1d(x_a, x_b, a, b) 0.5 - >>> ot.wasserstein2_1d(x_a, x_b) + >>> ot.wasserstein_1d(x_a, x_b) 0.5 References @@ -690,14 +591,8 @@ def wasserstein2_1d(x_a, x_b, a=None, b=None, p=1., dense=True, log=False): See Also -------- - ot.lp.emd2_1d : EMD for 1d distributions - ot.lp.wasserstein_1d : Wasserstein for 1d distributions (returns the - transportation matrix instead of the cost) + ot.lp.emd_1d : EMD for 1d distributions """ - if log: - cost, log = emd2_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p, - dense=dense, log=log) - cost = np.power(cost, 1. / p) - return cost, log - return np.power(emd2_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p, - dense=dense, log=log), 1. / p) \ No newline at end of file + cost_emd = emd2_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p, + dense=False, log=False) + return np.power(cost_emd, 1. / p) diff --git a/test/test_ot.py b/test/test_ot.py index 48423e7..3c4ac11 100644 --- a/test/test_ot.py +++ b/test/test_ot.py @@ -98,15 +98,11 @@ def test_wass_1d(): G, log = ot.emd([], [], M, log=True) wass = log["cost"] - G_1d, log = ot.wasserstein_1d(u, v, [], [], p=2., log=True) - wass1d = log["cost"] + wass1d = ot.wasserstein_1d(u, v, [], [], p=2.) # check loss is similar np.testing.assert_allclose(np.sqrt(wass), wass1d) - # check G is similar - np.testing.assert_allclose(G, G_1d) - def test_emd_empty(): # test emd and emd2 for simple identity -- cgit v1.2.3
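The sweep implemented in emd_1d_sorted above is the classical closed-form solver for one-dimensional optimal transport: once both supports are sorted, mass is pushed greedily between the two current atoms, so at most n + m - 1 entries of the plan are non-zero. The plain-NumPy sketch below mirrors that Cython routine for reference only; the helper name emd_1d_sorted_py, the explicit bounds guards and the reduced metric handling are illustrative assumptions rather than part of the POT API, and it expects sorted supports with weight vectors that each sum to one.

    # Illustrative pure-NumPy sketch of the sorted-sweep procedure used by
    # emd_1d_sorted in the patches above.  Not part of the POT API.
    import numpy as np
    from scipy.stats import wasserstein_distance  # also used in the patched tests


    def emd_1d_sorted_py(u_weights, v_weights, u, v, metric="sqeuclidean", p=1.0):
        n, m = len(u), len(v)
        # At most n + m - 1 entries of the plan are non-zero, which is why the
        # second patch stores (flow, indices) instead of a dense (n, m) matrix G.
        flow = np.zeros(n + m - 1)
        indices = np.zeros((n + m - 1, 2), dtype=int)
        cost = 0.0
        i = j = cur = 0
        w_i, w_j = u_weights[0], v_weights[0]
        while i < n and j < m:
            # Ground cost between the current pair of support points.
            if metric == "sqeuclidean":
                m_ij = (u[i] - v[j]) ** 2
            elif metric in ("cityblock", "euclidean"):
                m_ij = abs(u[i] - v[j])
            elif metric == "minkowski":
                m_ij = abs(u[i] - v[j]) ** p
            else:
                raise ValueError("metric not covered by this sketch")
            # Move as much mass as possible between the two current atoms,
            # then advance on the side whose remaining mass is exhausted.
            if w_i < w_j or j == m - 1:
                cost += m_ij * w_i
                flow[cur] = w_i
                indices[cur] = (i, j)
                i += 1
                w_j -= w_i
                w_i = u_weights[i] if i < n else 0.0
            else:
                cost += m_ij * w_j
                flow[cur] = w_j
                indices[cur] = (i, j)
                j += 1
                w_i -= w_j
                w_j = v_weights[j] if j < m else 0.0
            cur += 1
        return flow[:cur], indices[:cur], cost


    # Quick consistency check in the spirit of test_emd_1d_emd2_1d: with the
    # 'euclidean' ground cost the sweep cost equals scipy's 1-Wasserstein distance.
    rng = np.random.RandomState(0)
    u = np.sort(rng.randn(20))
    v = np.sort(rng.randn(30))
    a = np.full(20, 1.0 / 20)
    b = np.full(30, 1.0 / 30)
    _, _, cost = emd_1d_sorted_py(a, b, u, v, metric="euclidean")
    assert np.allclose(cost, wasserstein_distance(u, v))

The (flow, indices) pair returned here is the kind of triplet representation that can be handed to scipy's coo_matrix to recover the sparse plan referred to in the "Sparse G matrix" commit, and raising the 'minkowski' cost to the power 1/p afterwards is what the final wasserstein_1d wrapper does via emd2_1d to obtain the p-Wasserstein distance.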