Define wasserstein_1d as the cost itself (it no longer returns the transportation matrix)

author: Romain Tavenard <romain.tavenard@univ-rennes2.fr> 2019-06-27 11:08:15 +0200
committer: Romain Tavenard <romain.tavenard@univ-rennes2.fr> 2019-06-27 11:08:15 +0200
commit: c92e595009ad5e2ae6d4b2c040556cffb6316847 (patch)
tree: 717e40eed0de6e35e302f3080ffc4eff349b3726 /ot
parent: bbc56e74bf119b8810c0de7b446bb01b30efc3c2 (diff)
2 files changed, 12 insertions, 117 deletions
diff --git a/ot/__init__.py b/ot/__init__.py
index 730aa4f..1b3c2fb 100644
--- a/ot/__init__.py
+++ b/ot/__init__.py
@@ -23,7 +23,7 @@ from . import stochastic
 from . import unbalanced
 
 # OT functions
-from .lp import emd, emd2, emd_1d, emd2_1d, wasserstein_1d, wasserstein2_1d
+from .lp import emd, emd2, emd_1d, emd2_1d, wasserstein_1d
 from .bregman import sinkhorn, sinkhorn2, barycenter
 from .unbalanced import sinkhorn_unbalanced, barycenter_unbalanced
 from .da import sinkhorn_lpl1_mm
@@ -35,6 +35,6 @@ __version__ = "0.5.1"
 
 __all__ = ["emd", "emd2", 'emd_1d', "sinkhorn", "sinkhorn2", "utils", 'datasets',
            'bregman', 'lp', 'tic', 'toc', 'toq', 'gromov',
-           'emd_1d', 'emd2_1d', 'wasserstein_1d', 'wasserstein2_1d',
+           'emd_1d', 'emd2_1d', 'wasserstein_1d',
            'dist', 'unif', 'barycenter', 'sinkhorn_lpl1_mm', 'da', 'optim',
            'sinkhorn_unbalanced', "barycenter_unbalanced"]
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index 76c9ec0..a3f5b8d 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -21,7 +21,7 @@ from .cvx import barycenter
 from ..utils import dist
 
 __all__=['emd', 'emd2', 'barycenter', 'free_support_barycenter', 'cvx',
-         'emd_1d', 'emd2_1d', 'wasserstein_1d', 'wasserstein2_1d']
+         'emd_1d', 'emd2_1d', 'wasserstein_1d']
 
 
 def emd(a, b, M, numItermax=100000, log=False):
@@ -529,9 +529,9 @@ def emd2_1d(x_a, x_b, a=None, b=None, metric='sqeuclidean', p=1., dense=True,
     return cost
 
 
-def wasserstein_1d(x_a, x_b, a=None, b=None, p=1., dense=True, log=False):
+def wasserstein_1d(x_a, x_b, a=None, b=None, p=1.):
     """Solves the p-Wasserstein distance problem between 1d measures and returns
-    the OT matrix
+    the distance
 
 
     .. math::
@@ -560,22 +560,11 @@ def wasserstein_1d(x_a, x_b, a=None, b=None, p=1., dense=True, log=False):
         Target histogram (default is uniform weight)
     p: float, optional (default=1.0)
          The order of the p-Wasserstein distance to be computed
-    dense: boolean, optional (default=True)
-        If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt).
-        Otherwise returns a sparse representation using scipy's `coo_matrix`
-        format. Due to implementation details, this function runs faster when
-        `'sqeuclidean'`, `'minkowski'`, `'cityblock'`,  or `'euclidean'` metrics
-        are used.
-    log: boolean, optional (default=False)
-        If True, returns a dictionary containing the cost.
-        Otherwise returns only the optimal transportation matrix.
 
     Returns
     -------
-    gamma: (ns, nt) ndarray
-        Optimal transportation matrix for the given parameters
-    log: dict
-        If input log is True, a dictionary containing the cost
+    dist: float
+        p-Wasserstein distance
 
 
     Examples
@@ -590,96 +579,8 @@ def wasserstein_1d(x_a, x_b, a=None, b=None, p=1., dense=True, log=False):
     >>> x_a = [2., 0.]
     >>> x_b = [0., 3.]
     >>> ot.wasserstein_1d(x_a, x_b, a, b)
-    array([[0. ,  0.5],
-           [0.5,  0. ]])
-    >>> ot.wasserstein_1d(x_a, x_b)
-    array([[0. ,  0.5],
-           [0.5,  0. ]])
-
-    References
-    ----------
-
-    .. [1]  Peyré, G., & Cuturi, M. (2017). "Computational Optimal
-        Transport", 2018.
-
-    See Also
-    --------
-    ot.lp.emd_1d : EMD for 1d distributions
-    ot.lp.wasserstein2_1d : Wasserstein for 1d distributions (returns the cost
-        instead of the transportation matrix)
-    """
-    if log:
-        G, log = emd_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p,
-                        dense=dense, log=log)
-        log['cost'] = np.power(log['cost'], 1. / p)
-        return G, log
-    return emd_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p,
-                  dense=dense, log=log)
-
-
-def wasserstein2_1d(x_a, x_b, a=None, b=None, p=1., dense=True, log=False):
-    """Solves the p-Wasserstein distance problem between 1d measures and returns
-    the loss
-
-
-    .. math::
-        \gamma = arg\min_\gamma \left( \sum_i \sum_j \gamma_{ij}
-            |x_a[i] - x_b[j]|^p \\right)^{1/p}
-
-        s.t. \gamma 1 = a,
-             \gamma^T 1= b,
-             \gamma\geq 0
-    where :
-
-    - x_a and x_b are the samples
-    - a and b are the sample weights
-
-    Uses the algorithm detailed in [1]_
-
-    Parameters
-    ----------
-    x_a : (ns,) or (ns, 1) ndarray, float64
-        Source dirac locations (on the real line)
-    x_b : (nt,) or (ns, 1) ndarray, float64
-        Target dirac locations (on the real line)
-    a : (ns,) ndarray, float64, optional
-        Source histogram (default is uniform weight)
-    b : (nt,) ndarray, float64, optional
-        Target histogram (default is uniform weight)
-    p: float, optional (default=1.0)
-         The order of the p-Wasserstein distance to be computed
-    dense: boolean, optional (default=True)
-        If True, returns math:`\gamma` as a dense ndarray of shape (ns, nt).
-        Otherwise returns a sparse representation using scipy's `coo_matrix`
-        format. Only used if log is set to True. Due to implementation details,
-        this function runs faster when dense is set to False.
-    log: boolean, optional (default=False)
-        If True, returns a dictionary containing the transportation matrix.
-        Otherwise returns only the loss.
-
-    Returns
-    -------
-    loss: float
-        Cost associated to the optimal transportation
-    log: dict
-        If input log is True, a dictionary containing the Optimal transportation
-        matrix for the given parameters
-
-
-    Examples
-    --------
-
-    Simple example with obvious solution. The function wasserstein2_1d accepts
-    lists and performs automatic conversion to numpy arrays
-
-    >>> import ot
-    >>> a=[.5, .5]
-    >>> b=[.5, .5]
-    >>> x_a = [2., 0.]
-    >>> x_b = [0., 3.]
-    >>> ot.wasserstein2_1d(x_a, x_b, a, b)
     0.5
-    >>> ot.wasserstein2_1d(x_a, x_b)
+    >>> ot.wasserstein_1d(x_a, x_b)
     0.5
 
     References
@@ -690,14 +591,8 @@ def wasserstein2_1d(x_a, x_b, a=None, b=None, p=1., dense=True, log=False):
 
     See Also
     --------
-    ot.lp.emd2_1d : EMD for 1d distributions
-    ot.lp.wasserstein_1d : Wasserstein for 1d distributions (returns the
-        transportation matrix instead of the cost)
+    ot.lp.emd_1d : EMD for 1d distributions
     """
-    if log:
-        cost, log = emd2_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p,
-                            dense=dense, log=log)
-        cost = np.power(cost, 1. / p)
-        return cost, log
-    return np.power(emd2_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p,
-                            dense=dense, log=log), 1. /  p)
-\ No newline at end of file
+    cost_emd = emd2_1d(x_a=x_a, x_b=x_b, a=a, b=b, metric='minkowski', p=p,
+                       dense=False, log=False)
+    return np.power(cost_emd, 1. / p)
author	Romain Tavenard <romain.tavenard@univ-rennes2.fr>	2019-06-27 11:08:15 +0200
committer	Romain Tavenard <romain.tavenard@univ-rennes2.fr>	2019-06-27 11:08:15 +0200
commit	c92e595009ad5e2ae6d4b2c040556cffb6316847 (patch)
tree	717e40eed0de6e35e302f3080ffc4eff349b3726 /ot
parent	bbc56e74bf119b8810c0de7b446bb01b30efc3c2 (diff)