From 42a62c123776e04ee805aefb9afd6d98abdcf192 Mon Sep 17 00:00:00 2001
From: Tianlin Liu
Date: Tue, 25 Apr 2023 12:14:29 +0200
Subject: [FEAT] add the sparsity-constrained optimal transport functionality and example (#459)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* add sparsity-constrained ot functionality and example
* correct typos; add projection_sparse_simplex
* add gradcheck; merge ot.sparse into ot.smooth.
* reuse existing ot.smooth functions with a new 'sparsity_constrained' reg_type
* address pep8 error
* add backends for
* update releases

---------

Co-authored-by: Rémi Flamary
---
 ot/smooth.py | 80 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-----
 ot/utils.py  | 81 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 155 insertions(+), 6 deletions(-)

(limited to 'ot')

diff --git a/ot/smooth.py b/ot/smooth.py
index 8e0ef38..331cfc0 100644
--- a/ot/smooth.py
+++ b/ot/smooth.py
@@ -24,9 +24,10 @@
 # Author: Mathieu Blondel
 #         Remi Flamary
+#         Tianlin Liu

 """
-Smooth and Sparse Optimal Transport solvers (KL an L2 reg.)
+Smooth and Sparse (KL and L2 reg.) and sparsity-constrained OT solvers.

 Implementation of :
 Smooth and Sparse Optimal Transport.
@@ -34,17 +35,31 @@ Mathieu Blondel, Vivien Seguy, Antoine Rolet.
 In Proc. of AISTATS 2018.
 https://arxiv.org/abs/1710.06276

+(Original code from https://github.com/mblondel/smooth-ot/)
+
+Sparsity-Constrained Optimal Transport.
+Liu, T., Puigcerver, J., & Blondel, M. (2023).
+Sparsity-constrained optimal transport.
+Proceedings of the Eleventh International Conference on
+Learning Representations (ICLR).
+https://arxiv.org/abs/2209.15466
+
+
 [17] Blondel, M., Seguy, V., & Rolet, A. (2018). Smooth and Sparse Optimal
 Transport. Proceedings of the Twenty-First International Conference on
 Artificial Intelligence and Statistics (AISTATS).

-Original code from https://github.com/mblondel/smooth-ot/
+[50] Liu, T., Puigcerver, J., & Blondel, M. (2023).
+Sparsity-constrained optimal transport.
+Proceedings of the Eleventh International Conference on
+Learning Representations (ICLR).
 """

 import numpy as np
 from scipy.optimize import minimize
 from .backend import get_backend
+import ot


 def projection_simplex(V, z=1, axis=None):
@@ -209,6 +224,39 @@ class SquaredL2(Regularization):
         return 0.5 * self.gamma * np.sum(T ** 2)


+class SparsityConstrained(Regularization):
+    """ Squared L2 regularization with sparsity constraints """
+
+    def __init__(self, max_nz, gamma=1.0):
+        self.max_nz = max_nz
+        self.gamma = gamma
+
+    def delta_Omega(self, X):
+        # For each column of X, find entries that are not among the top max_nz.
+        non_top_indices = np.argpartition(
+            -X, self.max_nz, axis=0)[self.max_nz:]
+        # Zero out these entries.
+        if X.ndim == 1:
+            X[non_top_indices] = 0.0
+        else:
+            X[non_top_indices, np.arange(X.shape[1])] = 0.0
+        max_X = np.maximum(X, 0)
+        val = np.sum(max_X ** 2, axis=0) / (2 * self.gamma)
+        G = max_X / self.gamma
+        return val, G
+
+    def max_Omega(self, X, b):
+        # Project the scaled X onto the simplex with sparsity constraint.
+        G = ot.utils.projection_sparse_simplex(
+            X / (b * self.gamma), self.max_nz, axis=0)
+        val = np.sum(X * G, axis=0)
+        val -= 0.5 * self.gamma * b * np.sum(G * G, axis=0)
+        return val, G
+
+    def Omega(self, T):
+        return 0.5 * self.gamma * np.sum(T ** 2)
+
+
 def dual_obj_grad(alpha, beta, a, b, C, regul):
     r""" Compute objective value and gradients of dual objective.
@@ -435,8 +483,9 @@ def get_plan_from_semi_dual(alpha, b, C, regul):
     return regul.max_Omega(X, b)[1] * b


-def smooth_ot_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=1e-9,
-                   numItermax=500, verbose=False, log=False):
+def smooth_ot_dual(a, b, M, reg, reg_type='l2',
+                   method="L-BFGS-B", stopThr=1e-9,
+                   numItermax=500, verbose=False, log=False, max_nz=None):
     r""" Solve the regularized OT problem in the dual and return the OT matrix

@@ -477,6 +526,9 @@ def smooth_ot_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=1e-9,
           :ref:`[2] `)

         - 'l2' : Squared Euclidean regularization
+        - 'sparsity_constrained' : Sparsity-constrained regularization [50]
+    max_nz : int or None, optional
+        Used only when reg_type = 'sparsity_constrained' to specify the maximum number of nonzeros per column of the optimal plan; not used for other regularization types.
     method : str
         Solver to use for scipy.optimize.minimize
     numItermax : int, optional
@@ -504,6 +556,8 @@ def smooth_ot_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=1e-9,
     .. [17] Blondel, M., Seguy, V., & Rolet, A. (2018). Smooth and Sparse Optimal Transport. Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics (AISTATS).

+    .. [50] Liu, T., Puigcerver, J., & Blondel, M. (2023). Sparsity-constrained optimal transport. Proceedings of the Eleventh International Conference on Learning Representations (ICLR).
+
     See Also
     --------
     ot.lp.emd : Unregularized OT
@@ -518,6 +572,11 @@ def smooth_ot_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=1e-9,
         regul = SquaredL2(gamma=reg)
     elif reg_type.lower() in ['entropic', 'negentropy', 'kl']:
         regul = NegEntropy(gamma=reg)
+    elif reg_type.lower() in ['sparsity_constrained', 'sparsity-constrained']:
+        if not isinstance(max_nz, int):
+            raise ValueError(
+                f'max_nz {max_nz} must be an integer')
+        regul = SparsityConstrained(gamma=reg, max_nz=max_nz)
     else:
         raise NotImplementedError('Unknown regularization')

@@ -539,7 +598,8 @@ def smooth_ot_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=1e-9,
     return G


-def smooth_ot_semi_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=1e-9,
+def smooth_ot_semi_dual(a, b, M, reg, reg_type='l2', max_nz=None,
+                        method="L-BFGS-B", stopThr=1e-9,
                         numItermax=500, verbose=False, log=False):
     r""" Solve the regularized OT problem in the semi-dual and return the OT matrix

@@ -583,6 +643,9 @@ def smooth_ot_semi_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=
           :ref:`[2] `)

         - 'l2' : Squared Euclidean regularization
+        - 'sparsity_constrained' : Sparsity-constrained regularization [50]
+    max_nz : int or None, optional
+        Used only when reg_type = 'sparsity_constrained' to specify the maximum number of nonzeros per column of the optimal plan; not used for other regularization types.
     method : str
         Solver to use for scipy.optimize.minimize
     numItermax : int, optional
@@ -610,6 +673,8 @@ def smooth_ot_semi_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=
     .. [17] Blondel, M., Seguy, V., & Rolet, A. (2018). Smooth and Sparse Optimal Transport. Proceedings of the Twenty-First International Conference on Artificial Intelligence and Statistics (AISTATS).

+    .. [50] Liu, T., Puigcerver, J., & Blondel, M. (2023). Sparsity-constrained optimal transport. Proceedings of the Eleventh International Conference on Learning Representations (ICLR).
+
     See Also
     --------
     ot.lp.emd : Unregularized OT
@@ -621,6 +686,11 @@ def smooth_ot_semi_dual(a, b, M, reg, reg_type='l2', method="L-BFGS-B", stopThr=
         regul = SquaredL2(gamma=reg)
     elif reg_type.lower() in ['entropic', 'negentropy', 'kl']:
         regul = NegEntropy(gamma=reg)
+    elif reg_type.lower() in ['sparsity_constrained', 'sparsity-constrained']:
+        if not isinstance(max_nz, int):
+            raise ValueError(
+                f'max_nz {max_nz} must be an integer')
+        regul = SparsityConstrained(gamma=reg, max_nz=max_nz)
     else:
         raise NotImplementedError('Unknown regularization')

diff --git a/ot/utils.py b/ot/utils.py
index 3423a7e..3343028 100644
--- a/ot/utils.py
+++ b/ot/utils.py
@@ -15,7 +15,7 @@ from scipy.spatial.distance import cdist
 import sys
 import warnings
 from inspect import signature
-from .backend import get_backend, Backend, NumpyBackend
+from .backend import get_backend, Backend, NumpyBackend, JaxBackend


 __time_tic_toc = time.time()
@@ -117,6 +117,85 @@ def proj_simplex(v, z=1):
     return w


+def projection_sparse_simplex(V, max_nz, z=1, axis=None, nx=None):
+    r"""Projection of :math:`\mathbf{V}` onto the simplex with cardinality constraint (maximum number of non-zero elements) and then scaled by `z`.
+
+    .. math::
+        P\left(\mathbf{V}, \mathrm{max\_nz}, z\right) = \mathop{\arg \min}_{\substack{\mathbf{y} \geq 0 \\ \sum_i \mathbf{y}_i = z \\ \|\mathbf{y}\|_0 \le \mathrm{max\_nz}}} \quad \|\mathbf{y} - \mathbf{V}\|^2
+
+    Parameters
+    ----------
+    V: 1-dim or 2-dim ndarray
+    max_nz: int
+        Maximum number of non-zero elements in the projection.
+    z: float or array
+        If array, len(z) must be compatible with :math:`\mathbf{V}`
+    axis: None or int
+        - axis=None: project :math:`\mathbf{V}` by :math:`P(\mathbf{V}.\mathrm{ravel}(), max_nz, z)`
+        - axis=1: project each :math:`\mathbf{V}_i` by :math:`P(\mathbf{V}_i, max_nz, z_i)`
+        - axis=0: project each :math:`\mathbf{V}_{:, j}` by :math:`P(\mathbf{V}_{:, j}, max_nz, z_j)`
+
+    Returns
+    -------
+    projection: ndarray, shape :math:`\mathbf{V}`.shape
+
+    References:
+        Sparse projections onto the simplex
+        Anastasios Kyrillidis, Stephen Becker, Volkan Cevher, and Christoph Koch
+        ICML 2013
+        https://arxiv.org/abs/1206.1529
+    """
+    if nx is None:
+        nx = get_backend(V)
+    if V.ndim == 1:
+        return projection_sparse_simplex(
+            # V[nx.newaxis, :], max_nz, z, axis=1).ravel()
+            V[None, :], max_nz, z, axis=1).ravel()
+
+    if V.ndim > 2:
+        raise ValueError('V.ndim must be <= 2')
+
+    if axis == 1:
+        # For each row of V, find top max_nz values; arrange the
+        # corresponding column indices such that their values are
+        # in a descending order.
+        max_nz_indices = nx.argsort(V, axis=1)[:, -max_nz:]
+        max_nz_indices = nx.flip(max_nz_indices, axis=1)
+
+        row_indices = nx.arange(V.shape[0])
+        row_indices = row_indices.reshape(-1, 1)
+
+        # Extract the top max_nz values for each row
+        # and then project to simplex.
+        U = V[row_indices, max_nz_indices]
+        z = nx.ones(len(U)) * z
+        cssv = nx.cumsum(U, axis=1) - z[:, None]
+        ind = nx.arange(max_nz) + 1
+        cond = U - cssv / ind > 0
+        # rho = nx.count_nonzero(cond, axis=1)
+        rho = nx.sum(cond, axis=1)
+        theta = cssv[nx.arange(len(U)), rho - 1] / rho
+        nz_projection = nx.maximum(U - theta[:, None], 0)
+
+        # Put the projection of max_nz_values to their original column indices
+        # while keeping other values zero.
+        sparse_projection = nx.zeros(V.shape, type_as=nz_projection)
+
+        if isinstance(nx, JaxBackend):
+            # In JAX, we need to use the `at` property of `jax.numpy.ndarray`
+            # to do in-place array modifications.
+            sparse_projection = sparse_projection.at[
+                row_indices, max_nz_indices].set(nz_projection)
+        else:
+            sparse_projection[row_indices, max_nz_indices] = nz_projection
+        return sparse_projection
+
+    elif axis == 0:
+        return projection_sparse_simplex(V.T, max_nz, z, axis=1).T
+
+    else:
+        V = V.ravel().reshape(1, -1)
+        return projection_sparse_simplex(V, max_nz, z, axis=1).ravel()
+
+
 def unif(n, type_as=None):
     r""" Return a uniform histogram of length `n` (simplex).
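
For reference, a minimal usage sketch of the functionality added by this patch (not part of the commit itself). It assumes the patch is applied on top of POT and only relies on functions that already exist in the library (ot.unif, ot.dist, ot.smooth.smooth_ot_dual, ot.utils) plus the new reg_type and max_nz arguments introduced above; the toy data and the values reg=0.1, max_nz=2 are arbitrary illustrative choices.

import numpy as np
import ot

rng = np.random.RandomState(0)
n = 5
a = ot.unif(n)               # uniform source histogram
b = ot.unif(n)               # uniform target histogram
x = rng.randn(n, 1)
y = rng.randn(n, 1)
M = ot.dist(x, y)            # squared Euclidean cost matrix

# Sparsity-constrained OT: at most max_nz non-zero entries per column of the plan.
G = ot.smooth.smooth_ot_dual(a, b, M, reg=0.1,
                             reg_type='sparsity_constrained', max_nz=2)
print((G > 1e-10).sum(axis=0))   # column-wise non-zero counts, at most max_nz each

# The sparse simplex projection added to ot.utils can also be called directly:
V = rng.rand(4, 3)
P = ot.utils.projection_sparse_simplex(V, max_nz=2, axis=0)
# each column of P sums to 1 and has at most 2 non-zero entries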