[MRG] Domain adaptation and unbalanced solvers with backend support (#343)

* First draft
* Add matrix inverse and square root to backend
* Eigen decomposition for older versions of pytorch (1.8.1 and older)
* Corrected eigen decomposition for pytorch 1.8.1 and older
* Spectral theorem is a thing
* Optimization
* small optimization
* More functions converted
* pep8
* remove a warning and prepare torch meshgrid for future torch release (which will change default indexing)
* dots and pep8
* Meshgrid corrected for older version and prepared for future versions changes
* New backend functions
* Base transport
* LinearTransport
* All transport classes + pep8
* PR added to release file
* Jcpot barycenter test
* unbalanced with backend
* pep8
* bug solve
* test of domain adaptation with backends
* solve bug for tic toc & macos
* solving scipy deprecation warning
* solving scipy deprecation warning attempt2
* solving scipy deprecation warning attempt3
* A warning is triggered when a float->int conversion is detected
* bug solve
* docs
* release file updated
* Better handling of float->int conversion in EMD
* Corrected test for is_floating_point
* docs
* release file updated
* cupy does not allow implicit cast
* fromnumpy
* added test
* test da tf jax
* test unbalanced with no provided histogram
* using type_as argument in unif function correctly
* pep8
* transport plan cast in emd changed behaviour, now trying to cast as histogram's dtype, defaulting to cost matrix

Co-authored-by: Rémi Flamary <remi.flamary@gmail.com>
author: Nathan Cassereau <84033440+ncassereau-idris@users.noreply.github.com> 2022-03-24 10:53:47 +0100
committer: GitHub <noreply@github.com> 2022-03-24 10:53:47 +0100
commit: 767171593f2a98a26b9a39bf110a45085e3b982e (patch)
tree: 4eb4bcc657efc53a65c3fb4439bd0e0e106b6745 /ot/lp/__init__.py
parent: 9b9d2221d257f40ea3eb58b279b30d69162d62bb (diff)
1 files changed, 65 insertions, 18 deletions
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py
index d9b6fa9..abf7fe0 100644
--- a/ot/lp/__init__.py
+++ b/ot/lp/__init__.py
@@ -225,6 +225,13 @@ def emd(a, b, M, numItermax=100000, log=False, center_dual=True, numThreads=1):
         from all compatible backends. But the algorithm uses the C++ CPU backend
         which can lead to copy overhead on GPU arrays.
 
+    .. note:: This function will cast the computed transport plan to the data type
+        of the provided input with the following priority: :math:`\mathbf{a}`,
+        then :math:`\mathbf{b}`, then :math:`\mathbf{M}` if marginals are not provided.
+        Casting to an integer tensor might result in a loss of precision.
+        If this behaviour is unwanted, please make sure to provide a
+        floating point input.
+
     Uses the algorithm proposed in :ref:`[1] <references-emd>`.
 
     Parameters
@@ -290,12 +297,16 @@ def emd(a, b, M, numItermax=100000, log=False, center_dual=True, numThreads=1):
     a, b, M = list_to_array(a, b, M)
 
     a0, b0, M0 = a, b, M
+    if len(a0) != 0:
+        type_as = a0
+    elif len(b0) != 0:
+        type_as = b0
+    else:
+        type_as = M0
     nx = get_backend(M0, a0, b0)
 
     # convert to numpy
-    M = nx.to_numpy(M)
-    a = nx.to_numpy(a)
-    b = nx.to_numpy(b)
+    M, a, b = nx.to_numpy(M, a, b)
 
     # ensure float64
     a = np.asarray(a, dtype=np.float64)
@@ -330,15 +341,23 @@ def emd(a, b, M, numItermax=100000, log=False, center_dual=True, numThreads=1):
         u, v = estimate_dual_null_weights(u, v, a, b, M)
 
     result_code_string = check_result(result_code)
+    if not nx.is_floating_point(type_as):
+        warnings.warn(
+            "Input histogram consists of integer. The transport plan will be "
+            "casted accordingly, possibly resulting in a loss of precision. "
+            "If this behaviour is unwanted, please make sure your input "
+            "histogram consists of floating point elements.",
+            stacklevel=2
+        )
     if log:
         log = {}
         log['cost'] = cost
-        log['u'] = nx.from_numpy(u, type_as=a0)
-        log['v'] = nx.from_numpy(v, type_as=b0)
+        log['u'] = nx.from_numpy(u, type_as=type_as)
+        log['v'] = nx.from_numpy(v, type_as=type_as)
         log['warning'] = result_code_string
         log['result_code'] = result_code
-        return nx.from_numpy(G, type_as=M0), log
-    return nx.from_numpy(G, type_as=M0)
+        return nx.from_numpy(G, type_as=type_as), log
+    return nx.from_numpy(G, type_as=type_as)
 
 
 def emd2(a, b, M, processes=1,
@@ -364,6 +383,14 @@ def emd2(a, b, M, processes=1,
         from all compatible backends. But the algorithm uses the C++ CPU backend
         which can lead to copy overhead on GPU arrays.
 
+    .. note:: This function will cast the computed transport plan and
+        transportation loss to the data type of the provided input with the
+        following priority: :math:`\mathbf{a}`, then :math:`\mathbf{b}`,
+        then :math:`\mathbf{M}` if marginals are not provided.
+        Casting to an integer tensor might result in a loss of precision.
+        If this behaviour is unwanted, please make sure to provide a
+        floating point input.
+
     Uses the algorithm proposed in :ref:`[1] <references-emd2>`.
 
     Parameters
@@ -432,12 +459,16 @@ def emd2(a, b, M, processes=1,
     a, b, M = list_to_array(a, b, M)
 
     a0, b0, M0 = a, b, M
+    if len(a0) != 0:
+        type_as = a0
+    elif len(b0) != 0:
+        type_as = b0
+    else:
+        type_as = M0
     nx = get_backend(M0, a0, b0)
 
     # convert to numpy
-    M = nx.to_numpy(M)
-    a = nx.to_numpy(a)
-    b = nx.to_numpy(b)
+    M, a, b = nx.to_numpy(M, a, b)
 
     a = np.asarray(a, dtype=np.float64)
     b = np.asarray(b, dtype=np.float64)
@@ -470,14 +501,22 @@ def emd2(a, b, M, processes=1,
 
             result_code_string = check_result(result_code)
             log = {}
-            G = nx.from_numpy(G, type_as=M0)
+            if not nx.is_floating_point(type_as):
+                warnings.warn(
+                    "Input histogram consists of integer. The transport plan will be "
+                    "casted accordingly, possibly resulting in a loss of precision. "
+                    "If this behaviour is unwanted, please make sure your input "
+                    "histogram consists of floating point elements.",
+                    stacklevel=2
+                )
+            G = nx.from_numpy(G, type_as=type_as)
             if return_matrix:
                 log['G'] = G
-            log['u'] = nx.from_numpy(u, type_as=a0)
-            log['v'] = nx.from_numpy(v, type_as=b0)
+            log['u'] = nx.from_numpy(u, type_as=type_as)
+            log['v'] = nx.from_numpy(v, type_as=type_as)
             log['warning'] = result_code_string
             log['result_code'] = result_code
-            cost = nx.set_gradients(nx.from_numpy(cost, type_as=M0),
+            cost = nx.set_gradients(nx.from_numpy(cost, type_as=type_as),
                                     (a0, b0, M0), (log['u'], log['v'], G))
             return [cost, log]
     else:
@@ -491,10 +530,18 @@ def emd2(a, b, M, processes=1,
             if np.any(~asel) or np.any(~bsel):
                 u, v = estimate_dual_null_weights(u, v, a, b, M)
 
-            G = nx.from_numpy(G, type_as=M0)
-            cost = nx.set_gradients(nx.from_numpy(cost, type_as=M0),
-                                    (a0, b0, M0), (nx.from_numpy(u, type_as=a0),
-                                                   nx.from_numpy(v, type_as=b0), G))
+            if not nx.is_floating_point(type_as):
+                warnings.warn(
+                    "Input histogram consists of integer. The transport plan will be "
+                    "casted accordingly, possibly resulting in a loss of precision. "
+                    "If this behaviour is unwanted, please make sure your input "
+                    "histogram consists of floating point elements.",
+                    stacklevel=2
+                )
+            G = nx.from_numpy(G, type_as=type_as)
+            cost = nx.set_gradients(nx.from_numpy(cost, type_as=type_as),
+                                    (a0, b0, M0), (nx.from_numpy(u, type_as=type_as),
+                                                   nx.from_numpy(v, type_as=type_as), G))
 
             check_result(result_code)
             return cost
author	Nathan Cassereau <84033440+ncassereau-idris@users.noreply.github.com>	2022-03-24 10:53:47 +0100
committer	GitHub <noreply@github.com>	2022-03-24 10:53:47 +0100
commit	767171593f2a98a26b9a39bf110a45085e3b982e (patch)
tree	4eb4bcc657efc53a65c3fb4439bd0e0e106b6745 /ot/lp/__init__.py
parent	9b9d2221d257f40ea3eb58b279b30d69162d62bb (diff)