diff options
author | Nathan Cassereau <84033440+ncassereau-idris@users.noreply.github.com> | 2022-03-24 10:53:47 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-03-24 10:53:47 +0100 |
commit | 767171593f2a98a26b9a39bf110a45085e3b982e (patch) | |
tree | 4eb4bcc657efc53a65c3fb4439bd0e0e106b6745 /ot/lp/__init__.py | |
parent | 9b9d2221d257f40ea3eb58b279b30d69162d62bb (diff) |
[MRG] Domain adaptation and unbalanced solvers with backend support (#343)
* First draft
* Add matrix inverse and square root to backend
* Eigen decomposition for older versions of pytorch (1.8.1 and older)
* Corrected eigen decomposition for pytorch 1.8.1 and older
* Spectral theorem is a thing
* Optimization
* small optimization
* More functions converted
* pep8
* remove a warning and prepare torch meshgrid for future torch release (which will change default indexing)
* dots and pep8
* Meshgrid corrected for older version and prepared for future versions changes
* New backend functions
* Base transport
* LinearTransport
* All transport classes + pep8
* PR added to release file
* Jcpot barycenter test
* unbalanced with backend
* pep8
* bug solve
* test of domain adaptation with backends
* solve bug for tic toc & macos
* solving scipy deprecation warning
* solving scipy deprecation warning attempt2
* solving scipy deprecation warning attempt3
* A warning is triggered when a float->int conversion is detected
* bug solve
* docs
* release file updated
* Better handling of float->int conversion in EMD
* Corrected test for is_floating_point
* docs
* release file updated
* cupy does not allow implicit cast
* fromnumpy
* added test
* test da tf jax
* test unbalanced with no provided histogram
* using type_as argument in unif function correctly
* pep8
* transport plan cast in emd changed behaviour, now trying to cast as histogram's dtype, defaulting to cost matrix
Co-authored-by: Rémi Flamary <remi.flamary@gmail.com>
Diffstat (limited to 'ot/lp/__init__.py')
-rw-r--r-- | ot/lp/__init__.py | 83 |
1 files changed, 65 insertions, 18 deletions
diff --git a/ot/lp/__init__.py b/ot/lp/__init__.py index d9b6fa9..abf7fe0 100644 --- a/ot/lp/__init__.py +++ b/ot/lp/__init__.py @@ -225,6 +225,13 @@ def emd(a, b, M, numItermax=100000, log=False, center_dual=True, numThreads=1): from all compatible backends. But the algorithm uses the C++ CPU backend which can lead to copy overhead on GPU arrays. + .. note:: This function will cast the computed transport plan to the data type + of the provided input with the following priority: :math:`\mathbf{a}`, + then :math:`\mathbf{b}`, then :math:`\mathbf{M}` if marginals are not provided. + Casting to an integer tensor might result in a loss of precision. + If this behaviour is unwanted, please make sure to provide a + floating point input. + Uses the algorithm proposed in :ref:`[1] <references-emd>`. Parameters @@ -290,12 +297,16 @@ def emd(a, b, M, numItermax=100000, log=False, center_dual=True, numThreads=1): a, b, M = list_to_array(a, b, M) a0, b0, M0 = a, b, M + if len(a0) != 0: + type_as = a0 + elif len(b0) != 0: + type_as = b0 + else: + type_as = M0 nx = get_backend(M0, a0, b0) # convert to numpy - M = nx.to_numpy(M) - a = nx.to_numpy(a) - b = nx.to_numpy(b) + M, a, b = nx.to_numpy(M, a, b) # ensure float64 a = np.asarray(a, dtype=np.float64) @@ -330,15 +341,23 @@ def emd(a, b, M, numItermax=100000, log=False, center_dual=True, numThreads=1): u, v = estimate_dual_null_weights(u, v, a, b, M) result_code_string = check_result(result_code) + if not nx.is_floating_point(type_as): + warnings.warn( + "Input histogram consists of integer. The transport plan will be " + "casted accordingly, possibly resulting in a loss of precision. 
" + "If this behaviour is unwanted, please make sure your input " + "histogram consists of floating point elements.", + stacklevel=2 + ) if log: log = {} log['cost'] = cost - log['u'] = nx.from_numpy(u, type_as=a0) - log['v'] = nx.from_numpy(v, type_as=b0) + log['u'] = nx.from_numpy(u, type_as=type_as) + log['v'] = nx.from_numpy(v, type_as=type_as) log['warning'] = result_code_string log['result_code'] = result_code - return nx.from_numpy(G, type_as=M0), log - return nx.from_numpy(G, type_as=M0) + return nx.from_numpy(G, type_as=type_as), log + return nx.from_numpy(G, type_as=type_as) def emd2(a, b, M, processes=1, @@ -364,6 +383,14 @@ def emd2(a, b, M, processes=1, from all compatible backends. But the algorithm uses the C++ CPU backend which can lead to copy overhead on GPU arrays. + .. note:: This function will cast the computed transport plan and + transportation loss to the data type of the provided input with the + following priority: :math:`\mathbf{a}`, then :math:`\mathbf{b}`, + then :math:`\mathbf{M}` if marginals are not provided. + Casting to an integer tensor might result in a loss of precision. + If this behaviour is unwanted, please make sure to provide a + floating point input. + Uses the algorithm proposed in :ref:`[1] <references-emd2>`. Parameters @@ -432,12 +459,16 @@ def emd2(a, b, M, processes=1, a, b, M = list_to_array(a, b, M) a0, b0, M0 = a, b, M + if len(a0) != 0: + type_as = a0 + elif len(b0) != 0: + type_as = b0 + else: + type_as = M0 nx = get_backend(M0, a0, b0) # convert to numpy - M = nx.to_numpy(M) - a = nx.to_numpy(a) - b = nx.to_numpy(b) + M, a, b = nx.to_numpy(M, a, b) a = np.asarray(a, dtype=np.float64) b = np.asarray(b, dtype=np.float64) @@ -470,14 +501,22 @@ def emd2(a, b, M, processes=1, result_code_string = check_result(result_code) log = {} - G = nx.from_numpy(G, type_as=M0) + if not nx.is_floating_point(type_as): + warnings.warn( + "Input histogram consists of integer. 
The transport plan will be " + "casted accordingly, possibly resulting in a loss of precision. " + "If this behaviour is unwanted, please make sure your input " + "histogram consists of floating point elements.", + stacklevel=2 + ) + G = nx.from_numpy(G, type_as=type_as) if return_matrix: log['G'] = G - log['u'] = nx.from_numpy(u, type_as=a0) - log['v'] = nx.from_numpy(v, type_as=b0) + log['u'] = nx.from_numpy(u, type_as=type_as) + log['v'] = nx.from_numpy(v, type_as=type_as) log['warning'] = result_code_string log['result_code'] = result_code - cost = nx.set_gradients(nx.from_numpy(cost, type_as=M0), + cost = nx.set_gradients(nx.from_numpy(cost, type_as=type_as), (a0, b0, M0), (log['u'], log['v'], G)) return [cost, log] else: @@ -491,10 +530,18 @@ def emd2(a, b, M, processes=1, if np.any(~asel) or np.any(~bsel): u, v = estimate_dual_null_weights(u, v, a, b, M) - G = nx.from_numpy(G, type_as=M0) - cost = nx.set_gradients(nx.from_numpy(cost, type_as=M0), - (a0, b0, M0), (nx.from_numpy(u, type_as=a0), - nx.from_numpy(v, type_as=b0), G)) + if not nx.is_floating_point(type_as): + warnings.warn( + "Input histogram consists of integer. The transport plan will be " + "casted accordingly, possibly resulting in a loss of precision. " + "If this behaviour is unwanted, please make sure your input " + "histogram consists of floating point elements.", + stacklevel=2 + ) + G = nx.from_numpy(G, type_as=type_as) + cost = nx.set_gradients(nx.from_numpy(cost, type_as=type_as), + (a0, b0, M0), (nx.from_numpy(u, type_as=type_as), + nx.from_numpy(v, type_as=type_as), G)) check_result(result_code) return cost |