author     Nathan Cassereau <84033440+ncassereau-idris@users.noreply.github.com>  2021-12-09 17:55:12 +0100
committer  GitHub <noreply@github.com>  2021-12-09 17:55:12 +0100
commit     f8d871e8c6f15009f559ece6a12eb8d8891c60fb (patch)
tree       9aa46b2fcc8046c6cddd8e9159a6f607dcf0e1e9 /benchmarks
parent     b3dc68feac355fa94c4237f4ecad65edc9f7a7e8 (diff)
[MRG] Tensorflow backend & Benchmarker & Myst_parser (#316)
* First batch of tf methods (to be continued)
* Second batch of method (yet to debug)
* tensorflow for cpu
* add tf requirement
* pep8 + bug
* small changes
* attempt to solve pymanopt bug with tf2
* attempt #2
* attempt #3
* attempt 4
* docstring
* correct pep8 violation introduced in merge conflicts resolution
* attempt 5
* attempt 6
* just a random try
* Revert "just a random try"
  This reverts commit 8223e768bfe33635549fb66cca2267514a60ebbf.
* GPU tests for tensorflow
* pep8
* attempt to solve issue with m2r2
* Remove transpose backend method
* first draft of benchmarker (need to correct time measurement)
* prettier bench table
* Bitsize and prettier device methods
* prettified table bench
* Bug corrected (results were mixed up in the final table)
* Better perf counter (for GPU support)
* pep8
* EMD bench
* solve bug if no GPU available
* pep8
* warning about tensorflow numpy api being required in the backend.py docstring
* Bug solve in backend docstring
* not covering code which requires a GPU
* Tensorflow gradients manipulation tested
* Number of warmup runs is now customizable
* typo
* Remove some warnings while building docs
* Change prettier_device to device_type in backend
* Correct JAX mistakes preventing to see the CPU if a GPU is present
* Attempt to solve JAX bug in case no GPU is found
* Reworked benchmarks order and results storage & clear GPU after usage by benchmark
* Add bench to backend docstring
* better benchs
* remove useless stuff
* Better device_type
* Now using MYST_PARSER and solving links issue in the README.md / online docs
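The main change is a TensorFlow implementation of POT's backend API, so backend-dispatched functions can consume TensorFlow tensors directly. A rough sketch of what this enables (illustrative only, not code from this commit; it assumes TensorFlow is installed and, as the commit message notes, TensorFlow's NumPy API must be enabled first):

    # Illustrative sketch only: using POT with TensorFlow tensors via the new backend.
    import numpy as np
    import tensorflow as tf
    from tensorflow.python.ops.numpy_ops import np_config
    import ot

    # The tf backend requires TensorFlow's NumPy behaviour (see the backend.py docstring note).
    np_config.enable_numpy_behavior()

    rng = np.random.RandomState(0)
    x = tf.convert_to_tensor(rng.randn(50, 2))
    y = tf.convert_to_tensor(rng.randn(50, 2))
    a = tf.convert_to_tensor(ot.unif(50))

    M = ot.dist(x, y)      # cost matrix stays a TensorFlow tensor
    G = ot.emd(a, a, M)    # solved through the backend-dispatched solver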
Diffstat (limited to 'benchmarks')
-rw-r--r--  benchmarks/__init__.py          5
-rw-r--r--  benchmarks/benchmark.py       105
-rw-r--r--  benchmarks/emd.py              40
-rw-r--r--  benchmarks/sinkhorn_knopp.py   42
4 files changed, 192 insertions, 0 deletions
diff --git a/benchmarks/__init__.py b/benchmarks/__init__.py
new file mode 100644
index 0000000..37f5e56
--- /dev/null
+++ b/benchmarks/__init__.py
@@ -0,0 +1,5 @@
+from . import benchmark
+from . import sinkhorn_knopp
+from . import emd
+
+__all__= ["benchmark", "sinkhorn_knopp", "emd"]
diff --git a/benchmarks/benchmark.py b/benchmarks/benchmark.py
new file mode 100644
index 0000000..7973c6b
--- /dev/null
+++ b/benchmarks/benchmark.py
@@ -0,0 +1,105 @@
+# /usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+from ot.backend import get_backend_list, jax, tf
+import gc
+
+
+def setup_backends():
+ if jax:
+ from jax.config import config
+ config.update("jax_enable_x64", True)
+
+ if tf:
+ from tensorflow.python.ops.numpy_ops import np_config
+ np_config.enable_numpy_behavior()
+
+
+def exec_bench(setup, tested_function, param_list, n_runs, warmup_runs):
+ backend_list = get_backend_list()
+ for i, nx in enumerate(backend_list):
+ if nx.__name__ == "tf" and i < len(backend_list) - 1:
+ # Tensorflow should be the last one to be benchmarked because
+ # as far as I'm aware, there is no way to force it to release
+ # GPU memory. Hence, if any other backend is benchmarked after
+ # Tensorflow and requires the usage of a GPU, it will not have the
+ # full memory available and you may have a GPU Out Of Memory error
+ # even though your GPU can technically hold your tensors in memory.
+ backend_list.pop(i)
+ backend_list.append(nx)
+ break
+
+ inputs = [setup(param) for param in param_list]
+ results = dict()
+ for nx in backend_list:
+ for i in range(len(param_list)):
+ print(nx, param_list[i])
+ args = inputs[i]
+ results_nx = nx._bench(
+ tested_function,
+ *args,
+ n_runs=n_runs,
+ warmup_runs=warmup_runs
+ )
+ gc.collect()
+ results_nx_with_param_in_key = dict()
+ for key in results_nx:
+ new_key = (param_list[i], *key)
+ results_nx_with_param_in_key[new_key] = results_nx[key]
+ results.update(results_nx_with_param_in_key)
+ return results
+
+
+def convert_to_html_table(results, param_name, main_title=None, comments=None):
+ string = "<table>\n"
+ keys = list(results.keys())
+ params, names, devices, bitsizes = zip(*keys)
+
+ devices_names = sorted(list(set(zip(devices, names))))
+ params = sorted(list(set(params)))
+ bitsizes = sorted(list(set(bitsizes)))
+ length = len(devices_names) + 1
+ cpus_cols = list(devices).count("CPU") / len(bitsizes) / len(params)
+ gpus_cols = list(devices).count("GPU") / len(bitsizes) / len(params)
+ assert cpus_cols + gpus_cols == len(devices_names)
+
+ if main_title is not None:
+ string += f'<tr><th align="center" colspan="{length}">{str(main_title)}</th></tr>\n'
+
+ for i, bitsize in enumerate(bitsizes):
+
+ if i != 0:
+ string += f'<tr><td colspan="{length}">&nbsp;</td></tr>\n'
+
+ # make bitsize header
+ text = f"{bitsize} bits"
+ if comments is not None:
+ text += " - "
+ if isinstance(comments, (tuple, list)) and len(comments) == len(bitsizes):
+ text += str(comments[i])
+ else:
+ text += str(comments)
+ string += f'<tr><th align="center">Bitsize</th>'
+ string += f'<th align="center" colspan="{length - 1}">{text}</th></tr>\n'
+
+ # make device header
+ string += f'<tr><th align="center">Device</th>'
+ string += f'<th align="center" colspan="{cpus_cols}">CPU</th>'
+ string += f'<th align="center" colspan="{gpus_cols}">GPU</th></tr>\n'
+
+ # make param_name / backend header
+ string += f'<tr><th align="center">{param_name}</th>'
+ for device, name in devices_names:
+ string += f'<th align="center">{name}</th>'
+ string += "</tr>\n"
+
+ # make results rows
+ for param in params:
+ string += f'<tr><td align="center">{param}</td>'
+ for device, name in devices_names:
+ key = (param, name, device, bitsize)
+ string += f'<td align="center">{results[key]:.4f}</td>'
+ string += "</tr>\n"
+
+ string += "</table>"
+ return string
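For reference, exec_bench returns a flat dictionary keyed by (parameter, backend name, device, bitsize) tuples, and convert_to_html_table pivots it into one column per (device, backend) pair and one row per parameter value. A minimal illustration with made-up timings (hypothetical numbers and backend name, assuming the benchmarks package is importable from the repository root):

    # Made-up timings, only to show the key layout consumed by convert_to_html_table:
    # (param, backend name, device, bitsize) -> averaged runtime in seconds.
    from benchmarks.benchmark import convert_to_html_table

    results = {
        (100, "Numpy", "CPU", 32): 0.0012,
        (500, "Numpy", "CPU", 32): 0.0101,
        (100, "Numpy", "CPU", 64): 0.0015,
        (500, "Numpy", "CPU", 64): 0.0123,
    }
    print(convert_to_html_table(
        results,
        param_name="Sample size",
        main_title="EMD - illustrative numbers only",
    ))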
diff --git a/benchmarks/emd.py b/benchmarks/emd.py
new file mode 100644
index 0000000..9f64863
--- /dev/null
+++ b/benchmarks/emd.py
@@ -0,0 +1,40 @@
+# /usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import ot
+from .benchmark import (
+ setup_backends,
+ exec_bench,
+ convert_to_html_table
+)
+
+
+def setup(n_samples):
+ rng = np.random.RandomState(789465132)
+ x = rng.randn(n_samples, 2)
+ y = rng.randn(n_samples, 2)
+
+ a = ot.utils.unif(n_samples)
+ M = ot.dist(x, y)
+ return a, M
+
+
+if __name__ == "__main__":
+ n_runs = 100
+ warmup_runs = 10
+ param_list = [50, 100, 500, 1000, 2000, 5000]
+
+ setup_backends()
+ results = exec_bench(
+ setup=setup,
+ tested_function=lambda a, M: ot.emd(a, a, M),
+ param_list=param_list,
+ n_runs=n_runs,
+ warmup_runs=warmup_runs
+ )
+ print(convert_to_html_table(
+ results,
+ param_name="Sample size",
+ main_title=f"EMD - Averaged on {n_runs} runs"
+ ))
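The EMD benchmark above times the exact solver on square problems: the same uniform weight vector is used for both marginals, while the squared-Euclidean cost comes from two different 2-D Gaussian samples. A standalone run of the call being timed could look like this (a sketch, not part of the commit; assumes POT is installed):

    # One-off run of the benchmarked call, outside exec_bench.
    import numpy as np
    import ot

    rng = np.random.RandomState(789465132)
    x, y = rng.randn(100, 2), rng.randn(100, 2)
    a = ot.utils.unif(100)           # uniform weights summing to 1
    M = ot.dist(x, y)                # squared-Euclidean cost, shape (100, 100)

    G = ot.emd(a, a, M)              # exact transport plan
    assert G.shape == (100, 100)
    assert np.isclose(G.sum(), 1.0)  # all the mass is transported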
diff --git a/benchmarks/sinkhorn_knopp.py b/benchmarks/sinkhorn_knopp.py
new file mode 100644
index 0000000..3a1ef3f
--- /dev/null
+++ b/benchmarks/sinkhorn_knopp.py
@@ -0,0 +1,42 @@
+# /usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import ot
+from .benchmark import (
+ setup_backends,
+ exec_bench,
+ convert_to_html_table
+)
+
+
+def setup(n_samples):
+ rng = np.random.RandomState(123456789)
+ a = rng.rand(n_samples // 4, 100)
+ b = rng.rand(n_samples, 100)
+
+ wa = ot.unif(n_samples // 4)
+ wb = ot.unif(n_samples)
+
+ M = ot.dist(a.copy(), b.copy())
+ return wa, wb, M
+
+
+if __name__ == "__main__":
+ n_runs = 100
+ warmup_runs = 10
+ param_list = [50, 100, 500, 1000, 2000, 5000]
+
+ setup_backends()
+ results = exec_bench(
+ setup=setup,
+ tested_function=lambda *args: ot.bregman.sinkhorn(*args, reg=1, stopThr=1e-7),
+ param_list=param_list,
+ n_runs=n_runs,
+ warmup_runs=warmup_runs
+ )
+ print(convert_to_html_table(
+ results,
+ param_name="Sample size",
+ main_title=f"Sinkhorn Knopp - Averaged on {n_runs} runs"
+ ))
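Unlike the EMD script, this benchmark builds a rectangular problem: n_samples // 4 source points against n_samples target points, each with 100 features, so the Sinkhorn iterations run on a non-square cost matrix. Both scripts print their HTML table to stdout and, given the relative imports, are presumably meant to be run as modules from the repository root (for example, python -m benchmarks.sinkhorn_knopp). A quick standalone check of the shapes involved (a sketch, not part of the commit):

    # Rectangular Sinkhorn problem as in setup(), for n_samples = 200.
    import numpy as np
    import ot

    rng = np.random.RandomState(123456789)
    a = rng.rand(50, 100)    # n_samples // 4 source points, 100 features
    b = rng.rand(200, 100)   # n_samples target points, 100 features
    wa, wb = ot.unif(50), ot.unif(200)
    M = ot.dist(a, b)        # cost matrix, shape (50, 200)

    G = ot.bregman.sinkhorn(wa, wb, M, reg=1, stopThr=1e-7)
    assert G.shape == (50, 200)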