diff options
author | Nathan Cassereau <84033440+ncassereau-idris@users.noreply.github.com> | 2021-12-09 17:55:12 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-12-09 17:55:12 +0100 |
commit | f8d871e8c6f15009f559ece6a12eb8d8891c60fb (patch) | |
tree | 9aa46b2fcc8046c6cddd8e9159a6f607dcf0e1e9 /benchmarks/benchmark.py | |
parent | b3dc68feac355fa94c4237f4ecad65edc9f7a7e8 (diff) |
[MRG] Tensorflow backend & Benchmarker & Myst_parser (#316)
* First batch of tf methods (to be continued)
* Second batch of method (yet to debug)
* tensorflow for cpu
* add tf requirement
* pep8 + bug
* small changes
* attempt to solve pymanopt bug with tf2
* attempt #2
* attempt #3
* attempt 4
* docstring
* correct pep8 violation introduced in merge conflicts resolution
* attempt 5
* attempt 6
* just a random try
* Revert "just a random try"
This reverts commit 8223e768bfe33635549fb66cca2267514a60ebbf.
* GPU tests for tensorflow
* pep8
* attempt to solve issue with m2r2
* Remove transpose backend method
* first draft of benchmarker (need to correct time measurement)
* prettier bench table
* Bitsize and prettier device methods
* prettified table bench
* Bug corrected (results were mixed up in the final table)
* Better perf counter (for GPU support)
* pep8
* EMD bench
* solve bug if no GPU available
* pep8
* warning about tensorflow numpy api being required in the backend.py docstring
* Bug solve in backend docstring
* not covering code which requires a GPU
* Tensorflow gradients manipulation tested
* Number of warmup runs is now customizable
* typo
* Remove some warnings while building docs
* Change prettier_device to device_type in backend
* Correct JAX mistakes preventing to see the CPU if a GPU is present
* Attempt to solve JAX bug in case no GPU is found
* Reworked benchmarks order and results storage & clear GPU after usage by benchmark
* Add bench to backend docstring
* better benchs
* remove useless stuff
* Better device_type
* Now using MYST_PARSER and solving links issue in the README.md / online docs
Diffstat (limited to 'benchmarks/benchmark.py')
-rw-r--r-- | benchmarks/benchmark.py | 105 |
1 files changed, 105 insertions, 0 deletions
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""Benchmark helpers for POT backends.

Runs a tested function across every available backend (NumPy, PyTorch, JAX,
TensorFlow, ...) and renders the collected timings as an HTML table.
"""

from ot.backend import get_backend_list, jax, tf
import gc


def setup_backends():
    """Configure optional backends so benchmarks behave consistently.

    - JAX: enable 64-bit floats (JAX defaults to float32).
    - TensorFlow: enable NumPy-style behavior on tensors, which the POT
      TensorFlow backend requires.

    Each configuration step is skipped when the backend is not installed
    (``jax`` / ``tf`` are ``None`` in that case).
    """
    if jax:
        from jax.config import config
        config.update("jax_enable_x64", True)

    if tf:
        from tensorflow.python.ops.numpy_ops import np_config
        np_config.enable_numpy_behavior()


def exec_bench(setup, tested_function, param_list, n_runs, warmup_runs):
    """Benchmark ``tested_function`` on every available backend.

    Parameters
    ----------
    setup : callable
        Maps a parameter from ``param_list`` to the tuple of arguments
        passed to ``tested_function``.
    tested_function : callable
        Function to benchmark.
    param_list : list
        Problem sizes (or other parameters) to benchmark.
    n_runs : int
        Number of timed runs per configuration.
    warmup_runs : int
        Number of untimed warmup runs per configuration.

    Returns
    -------
    dict
        Maps ``(param, backend_name, device, bitsize)`` keys to the measured
        runtime reported by the backend's ``_bench`` method.
    """
    backend_list = get_backend_list()
    for i, nx in enumerate(backend_list):
        if nx.__name__ == "tf" and i < len(backend_list) - 1:
            # Tensorflow should be the last one to be benchmarked because
            # as far as I'm aware, there is no way to force it to release
            # GPU memory. Hence, if any other backend is benchmarked after
            # Tensorflow and requires the usage of a GPU, it will not have the
            # full memory available and you may have a GPU Out Of Memory error
            # even though your GPU can technically hold your tensors in memory.
            backend_list.pop(i)
            backend_list.append(nx)
            break

    inputs = [setup(param) for param in param_list]
    results = dict()
    for nx in backend_list:
        for i in range(len(param_list)):
            print(nx, param_list[i])
            args = inputs[i]
            results_nx = nx._bench(
                tested_function,
                *args,
                n_runs=n_runs,
                warmup_runs=warmup_runs
            )
            # Encourage release of backend tensors before the next run so
            # GPU memory (when used) is reclaimed between configurations.
            gc.collect()
            # Prefix each result key with the parameter so results from
            # different problem sizes do not collide in the merged dict.
            results_nx_with_param_in_key = dict()
            for key in results_nx:
                new_key = (param_list[i], *key)
                results_nx_with_param_in_key[new_key] = results_nx[key]
            results.update(results_nx_with_param_in_key)
    return results


def convert_to_html_table(results, param_name, main_title=None, comments=None):
    """Render benchmark results as an HTML ``<table>`` string.

    Parameters
    ----------
    results : dict
        Mapping produced by :func:`exec_bench`, keyed by
        ``(param, backend_name, device, bitsize)``.
    param_name : str
        Column header for the benchmarked parameter (e.g. "Sample size").
    main_title : str, optional
        Title row spanning the whole table.
    comments : str or sequence of str, optional
        Extra text appended to each bitsize header; a sequence matching the
        number of bitsizes provides one comment per bitsize.

    Returns
    -------
    str
        The HTML table markup.
    """
    string = "<table>\n"
    keys = list(results.keys())
    params, names, devices, bitsizes = zip(*keys)

    devices_names = sorted(list(set(zip(devices, names))))
    params = sorted(list(set(params)))
    bitsizes = sorted(list(set(bitsizes)))
    length = len(devices_names) + 1
    # Each (device, backend) pair appears once per (bitsize, param)
    # combination, so these counts divide evenly. Integer division keeps
    # the colspan attributes valid integers (true division would emit
    # invalid HTML such as colspan="2.0").
    cpus_cols = list(devices).count("CPU") // len(bitsizes) // len(params)
    gpus_cols = list(devices).count("GPU") // len(bitsizes) // len(params)
    assert cpus_cols + gpus_cols == len(devices_names)

    if main_title is not None:
        string += f'<tr><th align="center" colspan="{length}">{str(main_title)}</th></tr>\n'

    for i, bitsize in enumerate(bitsizes):

        if i != 0:
            # Blank spacer row between consecutive bitsize sections.
            string += f'<tr><td colspan="{length}">&nbsp;</td></tr>\n'

        # make bitsize header
        text = f"{bitsize} bits"
        if comments is not None:
            text += " - "
            if isinstance(comments, (tuple, list)) and len(comments) == len(bitsizes):
                text += str(comments[i])
            else:
                text += str(comments)
        string += '<tr><th align="center">Bitsize</th>'
        string += f'<th align="center" colspan="{length - 1}">{text}</th></tr>\n'

        # make device header
        string += '<tr><th align="center">Device</th>'
        string += f'<th align="center" colspan="{cpus_cols}">CPU</th>'
        string += f'<th align="center" colspan="{gpus_cols}">GPU</th></tr>\n'

        # make param_name / backend header
        string += f'<tr><th align="center">{param_name}</th>'
        for _, name in devices_names:
            string += f'<th align="center">{name}</th>'
        string += "</tr>\n"

        # make results rows
        for param in params:
            string += f'<tr><td align="center">{param}</td>'
            for device, name in devices_names:
                key = (param, name, device, bitsize)
                string += f'<td align="center">{results[key]:.4f}</td>'
            string += "</tr>\n"

    string += "</table>"
    return string