summary | refs | log | tree | commit | diff
path: root/scripts/benchmark
diff options
context:
space:
mode:
Diffstat (limited to 'scripts/benchmark')
-rw-r--r-- scripts/benchmark/benchmark.py 135
-rw-r--r-- scripts/benchmark/plot.py 76
-rw-r--r-- scripts/benchmark/settings.py 321
-rw-r--r-- scripts/benchmark/utils.py 66
4 files changed, 598 insertions, 0 deletions
diff --git a/scripts/benchmark/benchmark.py b/scripts/benchmark/benchmark.py
new file mode 100644
index 00000000..1574fdc4
--- /dev/null
+++ b/scripts/benchmark/benchmark.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+
+# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This file follows the
+# PEP8 Python style guide and uses a max-width of 120 characters per line.
+#
+# Author(s):
+# Cedric Nugteren <www.cedricnugteren.nl>
+
+import argparse
+import json
+import os
+import sys
+
+import settings
+import plot
+import utils
+
# Maps every benchmark name accepted on the command line to its experiment description in settings.py. Each
# description is stored there under the upper-cased benchmark name (e.g. "gemm_small" -> settings.GEMM_SMALL).
EXPERIMENTS = {
    name: getattr(settings, name.upper())
    for name in ("axpy", "gemv", "gemm", "gemm_small", "symm", "syrk", "summary")
}
+
+
def run_benchmark(name, arguments_list, precision, num_runs, platform, device):
    """Runs one client binary for every set of arguments and collects the parsed results.

    Args:
        name: Routine name; the binary "./clblast_client_x<name>" is executed from the current directory.
        arguments_list: List of dictionaries, each mapping a client option name (without dash) to its value.
        precision: Value passed to the client as "-precision".
        num_runs: Value passed to the client as "-runs".
        platform: Value passed to the client as "-platform".
        device: Value passed to the client as "-device".

    Returns:
        A flat list with the result dictionaries of all argument sets, as produced by utils.parse_results. For
        precision 16 each result additionally holds the numbers of a second run with "-precision 32".
    """
    binary = "./clblast_client_x" + name

    # These arguments are the same for every sub-benchmark
    constant_arguments = ["-warm_up", "-q", "-no_abbrv", "-cblas 0"]
    common_arguments = ["-precision %d" % precision, "-runs %d" % num_runs]
    opencl_arguments = ["-platform %s" % platform, "-device %s" % device]
    fixed_arguments = opencl_arguments + common_arguments + constant_arguments

    # Loops over sub-benchmarks per benchmark
    results = []
    for arguments in arguments_list:

        # Sets the arguments (the loop variable is not called 'name' to avoid shadowing the function parameter)
        all_arguments = list(fixed_arguments)
        for option, value in arguments.items():
            all_arguments.append("-" + option + " " + str(value))

        # Calls the binary and parses the results
        benchmark_output = utils.run_binary(binary, all_arguments)
        result = utils.parse_results(benchmark_output)

        # For half-precision: also runs single-precision for comparison
        if precision == 16:
            all_arguments = [arg if arg != "-precision 16" else "-precision 32" for arg in all_arguments]
            benchmark_output = utils.run_binary(binary, all_arguments)
            result_extra = utils.parse_results(benchmark_output)

            # zip() stops at the shorter of the two lists, so a partially failed run cannot cause an IndexError.
            # The "_2" entries are overwritten with the numbers of the single-precision run.
            for entry, entry_fp32 in zip(result, result_extra):
                entry["GBs_1_FP32"] = entry_fp32["GBs_1"]
                entry["GBs_2"] = entry_fp32["GBs_2"]
                entry["GFLOPS_1_FP32"] = entry_fp32["GFLOPS_1"]
                entry["GFLOPS_2"] = entry_fp32["GFLOPS_2"]

        results.extend(result)
    return results
+
+
def main(argv):
    """Runs the benchmarks of one experiment (or loads earlier results from disk) and plots the results.

    The results are stored as "<name>_benchmarks.json" and the plot as "<name>_plot.pdf" in the current directory,
    where <name> is the lower-cased precision letter followed by the benchmark name.

    Args:
        argv: List of command-line arguments, without the program name.
    """

    # Parses the command-line arguments
    benchmark_names = sorted(EXPERIMENTS)
    parser = argparse.ArgumentParser()
    parser.add_argument("-b", "--benchmark", help="The benchmark to perform (choose from %s)" % benchmark_names)
    parser.add_argument("-p", "--platform", help="The ID of the OpenCL platform to test on")
    parser.add_argument("-d", "--device", help="The ID of the OpenCL device to test on")
    parser.add_argument("-n", "--num_runs", type=int, default=10,
                        help="The number of benchmark repeats for averaging")
    parser.add_argument("-x", "--precision", type=int, default=32,
                        help="The precision to test for (choose from 16, 32, 64, 3232, 6464)")
    parser.add_argument("-l", "--load_from_disk", action="store_true",
                        help="Load previous benchmark results from disk (if available) instead of running them")
    parser.add_argument("-t", "--plot_title", default=None,
                        help="The title for the plots, defaults to benchmark name")
    parser.add_argument("-v", "--verbose", action="store_true", help="Increase verbosity of the script")
    cl_args = parser.parse_args(argv)

    # Retrieves the benchmark settings. This check comes first: the name is used below and is None when the
    # '--benchmark' option is not given.
    if cl_args.benchmark not in EXPERIMENTS:
        print("[benchmark] Invalid benchmark '%s', choose from %s" % (cl_args.benchmark, benchmark_names))
        return
    experiment = EXPERIMENTS[cl_args.benchmark]
    benchmarks = experiment["benchmarks"]

    # The benchmark name and plot title
    precision_letter = utils.precision_to_letter(cl_args.precision)
    benchmark_name = precision_letter + cl_args.benchmark.upper()
    if cl_args.plot_title is None:
        cl_args.plot_title = benchmark_name

    # Either run the benchmarks for this experiment or load old results from disk
    json_file_name = benchmark_name.lower() + "_benchmarks.json"
    if cl_args.load_from_disk and os.path.isfile(json_file_name):
        print("[benchmark] Loading previous benchmark results from '" + json_file_name + "'")
        with open(json_file_name) as f:
            results = json.load(f)
    else:

        # Runs all the individual benchmarks
        print("[benchmark] Running %d benchmarks for settings '%s'" % (len(benchmarks), cl_args.benchmark))
        results = {"label_names": experiment["label_names"], "num_rows": experiment["num_rows"],
                   "num_cols": experiment["num_cols"], "benchmarks": []}
        for benchmark in benchmarks:
            result = run_benchmark(benchmark["name"], benchmark["arguments"], cl_args.precision, cl_args.num_runs,
                                   cl_args.platform, cl_args.device)
            results["benchmarks"].append(result)

        # Stores the results to disk (text mode: json.dump writes text, which a binary file rejects on Python 3)
        print("[benchmark] Saving benchmark results to '" + json_file_name + "'")
        with open(json_file_name, "w") as f:
            json.dump(results, f, sort_keys=True, indent=4)

    # Retrieves the data from the benchmark settings
    pdf_file_name = benchmark_name.lower() + "_plot.pdf"
    titles = [precision_letter + b["name"].upper() + " " + b["title"] for b in benchmarks]
    x_keys = [b["x_keys"] for b in benchmarks]
    y_keys = [b["y_keys"] for b in benchmarks]
    x_labels = [b["x_label"] for b in benchmarks]
    y_labels = [b["y_label"] for b in benchmarks]
    label_names = results["label_names"]

    # For half-precision: also adds single-precision results for comparison
    if cl_args.precision == 16:
        label_names = ["CLBlast FP16", "clBLAS FP32", "CLBlast FP32"]
        y_keys = [y_key + [y_key[0] + "_FP32"] for y_key in y_keys]

    # Plots the graphs
    plot.plot_graphs(results["benchmarks"], pdf_file_name, results["num_rows"], results["num_cols"],
                     x_keys, y_keys, titles, x_labels, y_labels,
                     label_names, cl_args.plot_title, cl_args.verbose)

    print("[benchmark] All done")


if __name__ == '__main__':
    main(sys.argv[1:])
diff --git a/scripts/benchmark/plot.py b/scripts/benchmark/plot.py
new file mode 100644
index 00000000..dc4800fe
--- /dev/null
+++ b/scripts/benchmark/plot.py
@@ -0,0 +1,76 @@
+# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This file follows the
+# PEP8 Python style guide and uses a max-width of 120 characters per line.
+#
+# Author(s):
+# Cedric Nugteren <www.cedricnugteren.nl>
+
+import utils
+
+import matplotlib.pyplot as plt
+
+
# Line colours as [red, green, blue] lists with each channel scaled from 0..255 down to 0..1
BLUEISH = [71 / 255.0, 101 / 255.0, 177 / 255.0]  # #4765b1
REDISH = [214 / 255.0, 117 / 255.0, 104 / 255.0]  # #d67568
PURPLISH = [85 / 255.0, 0 / 255.0, 119 / 255.0]  # #550077

# The n-th plotted line of a subplot uses the n-th colour and the n-th marker/line format string
COLORS = [BLUEISH, REDISH, PURPLISH]
MARKERS = ["o-", "x-", ".-"]
+
+
def plot_graphs(results, file_name, num_rows, num_cols,
                x_keys, y_keys, titles, x_labels, y_labels,
                label_names, title, verbose):
    """Plots each benchmark result in its own subplot of a grid, saves the figure to disk and shows it.

    Args:
        results: One list of result dictionaries per subplot; there must be num_rows * num_cols of them.
        file_name: Name of the output file, handed to the figure's savefig().
        num_rows: Number of subplot rows.
        num_cols: Number of subplot columns.
        x_keys: Per subplot, the result keys whose values together form the x-tick labels.
        y_keys: Per subplot, the result keys to plot, one line per key.
        titles: Per subplot, the title.
        x_labels: Per subplot, the label of the x-axis.
        y_labels: Per subplot, the label of the y-axis.
        label_names: Legend labels, one per plotted line (shared by all subplots).
        title: Title of the whole figure.
        verbose: Not used by this function at the moment.
    """
    assert len(results) == num_rows * num_cols
    assert len(x_keys) == len(results)
    assert len(y_keys) == len(results)
    assert len(titles) == len(results)
    assert len(x_labels) == len(results)
    assert len(y_labels) == len(results)

    # Initializes the plot. With squeeze=False 'axes' is always a 2D array, so a 1x1 grid works as well.
    size_x = 6 * num_cols
    size_y = 6 * num_rows
    fig, axes = plt.subplots(nrows=num_rows, ncols=num_cols, figsize=(size_x, size_y),
                             facecolor='w', edgecolor='k', squeeze=False)
    fig.text(.5, .93, title, horizontalalignment="center", fontsize=18)

    # Loops over each subplot
    for index, result in enumerate(results):
        ax = axes.flat[index]
        plt.sca(ax)
        print("[plot] Plotting subplot %d" % index)

        # Sets the x-axis labels: one tick per result, showing the values of all x-keys separated by commas
        x_list = [[r[x_key] for r in result] for x_key in x_keys[index]]
        x_ticks = [",".join([utils.float_to_kilo_mega(v) for v in values]) for values in zip(*x_list)]
        x_location = list(range(len(x_ticks)))

        # Sets the y-data
        y_list = [[r[y_key] for r in result] for y_key in y_keys[index]]
        y_max = max(max(y) for y in y_list)

        # Sets the axes: the upper limit is 20% above the maximum, moved up to the next multiple of 'y_rounding'.
        # The limits are passed positionally because newer matplotlib versions reject the 'ymin'/'ymax' keywords.
        y_rounding = 10 if y_max < 80 else 50 if y_max < 400 else 200
        y_axis_limit = (y_max * 1.2) - ((y_max * 1.2) % y_rounding) + y_rounding
        plt.ylim(0, y_axis_limit)
        plt.xticks(x_location, x_ticks, rotation='vertical')

        # Sets the labels
        ax.set_title(titles[index], fontsize=14, y=0.93)
        ax.set_ylabel(y_labels[index], fontsize=14)
        ax.set_xlabel(x_labels[index], fontsize=14)
        ax.xaxis.set_label_coords(0.5, 0.06)

        # Plots the graph: the checks below guarantee that zip() does not silently drop a line
        assert len(COLORS) >= len(y_keys[index])
        assert len(MARKERS) >= len(y_keys[index])
        assert len(label_names) == len(y_keys[index])
        for y_values, marker, label, color in zip(y_list, MARKERS, label_names, COLORS):
            ax.plot(x_location, y_values, marker, label=label, color=color)

        # Sets the legend, placed lower when there are more entries
        leg = ax.legend(loc=(0.02, 0.88 - 0.05 * len(y_keys[index])))
        leg.draw_frame(False)

    # Saves the plot to disk
    fig.savefig(file_name, bbox_inches='tight')
    plt.show()
diff --git a/scripts/benchmark/settings.py b/scripts/benchmark/settings.py
new file mode 100644
index 00000000..0243832f
--- /dev/null
+++ b/scripts/benchmark/settings.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python
+
+# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This file follows the
+# PEP8 Python style guide and uses a max-width of 120 characters per line.
+#
+# Author(s):
+# Cedric Nugteren <www.cedricnugteren.nl>
+
+import utils
+
+
def _axpy_plot(title, arguments, x_label="vector sizes (n)", x_keys=None):
    """Describes one AXPY subplot: the GB/s keys of the results for the given sets of client arguments."""
    return {
        "name": "axpy",
        "title": title,
        "x_label": x_label, "x_keys": ["n"] if x_keys is None else x_keys,
        "y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
        "arguments": arguments,
    }


# The AXPY experiment: six subplots in a grid of 2 rows by 3 columns
AXPY = {
    "label_names": ["CLBlast", "clBLAS"],
    "num_rows": 2, "num_cols": 3,
    "benchmarks": [
        _axpy_plot(
            "multiples of 256K",
            [{"n": utils.k(256), "incx": 1, "incy": 1, "step": utils.k(256), "num_steps": 16}],
        ),
        _axpy_plot(
            "multiples of 256K+1",
            [{"n": utils.k(256) + 1, "incx": 1, "incy": 1, "step": utils.k(256) + 1, "num_steps": 16}],
        ),
        _axpy_plot(
            "around n=1M",
            [{"n": utils.m(1), "incx": 1, "incy": 1, "step": 1, "num_steps": 16}],
        ),
        _axpy_plot(
            "around n=16M",
            [{"n": utils.m(16), "incx": 1, "incy": 1, "step": 1, "num_steps": 16}],
        ),
        # One client call per combination of the two increments
        _axpy_plot(
            "strides (n=8M)",
            [{"n": utils.m(8), "incx": inc_x, "incy": inc_y, "step": 0, "num_steps": 1}
             for inc_x in [1, 2, 4] for inc_y in [1, 2, 4]],
            x_label="increments/strides for x,y", x_keys=["incx", "incy"],
        ),
        # One client call per vector size
        _axpy_plot(
            "powers of 2",
            [{"n": n, "incx": 1, "incy": 1, "step": 0, "num_steps": 1}
             for n in utils.powers_of_2(utils.k(32), utils.m(64))],
        ),
    ],
}
+
def _gemv_plot(title, arguments, x_label="matrix/vector sizes (n=m)", x_keys=None):
    """Describes one GEMV subplot: the GB/s keys of the results for the given sets of client arguments."""
    return {
        "name": "gemv",
        "title": title,
        "x_label": x_label, "x_keys": ["n"] if x_keys is None else x_keys,
        "y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
        "arguments": arguments,
    }


# The GEMV experiment: six subplots in a grid of 2 rows by 3 columns
GEMV = {
    "label_names": ["CLBlast", "clBLAS"],
    "num_rows": 2, "num_cols": 3,
    "benchmarks": [
        _gemv_plot(
            "multiples of 256",
            [{"n": 256, "m": 256, "incx": 1, "incy": 1, "layout": 102, "step": 256, "num_steps": 20}],
        ),
        _gemv_plot(
            "multiples of 257",
            [{"n": 257, "m": 257, "incx": 1, "incy": 1, "layout": 102, "step": 257, "num_steps": 20}],
        ),
        _gemv_plot(
            "around n=m=4K",
            [{"n": 4096, "m": 4096, "incx": 1, "incy": 1, "layout": 102, "step": 1, "num_steps": 16}],
        ),
        # The 'rotated' variants differ from the first two entries only in the layout value (101 instead of 102)
        _gemv_plot(
            "multiples of 256 rotated",
            [{"n": 256, "m": 256, "incx": 1, "incy": 1, "layout": 101, "step": 256, "num_steps": 20}],
        ),
        _gemv_plot(
            "multiples of 257 rotated",
            [{"n": 257, "m": 257, "incx": 1, "incy": 1, "layout": 101, "step": 257, "num_steps": 20}],
        ),
        # One client call per combination of the two increments
        _gemv_plot(
            "strides (n=m=4K)",
            [{"n": 4096, "m": 4096, "incx": inc_x, "incy": inc_y, "layout": 102, "step": 0, "num_steps": 1}
             for inc_x in [1, 2, 4] for inc_y in [1, 2, 4]],
            x_label="increments/strides for x,y", x_keys=["incx", "incy"],
        ),
    ],
}
+
def _gemm_plot(title, arguments, x_label="matrix sizes (m=n=k)", x_keys=None):
    """Describes one GEMM subplot: the GFLOPS keys of the results for the given sets of client arguments."""
    return {
        "name": "gemm",
        "title": title,
        "x_label": x_label, "x_keys": ["m"] if x_keys is None else x_keys,
        "y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
        "arguments": arguments,
    }


# The GEMM experiment: six subplots in a grid of 2 rows by 3 columns
GEMM = {
    "label_names": ["CLBlast", "clBLAS"],
    "num_rows": 2, "num_cols": 3,
    "benchmarks": [
        _gemm_plot(
            "multiples of 128",
            [{"m": 128, "n": 128, "k": 128, "layout": 102,
              "transA": 111, "transB": 111, "step": 128, "num_steps": 20}],
        ),
        _gemm_plot(
            "multiples of 129",
            [{"m": 129, "n": 129, "k": 129, "layout": 102,
              "transA": 111, "transB": 111, "step": 129, "num_steps": 20}],
        ),
        _gemm_plot(
            "around m=n=k=512",
            [{"m": 512, "n": 512, "k": 512, "layout": 102,
              "transA": 111, "transB": 111, "step": 1, "num_steps": 16}],
        ),
        _gemm_plot(
            "around m=n=k=2048",
            [{"m": 2048, "n": 2048, "k": 2048, "layout": 102,
              "transA": 111, "transB": 111, "step": 1, "num_steps": 16}],
        ),
        # One client call per combination of layout and the two transpose options
        _gemm_plot(
            "layouts/transposing (m=n=k=1K)",
            [{"m": 1024, "n": 1024, "k": 1024, "layout": layout,
              "transA": transA, "transB": transB, "step": 0, "num_steps": 1}
             for layout in [101, 102] for transA in [111, 112] for transB in [111, 112]],
            x_label="layout, transA, transB", x_keys=["layout", "transA", "transB"],
        ),
        # One client call per matrix size
        _gemm_plot(
            "powers of 2",
            [{"m": n, "n": n, "k": n, "layout": 102,
              "transA": 111, "transB": 111, "step": 0, "num_steps": 1}
             for n in utils.powers_of_2(8, utils.k(4))],
        ),
    ],
}
+
def _small_gemm_plot(step, num_steps):
    """Describes one GEMM subplot that starts at m=n=k=128 and takes 'num_steps' steps of size 'step'."""
    return {
        "name": "gemm",
        "title": "small matrices in steps of %d" % step,
        "x_label": "matrix sizes (m=n=k)", "x_keys": ["m"],
        "y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
        "arguments": [{"m": 128, "n": 128, "k": 128, "layout": 102,
                       "transA": 111, "transB": 111, "step": step, "num_steps": num_steps}],
    }


# The GEMM experiment for small matrices: two subplots below each other
GEMM_SMALL = {
    "label_names": ["CLBlast", "clBLAS"],
    "num_rows": 2, "num_cols": 1,
    "benchmarks": [
        _small_gemm_plot(16, 57),
        _small_gemm_plot(1, 385),
    ],
}
+
def _symm_plot(title, arguments, x_label="matrix sizes (m=n)", x_keys=None):
    """Describes one SYMM subplot: the GFLOPS keys of the results for the given sets of client arguments."""
    return {
        "name": "symm",
        "title": title,
        "x_label": x_label, "x_keys": ["m"] if x_keys is None else x_keys,
        "y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
        "arguments": arguments,
    }


# The SYMM experiment: six subplots in a grid of 2 rows by 3 columns
SYMM = {
    "label_names": ["CLBlast", "clBLAS"],
    "num_rows": 2, "num_cols": 3,
    "benchmarks": [
        _symm_plot(
            "multiples of 128",
            [{"m": 128, "n": 128, "layout": 102,
              "side": 141, "triangle": 121, "step": 128, "num_steps": 20}],
        ),
        _symm_plot(
            "multiples of 129",
            [{"m": 129, "n": 129, "layout": 102,
              "side": 141, "triangle": 121, "step": 129, "num_steps": 20}],
        ),
        _symm_plot(
            "around m=n=512",
            [{"m": 512, "n": 512, "layout": 102,
              "side": 141, "triangle": 121, "step": 1, "num_steps": 16}],
        ),
        _symm_plot(
            "around m=n=2048",
            [{"m": 2048, "n": 2048, "layout": 102,
              "side": 141, "triangle": 121, "step": 1, "num_steps": 16}],
        ),
        # One client call per combination of layout, side and triangle
        _symm_plot(
            "layouts/sides/triangles (m=n=1K)",
            [{"m": 1024, "n": 1024, "layout": layout,
              "side": side, "triangle": triangle, "step": 0, "num_steps": 1}
             for layout in [101, 102] for side in [141, 142] for triangle in [121, 122]],
            x_label="layout, side, triangle", x_keys=["layout", "side", "triangle"],
        ),
        # One client call per matrix size
        _symm_plot(
            "powers of 2",
            [{"m": n, "n": n, "layout": 102,
              "side": 141, "triangle": 121, "step": 0, "num_steps": 1}
             for n in utils.powers_of_2(8, utils.k(4))],
        ),
    ],
}
+
def _syrk_plot(title, arguments, x_label="matrix sizes (n=k)", x_keys=None):
    """Describes one SYRK subplot: the GFLOPS keys of the results for the given sets of client arguments."""
    return {
        "name": "syrk",
        "title": title,
        "x_label": x_label, "x_keys": ["n"] if x_keys is None else x_keys,
        "y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
        "arguments": arguments,
    }


# The SYRK experiment: six subplots in a grid of 2 rows by 3 columns.
# NOTE(review): all entries except the layout/triangle/transA sweep pass a "side" argument, whereas that sweep
# passes "transA" instead - confirm that the syrk client accepts "side".
SYRK = {
    "label_names": ["CLBlast", "clBLAS"],
    "num_rows": 2, "num_cols": 3,
    "benchmarks": [
        _syrk_plot(
            "multiples of 128",
            [{"n": 128, "k": 128, "layout": 102,
              "side": 141, "triangle": 121, "step": 128, "num_steps": 20}],
        ),
        _syrk_plot(
            "multiples of 129",
            [{"n": 129, "k": 129, "layout": 102,
              "side": 141, "triangle": 121, "step": 129, "num_steps": 20}],
        ),
        _syrk_plot(
            "around n=k=512",
            [{"n": 512, "k": 512, "layout": 102,
              "side": 141, "triangle": 121, "step": 1, "num_steps": 16}],
        ),
        _syrk_plot(
            "around n=k=2048",
            [{"n": 2048, "k": 2048, "layout": 102,
              "side": 141, "triangle": 121, "step": 1, "num_steps": 16}],
        ),
        # One client call per combination of layout, triangle and transA.
        # NOTE(review): the title mentions 'sides' although the sweep is over transA - confirm the intended title.
        _syrk_plot(
            "layouts/sides/triangles (n=k=1K)",
            [{"n": 1024, "k": 1024, "layout": layout,
              "triangle": triangle, "transA": transA, "step": 0, "num_steps": 1}
             for layout in [101, 102] for triangle in [121, 122] for transA in [111, 112]],
            x_label="layout, triangle, transA", x_keys=["layout", "triangle", "transA"],
        ),
        # One client call per matrix size
        _syrk_plot(
            "powers of 2",
            [{"n": n, "k": n, "layout": 102,
              "side": 141, "triangle": 121, "step": 0, "num_steps": 1}
             for n in utils.powers_of_2(8, utils.k(4))],
        ),
    ],
}
+
# The summary experiment: the first two subplots of AXPY, GEMV, GEMM and SYMM (in that order) in a grid of
# 4 rows by 2 columns. The benchmark dictionaries are shared with those experiments, not copied.
SUMMARY = {
    "label_names": ["CLBlast", "clBLAS"],
    "num_rows": 4, "num_cols": 2,
    "benchmarks": (AXPY["benchmarks"][:2] + GEMV["benchmarks"][:2] +
                   GEMM["benchmarks"][:2] + SYMM["benchmarks"][:2]),
}
diff --git a/scripts/benchmark/utils.py b/scripts/benchmark/utils.py
new file mode 100644
index 00000000..62e18de2
--- /dev/null
+++ b/scripts/benchmark/utils.py
@@ -0,0 +1,66 @@
+# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This file follows the
+# PEP8 Python style guide and uses a max-width of 120 characters per line.
+#
+# Author(s):
+# Cedric Nugteren <www.cedricnugteren.nl>
+
+import csv
+import subprocess
+
+
def k(value):
    """Returns 'value' times 1024, e.g. k(256) for '256K'."""
    kilo = 1024
    return value * kilo
+
+
def m(value):
    """Returns 'value' times 1024 * 1024, e.g. m(16) for '16M'."""
    kilo = 1024
    return value * kilo * kilo
+
+
def float_to_kilo_mega(value):
    """Formats a number without decimals, abbreviated with a 'K' or 'M' suffix when it divides evenly.

    Values up to and including 1024, and values that are not a multiple of 1024, are printed in full. Larger
    multiples of 1024 are printed in units of 1024 with a 'K' suffix, unless they are a multiple of 1024 * 1024
    above 1024 * 1024, in which case they are printed in units of 1024 * 1024 with an 'M' suffix.
    """
    kilo = 1024
    mega = 1024 * 1024

    # Too small or not a whole number of kilos: no abbreviation
    if value <= kilo or value % kilo != 0:
        return "%.0f" % value

    # Too small or not a whole number of megas: abbreviate as kilos
    if value <= mega or value % mega != 0:
        return "%.0fK" % (value / float(kilo))

    return "%.0fM" % (value / float(mega))
+
+
def powers_of_2(start, stop):
    """Generates 'start', 2 * 'start', 4 * 'start', ... for as long as the value does not exceed 'stop'."""
    current = start
    while current <= stop:
        yield current
        current = current * 2
+
+
def precision_to_letter(precision):
    """Returns the routine-name prefix letter for a precision value, or "X" for an unknown precision."""
    letters = ((16, "H"), (32, "S"), (64, "D"), (3232, "C"), (6464, "Z"))
    for known_precision, letter in letters:
        if precision == known_precision:
            return letter
    return "X"
+
+
def run_binary(command, arguments):
    """Runs a command through the shell and returns everything it wrote to its standard output.

    Args:
        command: The command to run, e.g. the path of a binary.
        arguments: List of argument strings; they are appended to the command, separated by spaces.

    Returns:
        The captured standard output as text, or False when the process could not be started.
    """
    full_command = command + " " + " ".join(arguments)
    print("[benchmark] Calling binary: %s" % str(full_command))
    try:
        # NOTE: the command line is interpreted by the shell, so only pass trusted commands and arguments.
        # universal_newlines=True makes the output text instead of bytes on Python 3.
        process = subprocess.Popen(full_command, shell=True, stdout=subprocess.PIPE, universal_newlines=True)

        # communicate() reads all output and waits for the process to exit, so no unreaped child is left behind
        output, _ = process.communicate()
        return output
    except OSError as e:
        print("[benchmark] Error while running the binary, got exception: %s" % str(e))
        return False
+
+
def _to_number(text):
    """Converts one CSV field to an int when possible and to a float otherwise (e.g. "1.5", "2e1" or "inf")."""
    try:
        return int(text)
    except ValueError:
        return float(text)


def parse_results(csv_data):
    """Parses semicolon-separated text with a header line into a list of dictionaries with numeric values.

    Args:
        csv_data: The text to parse, as str or bytes. A false value, such as the False that run_binary returns
            on failure, is treated as 'no results'.

    Returns:
        A list with one dictionary per data row, mapping each column name to an int or a float.

    Raises:
        ValueError: If a field is not a number.
    """
    if not csv_data:
        return []

    # Output captured from a pipe without text mode arrives as bytes on Python 3
    if not isinstance(csv_data, str):
        csv_data = csv_data.decode("utf-8")

    rows = csv.DictReader(csv_data.split("\n"), delimiter=";", skipinitialspace=True)
    return [{key: _to_number(text) for key, text in row.items()} for row in rows]