summary | refs | log | tree | commit | diff
path: root/scripts
diff options
context:
space:
mode:
Diffstat (limited to 'scripts')
-rw-r--r-- scripts/benchmark/benchmark.py 71
-rw-r--r-- scripts/benchmark/benchmark_all.py 44
-rw-r--r-- scripts/benchmark/plot.py 4
-rw-r--r-- scripts/benchmark/settings.py 126
-rwxr-xr-x scripts/generator/generator.py 2
-rw-r--r-- scripts/generator/generator/datatype.py 4
6 files changed, 186 insertions, 65 deletions
diff --git a/scripts/benchmark/benchmark.py b/scripts/benchmark/benchmark.py
index 31aa8c4f..a5f216c6 100644
--- a/scripts/benchmark/benchmark.py
+++ b/scripts/benchmark/benchmark.py
@@ -17,9 +17,11 @@ import utils
EXPERIMENTS = {
"axpy": settings.AXPY,
+ "axpybatched": settings.AXPYBATCHED,
"gemv": settings.GEMV,
"gemm": settings.GEMM,
"gemm_small": settings.GEMM_SMALL,
+ "gemmbatched": settings.GEMMBATCHED,
"symm": settings.SYMM,
"syrk": settings.SYRK,
"summary": settings.SUMMARY,
@@ -34,7 +36,7 @@ def run_benchmark(name, arguments_list, precision, num_runs, platform, device):
for arguments in arguments_list:
# Sets the arguments
- constant_arguments = ["-warm_up", "-q", "-no_abbrv", "-cblas 0"]
+ constant_arguments = ["-warm_up", "-q", "-no_abbrv", "-cblas 0", "-cublas 0"]
common_arguments = ["-precision %d" % precision, "-runs %d" % num_runs]
opencl_arguments = ["-platform %d" % platform, "-device %d" % device]
all_arguments = opencl_arguments + common_arguments + constant_arguments
@@ -60,51 +62,60 @@ def run_benchmark(name, arguments_list, precision, num_runs, platform, device):
return results
-def main(argv):
-
- # Parses the command-line arguments
- parser = argparse.ArgumentParser()
- parser.add_argument("-b", "--benchmark", required=True, help="The benchmark to perform (choose from %s)" % EXPERIMENTS.keys())
+def parse_arguments(argv):
+ parser = argparse.ArgumentParser(description="Runs a full benchmark for a specific routine on a specific device")
+ parser.add_argument("-b", "--benchmark", required=True, help="The benchmark to perform (choose from %s)" % sorted(EXPERIMENTS.keys()))
parser.add_argument("-p", "--platform", required=True, type=int, help="The ID of the OpenCL platform to test on")
parser.add_argument("-d", "--device", required=True, type=int, help="The ID of the OpenCL device to test on")
- parser.add_argument("-n", "--num_runs", type=int, default=10, help="The number of benchmark repeats for averaging")
- parser.add_argument("-x", "--precision", type=int, default=32,
- help="The precision to test for (choose from 16, 32, 64, 3232, 6464")
+ parser.add_argument("-n", "--num_runs", type=int, default=None, help="Overrides the default number of benchmark repeats for averaging")
+ parser.add_argument("-x", "--precision", type=int, default=32, help="The precision to test for (choose from 16, 32, 64, 3232, 6464)")
parser.add_argument("-l", "--load_from_disk", action="store_true", help="Increase verbosity of the script")
- parser.add_argument("-t", "--plot_title", default=None, help="The title for the plots, defaults to benchmark name")
+ parser.add_argument("-t", "--plot_title", default="", help="The title for the plots, defaults to benchmark name")
parser.add_argument("-z", "--tight_plot", action="store_true", help="Enables tight plot layout for in paper or presentation")
+ parser.add_argument("-o", "--output_folder", default=os.getcwd(), help="Sets the folder for output plots (defaults to current folder)")
parser.add_argument("-v", "--verbose", action="store_true", help="Increase verbosity of the script")
cl_args = parser.parse_args(argv)
+ return vars(cl_args)
+
+
+def benchmark_single(benchmark, platform, device, num_runs, precision, load_from_disk,
+ plot_title, tight_plot, output_folder, verbose):
+
+ # Sanity check
+ if not os.path.isdir(output_folder):
+ print("[benchmark] Error: folder '%s' doesn't exist" % output_folder)
+ return
# The benchmark name and plot title
- benchmark_name = utils.precision_to_letter(cl_args.precision) + cl_args.benchmark.upper()
- if cl_args.plot_title is None:
- cl_args.plot_title = benchmark_name
+ benchmark_name = utils.precision_to_letter(precision) + benchmark.upper()
+ if benchmark.upper() != "SUMMARY":
+ plot_title = benchmark_name if plot_title is "" else benchmark_name + ": " + plot_title
# Retrieves the benchmark settings
- if cl_args.benchmark not in EXPERIMENTS.keys():
- print("[benchmark] Invalid benchmark '%s', choose from %s" % (cl_args.benchmark, EXPERIMENTS.keys()))
+ if benchmark not in EXPERIMENTS.keys():
+ print("[benchmark] Invalid benchmark '%s', choose from %s" % (benchmark, EXPERIMENTS.keys()))
return
- experiment = EXPERIMENTS[cl_args.benchmark]
+ experiment = EXPERIMENTS[benchmark]
benchmarks = experiment["benchmarks"]
# Either run the benchmarks for this experiment or load old results from disk
- json_file_name = benchmark_name.lower() + "_benchmarks.json"
- if cl_args.load_from_disk and os.path.isfile(json_file_name):
+ json_file_name = os.path.join(output_folder, benchmark_name.lower() + "_benchmarks.json")
+ if load_from_disk and os.path.isfile(json_file_name):
print("[benchmark] Loading previous benchmark results from '" + json_file_name + "'")
with open(json_file_name) as f:
results = json.load(f)
else:
# Runs all the individual benchmarks
- print("[benchmark] Running on platform %d, device %d" % (cl_args.platform, cl_args.device))
- print("[benchmark] Running %d benchmarks for settings '%s'" % (len(benchmarks), cl_args.benchmark))
+ print("[benchmark] Running on platform %d, device %d" % (platform, device))
+ print("[benchmark] Running %d benchmarks for settings '%s'" % (len(benchmarks), benchmark))
results = {"label_names": experiment["label_names"], "num_rows": experiment["num_rows"],
"num_cols": experiment["num_cols"], "benchmarks": []}
- for benchmark in benchmarks:
- print("[benchmark] Running benchmark '%s:%s'" % (benchmark["name"], benchmark["title"]))
- result = run_benchmark(benchmark["name"], benchmark["arguments"], cl_args.precision, cl_args.num_runs,
- cl_args.platform, cl_args.device)
+ for bench in benchmarks:
+ num_runs_benchmark = bench["num_runs"] if num_runs is None else num_runs
+ print("[benchmark] Running benchmark '%s:%s'" % (bench["name"], bench["title"]))
+ result = run_benchmark(bench["name"], bench["arguments"], precision, num_runs_benchmark,
+ platform, device)
results["benchmarks"].append(result)
# Stores the results to disk
@@ -113,8 +124,9 @@ def main(argv):
json.dump(results, f, sort_keys=True, indent=4)
# Retrieves the data from the benchmark settings
- pdf_file_name = benchmark_name.lower() + "_plot.pdf"
- titles = [utils.precision_to_letter(cl_args.precision) + b["name"].upper() + " " + b["title"] for b in benchmarks]
+ file_name_suffix = "_tight" if tight_plot else ""
+ pdf_file_name = os.path.join(output_folder, benchmark_name.lower() + "_plot" + file_name_suffix + ".pdf")
+ titles = [utils.precision_to_letter(precision) + b["name"].upper() + " " + b["title"] for b in benchmarks]
x_keys = [b["x_keys"] for b in benchmarks]
y_keys = [b["y_keys"] for b in benchmarks]
x_labels = [b["x_label"] for b in benchmarks]
@@ -122,17 +134,18 @@ def main(argv):
label_names = results["label_names"]
# For half-precision: also adds single-precision results for comparison
- if cl_args.precision == 16:
+ if precision == 16:
label_names = ["CLBlast FP16", "clBLAS FP32", "CLBlast FP32"]
y_keys = [y_key + [y_key[0] + "_FP32"] for y_key in y_keys]
# Plots the graphs
plot.plot_graphs(results["benchmarks"], pdf_file_name, results["num_rows"], results["num_cols"],
x_keys, y_keys, titles, x_labels, y_labels,
- label_names, cl_args.plot_title, cl_args.tight_plot, cl_args.verbose)
+ label_names, plot_title, tight_plot, verbose)
print("[benchmark] All done")
if __name__ == '__main__':
- main(sys.argv[1:])
+ parsed_arguments = parse_arguments(sys.argv[1:])
+ benchmark_single(**parsed_arguments)
diff --git a/scripts/benchmark/benchmark_all.py b/scripts/benchmark/benchmark_all.py
new file mode 100644
index 00000000..9bf09190
--- /dev/null
+++ b/scripts/benchmark/benchmark_all.py
@@ -0,0 +1,44 @@
+#!/usr/bin/env python
+
+# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This file follows the
+# PEP8 Python style guide and uses a max-width of 120 characters per line.
+#
+# Author(s):
+# Cedric Nugteren <www.cedricnugteren.nl>
+
+import argparse
+import os
+import sys
+
+from benchmark import benchmark_single
+
+
+BENCHMARKS = ["axpy", "gemv", "gemm", "summary", "axpybatched", "gemmbatched"]
+
+
+def parse_arguments(argv):
+ parser = argparse.ArgumentParser(description="Runs all (main) benchmarks in one go for a given device")
+ parser.add_argument("-p", "--platform", required=True, type=int, help="The ID of the OpenCL platform to test on")
+ parser.add_argument("-d", "--device", required=True, type=int, help="The ID of the OpenCL device to test on")
+ parser.add_argument("-x", "--precision", type=int, default=32, help="The precision to test for (choose from 16, 32, 64, 3232, 6464)")
+ parser.add_argument("-l", "--load_from_disk", action="store_true", help="Loads previous benchmark results from disk (if available) instead of re-running the benchmarks")
+ parser.add_argument("-t", "--plot_title", default=None, help="The title for the plots, defaults to benchmark name")
+ parser.add_argument("-o", "--output_folder", default=os.getcwd(), help="Sets the folder for output plots (defaults to current folder)")
+ parser.add_argument("-v", "--verbose", action="store_true", help="Increase verbosity of the script")
+ cl_args = parser.parse_args(argv)
+ return vars(cl_args)
+
+
+def benchmark_all(platform, device, precision, load_from_disk,
+ plot_title, output_folder, verbose):
+ for bench in BENCHMARKS:
+ from_disk = load_from_disk
+ for tight_plot in [True, False]: # two plots for a single benchmark
+ benchmark_single(bench, platform, device, None, precision, from_disk,
+ plot_title, tight_plot, output_folder, verbose)
+ from_disk = True # for the next plot of the same data
+
+
+if __name__ == '__main__':
+ parsed_arguments = parse_arguments(sys.argv[1:])
+ benchmark_all(**parsed_arguments)
diff --git a/scripts/benchmark/plot.py b/scripts/benchmark/plot.py
index 275a3ba8..0cb6d8c5 100644
--- a/scripts/benchmark/plot.py
+++ b/scripts/benchmark/plot.py
@@ -6,6 +6,8 @@
import utils
+import matplotlib
+matplotlib.use('Agg')
from matplotlib import rcParams
import matplotlib.pyplot as plt
@@ -112,5 +114,5 @@ def plot_graphs(results, file_name, num_rows, num_cols,
leg.draw_frame(False)
# Saves the plot to disk
+ print("[benchmark] Saving plot to '" + file_name + "'")
fig.savefig(file_name, bbox_inches=bounding_box)
- # plt.show()
diff --git a/scripts/benchmark/settings.py b/scripts/benchmark/settings.py
index cc7220e1..38db9ef5 100644
--- a/scripts/benchmark/settings.py
+++ b/scripts/benchmark/settings.py
@@ -14,35 +14,35 @@ AXPY = {
"num_rows": 2, "num_cols": 3,
"benchmarks": [
{
- "name": "axpy",
+ "name": "axpy", "num_runs": 40,
"title": "multiples of 256K",
"x_label": "sizes (n)", "x_keys": ["n"],
"y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
"arguments": [{"n": utils.k(256), "incx": 1, "incy": 1, "step": utils.k(256), "num_steps": 16}],
},
{
- "name": "axpy",
+ "name": "axpy", "num_runs": 40,
"title": "multiples of 256K+1",
"x_label": "sizes (n)", "x_keys": ["n"],
"y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
"arguments": [{"n": utils.k(256) + 1, "incx": 1, "incy": 1, "step": utils.k(256) + 1, "num_steps": 16}],
},
{
- "name": "axpy",
+ "name": "axpy", "num_runs": 40,
"title": "around 1M",
"x_label": "sizes (n)", "x_keys": ["n"],
"y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
"arguments": [{"n": utils.m(1), "incx": 1, "incy": 1, "step": 1, "num_steps": 16}],
},
{
- "name": "axpy",
+ "name": "axpy", "num_runs": 20,
"title": "around 16M",
"x_label": "sizes (n)", "x_keys": ["n"],
"y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
"arguments": [{"n": utils.m(16), "incx": 1, "incy": 1, "step": 1, "num_steps": 16}],
},
{
- "name": "axpy",
+ "name": "axpy", "num_runs": 20,
"title": "strides n=8M",
"x_label": "increments for x,y", "x_keys": ["incx", "incy"],
"y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
@@ -50,7 +50,7 @@ AXPY = {
for inc_x in [1, 2, 4] for inc_y in [1, 2, 4]],
},
{
- "name": "axpy",
+ "name": "axpy", "num_runs": 40,
"title": "powers of 2",
"x_label": "sizes (n)", "x_keys": ["n"],
"y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
@@ -60,47 +60,78 @@ AXPY = {
]
}
+AXPYBATCHED = {
+ "label_names": ["CLBlast", "clBLAS (non batched)"],
+ "num_rows": 1, "num_cols": 3,
+ "benchmarks": [
+ {
+ "name": "axpybatched", "num_runs": 30,
+ "title": "8 batches",
+ "x_label": "sizes (n)", "x_keys": ["n"],
+ "y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
+ "arguments": [{"batch_num": 8, "n": n, "incx": 1, "incy": 1, "step": 0, "num_steps": 1}
+ for n in utils.powers_of_2(utils.k(8), utils.m(4))],
+ },
+ {
+ "name": "axpybatched", "num_runs": 20,
+ "title": "64 batches",
+ "x_label": "sizes (n)", "x_keys": ["n"],
+ "y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
+ "arguments": [{"batch_num": 64, "n": n, "incx": 1, "incy": 1, "step": 0, "num_steps": 1}
+ for n in utils.powers_of_2(utils.k(8), utils.m(4))],
+ },
+ {
+ "name": "axpybatched", "num_runs": 40,
+ "title": "n=512K",
+ "x_label": "number of batches", "x_keys": ["batch_num"],
+ "y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
+ "arguments": [{"batch_num": b, "n": utils.k(512), "incx": 1, "incy": 1, "step": 1, "num_steps": 1}
+ for b in utils.powers_of_2(1, 512)],
+ }
+ ]
+}
+
GEMV = {
"label_names": ["CLBlast", "clBLAS"],
"num_rows": 2, "num_cols": 3,
"benchmarks": [
{
- "name": "gemv",
+ "name": "gemv", "num_runs": 40,
"title": "multiples of 256",
"x_label": "sizes (n=m)", "x_keys": ["n"],
"y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
"arguments": [{"n": 256, "m": 256, "incx": 1, "incy": 1, "layout": 102, "step": 256, "num_steps": 20}],
},
{
- "name": "gemv",
+ "name": "gemv", "num_runs": 40,
"title": "multiples of 257",
"x_label": "sizes (n=m)", "x_keys": ["n"],
"y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
"arguments": [{"n": 257, "m": 257, "incx": 1, "incy": 1, "layout": 102, "step": 257, "num_steps": 20}],
},
{
- "name": "gemv",
+ "name": "gemv", "num_runs": 20,
"title": "around 4K",
"x_label": "sizes (n=m)", "x_keys": ["n"],
"y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
"arguments": [{"n": 4096, "m": 4096, "incx": 1, "incy": 1, "layout": 102, "step": 1, "num_steps": 16}],
},
{
- "name": "gemv",
+ "name": "gemv", "num_runs": 40,
"title": "multiples of 256 rotated",
"x_label": "sizes (n=m)", "x_keys": ["n"],
"y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
"arguments": [{"n": 256, "m": 256, "incx": 1, "incy": 1, "layout": 101, "step": 256, "num_steps": 20}],
},
{
- "name": "gemv",
+ "name": "gemv", "num_runs": 40,
"title": "multiples of 257 rotated",
"x_label": "sizes (n=m)", "x_keys": ["n"],
"y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
"arguments": [{"n": 257, "m": 257, "incx": 1, "incy": 1, "layout": 101, "step": 257, "num_steps": 20}],
},
{
- "name": "gemv",
+ "name": "gemv", "num_runs": 20,
"title": "strides n=m=4K",
"x_label": "increments/strides for x,y", "x_keys": ["incx", "incy"],
"y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
@@ -115,7 +146,7 @@ GEMM = {
"num_rows": 2, "num_cols": 3,
"benchmarks": [
{
- "name": "gemm",
+ "name": "gemm", "num_runs": 20,
"title": "multiples of 128",
"x_label": "sizes (m=n=k)", "x_keys": ["m"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -123,7 +154,7 @@ GEMM = {
"transA": 111, "transB": 111, "step": 128, "num_steps": 20}],
},
{
- "name": "gemm",
+ "name": "gemm", "num_runs": 20,
"title": "multiples of 129",
"x_label": "sizes (m=n=k)", "x_keys": ["m"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -131,7 +162,7 @@ GEMM = {
"transA": 111, "transB": 111, "step": 129, "num_steps": 20}],
},
{
- "name": "gemm",
+ "name": "gemm", "num_runs": 20,
"title": "around 512",
"x_label": "sizes (m=n=k)", "x_keys": ["m"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -139,7 +170,7 @@ GEMM = {
"transA": 111, "transB": 111, "step": 1, "num_steps": 16}],
},
{
- "name": "gemm",
+ "name": "gemm", "num_runs": 10,
"title": "around 2048",
"x_label": "sizes (m=n=k)", "x_keys": ["m"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -147,7 +178,7 @@ GEMM = {
"transA": 111, "transB": 111, "step": 1, "num_steps": 16}],
},
{
- "name": "gemm",
+ "name": "gemm", "num_runs": 10,
"title": "layouts/transpose",
"x_label": "layout, transA, transB", "x_keys": ["layout", "transA", "transB"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -156,7 +187,7 @@ GEMM = {
for layout in [101, 102] for transA in [111, 112] for transB in [111, 112]],
},
{
- "name": "gemm",
+ "name": "gemm", "num_runs": 10,
"title": "powers of 2",
"x_label": "sizes (m=n=k)", "x_keys": ["m"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -172,7 +203,7 @@ GEMM_SMALL = {
"num_rows": 2, "num_cols": 1,
"benchmarks": [
{
- "name": "gemm",
+ "name": "gemm", "num_runs": 10,
"title": "small matrices in steps of 16",
"x_label": "sizes (m=n=k)", "x_keys": ["m"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -180,7 +211,7 @@ GEMM_SMALL = {
"transA": 111, "transB": 111, "step": 16, "num_steps": 57}],
},
{
- "name": "gemm",
+ "name": "gemm", "num_runs": 10,
"title": "small matrices in steps of 1",
"x_label": "sizes (m=n=k)", "x_keys": ["m"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -191,12 +222,43 @@ GEMM_SMALL = {
]
}
+GEMMBATCHED = {
+ "label_names": ["CLBlast", "clBLAS (non batched)"],
+ "num_rows": 1, "num_cols": 3,
+ "benchmarks": [
+ {
+ "name": "gemmbatched", "num_runs": 40,
+ "title": "8 batches",
+ "x_label": "sizes (m=n=k)", "x_keys": ["m"],
+ "y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
+ "arguments": [{"batch_num": 8, "m": 32, "n": 32, "k": 32, "layout": 102,
+ "transA": 111, "transB": 111, "step": 32, "num_steps": 20}],
+ },
+ {
+ "name": "gemmbatched", "num_runs": 20,
+ "title": "64 batches",
+ "x_label": "sizes (m=n=k)", "x_keys": ["m"],
+ "y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
+ "arguments": [{"batch_num": 64, "m": 32, "n": 32, "k": 32, "layout": 102,
+ "transA": 111, "transB": 111, "step": 32, "num_steps": 20}],
+ },
+ {
+ "name": "gemmbatched", "num_runs": 30,
+ "title": "m=n=k=128",
+ "x_label": "number of batches", "x_keys": ["batch_num"],
+ "y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
+ "arguments": [{"batch_num": b, "m": 128, "n": 128, "k": 128, "layout": 102,
+ "transA": 111, "transB": 111} for b in utils.powers_of_2(1, utils.k(16))],
+ }
+ ]
+}
+
SYMM = {
"label_names": ["CLBlast", "clBLAS"],
"num_rows": 2, "num_cols": 3,
"benchmarks": [
{
- "name": "symm",
+ "name": "symm", "num_runs": 10,
"title": "multiples of 128",
"x_label": "sizes (m=n)", "x_keys": ["m"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -204,7 +266,7 @@ SYMM = {
"side": 141, "triangle": 121, "step": 128, "num_steps": 20}],
},
{
- "name": "symm",
+ "name": "symm", "num_runs": 10,
"title": "multiples of 129",
"x_label": "sizes (m=n)", "x_keys": ["m"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -212,7 +274,7 @@ SYMM = {
"side": 141, "triangle": 121, "step": 129, "num_steps": 20}],
},
{
- "name": "symm",
+ "name": "symm", "num_runs": 10,
"title": "around 512",
"x_label": "sizes (m=n)", "x_keys": ["m"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -220,7 +282,7 @@ SYMM = {
"side": 141, "triangle": 121, "step": 1, "num_steps": 16}],
},
{
- "name": "symm",
+ "name": "symm", "num_runs": 10,
"title": "around 2048",
"x_label": "sizes (m=n)", "x_keys": ["m"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -228,7 +290,7 @@ SYMM = {
"side": 141, "triangle": 121, "step": 1, "num_steps": 16}],
},
{
- "name": "symm",
+ "name": "symm", "num_runs": 10,
"title": "layouts/sides/triangles",
"x_label": "layout, side, triangle", "x_keys": ["layout", "side", "triangle"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -237,7 +299,7 @@ SYMM = {
for layout in [101, 102] for side in [141, 142] for triangle in [121, 122]],
},
{
- "name": "symm",
+ "name": "symm", "num_runs": 10,
"title": "powers of 2",
"x_label": "sizes (m=n)", "x_keys": ["m"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -253,7 +315,7 @@ SYRK = {
"num_rows": 2, "num_cols": 3,
"benchmarks": [
{
- "name": "syrk",
+ "name": "syrk", "num_runs": 10,
"title": "multiples of 128",
"x_label": "sizes (n=k)", "x_keys": ["n"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -261,7 +323,7 @@ SYRK = {
"side": 141, "triangle": 121, "step": 128, "num_steps": 20}],
},
{
- "name": "syrk",
+ "name": "syrk", "num_runs": 10,
"title": "multiples of 129",
"x_label": "sizes (n=k)", "x_keys": ["n"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -269,7 +331,7 @@ SYRK = {
"side": 141, "triangle": 121, "step": 129, "num_steps": 20}],
},
{
- "name": "syrk",
+ "name": "syrk", "num_runs": 10,
"title": "around 512",
"x_label": "sizes (n=k)", "x_keys": ["n"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -277,7 +339,7 @@ SYRK = {
"side": 141, "triangle": 121, "step": 1, "num_steps": 16}],
},
{
- "name": "syrk",
+ "name": "syrk", "num_runs": 10,
"title": "around 2048",
"x_label": "sizes (n=k)", "x_keys": ["n"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -285,7 +347,7 @@ SYRK = {
"side": 141, "triangle": 121, "step": 1, "num_steps": 16}],
},
{
- "name": "syrk",
+ "name": "syrk", "num_runs": 10,
"title": "layouts/sides/triangles",
"x_label": "layout, triangle, transA", "x_keys": ["layout", "triangle", "transA"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
@@ -294,7 +356,7 @@ SYRK = {
for layout in [101, 102] for triangle in [121, 122] for transA in [111, 112]],
},
{
- "name": "syrk",
+ "name": "syrk", "num_runs": 10,
"title": "powers of 2",
"x_label": "sizes (n=k)", "x_keys": ["n"],
"y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index f7ef4528..8c13b2ff 100755
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -42,7 +42,7 @@ FILES = [
"/include/clblast_netlib_c.h",
"/src/clblast_netlib_c.cpp",
]
-HEADER_LINES = [122, 77, 126, 23, 29, 41, 29, 65, 32]
+HEADER_LINES = [122, 77, 126, 24, 29, 41, 29, 65, 32]
FOOTER_LINES = [25, 139, 27, 38, 6, 6, 6, 9, 2]
HEADER_LINES_DOC = 0
FOOTER_LINES_DOC = 63
diff --git a/scripts/generator/generator/datatype.py b/scripts/generator/generator/datatype.py
index 6ac5681a..fdb584bc 100644
--- a/scripts/generator/generator/datatype.py
+++ b/scripts/generator/generator/datatype.py
@@ -72,8 +72,8 @@ class DataType:
def test_template(self):
"""Returns the template as used in the correctness/performance tests"""
- buffer_type = "clblast::" + self.buffer_type if self.buffer_type in [D_FLOAT2, D_DOUBLE2] else self.buffer_type
- beta_cpp = "clblast::" + self.beta_cpp if self.beta_cpp in [D_FLOAT2, D_DOUBLE2] else self.beta_cpp
+ buffer_type = "clblast::" + self.buffer_type if self.is_non_standard() else self.buffer_type
+ beta_cpp = "clblast::" + self.beta_cpp if self.beta_cpp in [D_HALF, D_FLOAT2, D_DOUBLE2] else self.beta_cpp
if self.buffer_type != self.beta_cpp:
return "<" + buffer_type + "," + self.beta_cpp + ">, " + buffer_type + ", " + beta_cpp
return "<" + buffer_type + ">, " + buffer_type + ", " + beta_cpp