summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG3
-rw-r--r--README.md4
-rw-r--r--scripts/benchmark/benchmark.py135
-rw-r--r--scripts/benchmark/plot.py76
-rw-r--r--scripts/benchmark/settings.py321
-rw-r--r--scripts/benchmark/utils.py66
-rw-r--r--scripts/graphs/common.r262
-rw-r--r--scripts/graphs/xaxpy.r96
-rwxr-xr-xscripts/graphs/xgemm.r94
-rw-r--r--scripts/graphs/xgemm_small.r56
-rw-r--r--scripts/graphs/xgemv.r83
-rw-r--r--scripts/graphs/xsymm.r94
-rw-r--r--scripts/graphs/xsyr2k.r94
-rw-r--r--scripts/graphs/xsyrk.r94
-rw-r--r--scripts/graphs/xtrmm.r127
15 files changed, 602 insertions, 1003 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 34b81a81..1455cf19 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -7,13 +7,14 @@ Development version (next release)
- Fixed bugs in the half-precision routines HTBMV/HTPMV/HTRMV/HSYR2K/HTRMM
- Tests now also exit with an error code when OpenCL errors or compilation errors occur
- Tests now also check for the L2 error in case of half-precision
+- Replaced the R graph scripts with Python/Matplotlib scripts
- Various minor fixes and enhancements
- Added tuned parameters for various devices (see README)
- Added the OverrideParameters function to the API to be able to supply custom tuning parameters
- Added triangular solver (level-2 & level-3) routines:
* STRSV/DTRSV/CTRSV/ZTRSV (experimental, un-optimized)
* STRSM/DTRSM/CTRSM/ZTRSM (experimental, un-optimized)
-- Added batched (non-BLAS) routines:
+- Added batched (not part of the BLAS standard) routines:
* SAXPYBATCHED/DAXPYBATCHED/CAXPYBATCHED/ZAXPYBATCHED/HAXPYBATCHED (batched version of AXPY)
* SGEMMBATCHED/DGEMMBATCHED/CGEMMBATCHED/ZGEMMBATCHED/HGEMMBATCHED (batched version of GEMM)
diff --git a/README.md b/README.md
index d49648d9..3109b4bf 100644
--- a/README.md
+++ b/README.md
@@ -205,9 +205,9 @@ To test the performance of CLBlast and compare optionally against [clBLAS](http:
The performance tests come in the form of client executables named `clblast_client_xxxxx`, in which `xxxxx` is the name of a routine (e.g. `xgemm`). These clients take a bunch of configuration options and directly run CLBlast in a head-to-head performance test against optionally clBLAS and/or a CPU BLAS library. You can use the command-line options `-clblas 1` or `-cblas 1` to select a library to test against.
-The folder `doc/performance` contains some PDF files with performance results on tested devices. Performance is compared in this case against a tuned version of the clBLAS library. These graphs can be generated automatically on your own device. First, compile CLBlast with the clients enabled. Then, make sure your installation of the reference clBLAS is performance-tuned by running the `tune` executable. Finally, run one of the graph-scripts found in `scripts/graphs` using R. For example, to generate the Xgemm PDF on device 1 of platform 0 from the `build` subdirectory:
+The folder `doc/performance` contains some PDF files with performance results on tested devices. Performance is compared in this case against a tuned version of the clBLAS library. These graphs can be generated automatically on your own device. First, compile CLBlast with the clients enabled. Then, make sure your installation of the reference clBLAS is performance-tuned by running the `tune` executable (shipped with clBLAS). Finally, run the Python/Matplotlib graph-script found in `scripts/benchmark/benchmark.py`. For example, to generate the SGEMM PDF on device 1 of platform 0 from the `build` subdirectory:
- Rscript ../scripts/graphs/xgemm.r 0 1
+ python ../scripts/benchmark/benchmark.py --platform 0 --device 1 --benchmark gemm
Note that the CLBlast library provides pre-tuned parameter-values for some devices only: if your device is not among these, then out-of-the-box performance might be poor. See above under `Using the tuners` to find out how to tune for your device.
diff --git a/scripts/benchmark/benchmark.py b/scripts/benchmark/benchmark.py
new file mode 100644
index 00000000..1574fdc4
--- /dev/null
+++ b/scripts/benchmark/benchmark.py
@@ -0,0 +1,135 @@
+#!/usr/bin/env python
+
+# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This file follows the
+# PEP8 Python style guide and uses a max-width of 120 characters per line.
+#
+# Author(s):
+# Cedric Nugteren <www.cedricnugteren.nl>
+
+import argparse
+import json
+import os
+import sys
+
+import settings
+import plot
+import utils
+
# Maps every value accepted by the --benchmark option to its experiment description from settings.py
EXPERIMENTS = dict(
    axpy=settings.AXPY,
    gemv=settings.GEMV,
    gemm=settings.GEMM,
    gemm_small=settings.GEMM_SMALL,
    symm=settings.SYMM,
    syrk=settings.SYRK,
    summary=settings.SUMMARY,
)
+
+
def run_benchmark(name, arguments_list, precision, num_runs, platform, device):
    """Runs the CLBlast client binary for one benchmark and collects the parsed results.

    Args:
        name: Routine name without the 'x' prefix (e.g. "gemm"); selects './clblast_client_x<name>'.
        arguments_list: List of dictionaries, each mapping client option names to values for one invocation.
        precision: Value passed to the client as '-precision'; 16 triggers an extra 32-bit comparison run.
        num_runs: Value passed to the client as '-runs'.
        platform: Value passed to the client as '-platform'.
        device: Value passed to the client as '-device'.

    Returns:
        A flat list with one dictionary per parsed result row, in the order of the invocations.
    """
    binary = "./clblast_client_x" + name

    # Loops over sub-benchmarks per benchmark
    results = []
    for arguments in arguments_list:

        # Sets the arguments
        constant_arguments = ["-warm_up", "-q", "-no_abbrv", "-cblas 0"]
        common_arguments = ["-precision %d" % precision, "-runs %d" % num_runs]
        opencl_arguments = ["-platform %s" % platform, "-device %s" % device]
        all_arguments = opencl_arguments + common_arguments + constant_arguments
        # The loop variable is deliberately not called 'name' to avoid clobbering the routine name
        for option, value in arguments.items():
            all_arguments.append("-" + option + " " + str(value))

        # Calls the binary and parses the results
        benchmark_output = utils.run_binary(binary, all_arguments)
        result = utils.parse_results(benchmark_output)

        # For half-precision: also runs single-precision for comparison
        if precision == 16:
            all_arguments = [arg if arg != "-precision 16" else "-precision 32" for arg in all_arguments]
            benchmark_output = utils.run_binary(binary, all_arguments)
            result_extra = utils.parse_results(benchmark_output)

            # zip() stops at the shorter of the two result lists, so a truncated run cannot cause an IndexError
            for row, row_fp32 in zip(result, result_extra):
                row["GBs_1_FP32"] = row_fp32["GBs_1"]
                row["GBs_2"] = row_fp32["GBs_2"]  # the reference library is only measured in FP32
                row["GFLOPS_1_FP32"] = row_fp32["GFLOPS_1"]
                row["GFLOPS_2"] = row_fp32["GFLOPS_2"]

        results.extend(result)
    return results
+
+
def main(argv):
    """Runs one experiment (or reloads its stored results) and plots the outcome to a PDF file.

    Results are stored in '<benchmark name>_benchmarks.json' and the plot in '<benchmark name>_plot.pdf', both in
    the current working directory.

    Args:
        argv: The command-line arguments without the program name (e.g. sys.argv[1:]).
    """

    # Parses the command-line arguments
    benchmark_names = sorted(EXPERIMENTS.keys())
    parser = argparse.ArgumentParser()
    parser.add_argument("-b", "--benchmark", help="The benchmark to perform (choose from %s)" % benchmark_names)
    parser.add_argument("-p", "--platform", help="The ID of the OpenCL platform to test on")
    parser.add_argument("-d", "--device", help="The ID of the OpenCL device to test on")
    parser.add_argument("-n", "--num_runs", type=int, default=10, help="The number of benchmark repeats for averaging")
    parser.add_argument("-x", "--precision", type=int, default=32,
                        help="The precision to test for (choose from 16, 32, 64, 3232, 6464)")
    parser.add_argument("-l", "--load_from_disk", action="store_true",
                        help="Load previous benchmark results from disk (if available) instead of re-running them")
    parser.add_argument("-t", "--plot_title", default=None, help="The title for the plots, defaults to benchmark name")
    parser.add_argument("-v", "--verbose", action="store_true", help="Increase verbosity of the script")
    cl_args = parser.parse_args(argv)

    # Validates the benchmark first: a missing '-b' leaves it at None, which must not reach '.upper()' below
    if cl_args.benchmark not in EXPERIMENTS:
        print("[benchmark] Invalid benchmark '%s', choose from %s" % (cl_args.benchmark, benchmark_names))
        return
    experiment = EXPERIMENTS[cl_args.benchmark]
    benchmarks = experiment["benchmarks"]

    # The benchmark name and plot title
    benchmark_name = utils.precision_to_letter(cl_args.precision) + cl_args.benchmark.upper()
    if cl_args.plot_title is None:
        cl_args.plot_title = benchmark_name

    # Either run the benchmarks for this experiment or load old results from disk
    json_file_name = benchmark_name.lower() + "_benchmarks.json"
    if cl_args.load_from_disk and os.path.isfile(json_file_name):
        print("[benchmark] Loading previous benchmark results from '" + json_file_name + "'")
        with open(json_file_name) as f:
            results = json.load(f)
    else:

        # Runs all the individual benchmarks
        print("[benchmark] Running %d benchmarks for settings '%s'" % (len(benchmarks), cl_args.benchmark))
        results = {"label_names": experiment["label_names"], "num_rows": experiment["num_rows"],
                   "num_cols": experiment["num_cols"], "benchmarks": []}
        for benchmark in benchmarks:
            result = run_benchmark(benchmark["name"], benchmark["arguments"], cl_args.precision, cl_args.num_runs,
                                   cl_args.platform, cl_args.device)
            results["benchmarks"].append(result)

        # Stores the results to disk (text mode: json.dump writes str objects, which a binary file rejects)
        print("[benchmark] Saving benchmark results to '" + json_file_name + "'")
        with open(json_file_name, "w") as f:
            json.dump(results, f, sort_keys=True, indent=4)

    # Retrieves the data from the benchmark settings
    pdf_file_name = benchmark_name.lower() + "_plot.pdf"
    titles = [utils.precision_to_letter(cl_args.precision) + b["name"].upper() + " " + b["title"] for b in benchmarks]
    x_keys = [b["x_keys"] for b in benchmarks]
    y_keys = [b["y_keys"] for b in benchmarks]
    x_labels = [b["x_label"] for b in benchmarks]
    y_labels = [b["y_label"] for b in benchmarks]
    label_names = results["label_names"]

    # For half-precision: also adds single-precision results for comparison
    if cl_args.precision == 16:
        label_names = ["CLBlast FP16", "clBLAS FP32", "CLBlast FP32"]
        y_keys = [y_key + [y_key[0] + "_FP32"] for y_key in y_keys]

    # Plots the graphs
    plot.plot_graphs(results["benchmarks"], pdf_file_name, results["num_rows"], results["num_cols"],
                     x_keys, y_keys, titles, x_labels, y_labels,
                     label_names, cl_args.plot_title, cl_args.verbose)

    print("[benchmark] All done")


if __name__ == '__main__':
    main(sys.argv[1:])
diff --git a/scripts/benchmark/plot.py b/scripts/benchmark/plot.py
new file mode 100644
index 00000000..dc4800fe
--- /dev/null
+++ b/scripts/benchmark/plot.py
@@ -0,0 +1,76 @@
+# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This file follows the
+# PEP8 Python style guide and uses a max-width of 120 characters per line.
+#
+# Author(s):
+# Cedric Nugteren <www.cedricnugteren.nl>
+
+import utils
+
+import matplotlib.pyplot as plt
+
+
# Line colours as RGB triplets scaled from 0-255 to the 0-1 range; the hexadecimal equivalent is given per colour
BLUEISH = [71 / 255.0, 101 / 255.0, 177 / 255.0]  # #4765b1
REDISH = [214 / 255.0, 117 / 255.0, 104 / 255.0]  # #d67568
PURPLISH = [85 / 255.0, 0 / 255.0, 119 / 255.0]  # #550077

# Colour and line/marker format per plotted data series, indexed by the position of the series
COLORS = [BLUEISH, REDISH, PURPLISH]
MARKERS = ["o-", "x-", ".-"]
+
+
def plot_graphs(results, file_name, num_rows, num_cols,
                x_keys, y_keys, titles, x_labels, y_labels,
                label_names, title, verbose):
    """Plots one sub-graph per benchmark on a num_rows x num_cols grid, saves it to disk and shows it.

    Args:
        results: List with, per sub-plot, a list of result dictionaries (one per data point).
        file_name: Path of the file the figure is saved to.
        num_rows: Number of sub-plot rows; num_rows * num_cols must equal len(results).
        num_cols: Number of sub-plot columns.
        x_keys: Per sub-plot, the result keys whose values are joined with ',' to form the x-tick labels.
        y_keys: Per sub-plot, the result keys of the data series to draw (at most len(COLORS) series).
        titles: Per sub-plot, the title.
        x_labels: Per sub-plot, the x-axis label.
        y_labels: Per sub-plot, the y-axis label.
        label_names: Legend names, one per data series.
        title: Title of the whole figure.
        verbose: Currently unused by this function.
    """
    assert len(results) == num_rows * num_cols
    assert len(x_keys) == len(results)
    assert len(y_keys) == len(results)
    assert len(titles) == len(results)
    assert len(x_labels) == len(results)
    assert len(y_labels) == len(results)

    # Initializes the plot. With squeeze=False 'axes' is always a 2D array, so also a 1x1 layout supports '.flat'
    size_x = 6 * num_cols
    size_y = 6 * num_rows
    fig, axes = plt.subplots(nrows=num_rows, ncols=num_cols, figsize=(size_x, size_y), facecolor='w', edgecolor='k',
                             squeeze=False)
    fig.text(.5, .93, title, horizontalalignment="center", fontsize=18)

    # Loops over each subplot
    for index, result in enumerate(results):
        ax = axes.flat[index]
        plt.sca(ax)
        print("[plot] Plotting subplot %d" % index)

        # Sets the x-axis labels: one tick per data point, labelled with the (abbreviated) values of all x-keys
        x_list = [[r[x_key] for r in result] for x_key in x_keys[index]]
        x_ticks = [",".join([utils.float_to_kilo_mega(v) for v in values]) for values in zip(*x_list)]
        x_location = range(len(x_ticks))

        # Sets the y-data
        y_list = [[r[y_key] for r in result] for y_key in y_keys[index]]
        y_max = max([max(y) for y in y_list])

        # Sets the axes: the y-limit is 120% of the maximum, rounded up to the next multiple of 'y_rounding'
        y_rounding = 10 if y_max < 80 else 50 if y_max < 400 else 200
        y_axis_limit = (y_max * 1.2) - ((y_max * 1.2) % y_rounding) + y_rounding
        plt.ylim(ymin=0, ymax=y_axis_limit)
        plt.xticks(x_location, x_ticks, rotation='vertical')

        # Sets the labels
        ax.set_title(titles[index], fontsize=14, y=0.93)
        ax.set_ylabel(y_labels[index], fontsize=14)
        ax.set_xlabel(x_labels[index], fontsize=14)
        ax.xaxis.set_label_coords(0.5, 0.06)

        # Plots the graph
        assert len(COLORS) >= len(y_keys[index])
        assert len(MARKERS) >= len(y_keys[index])
        assert len(label_names) == len(y_keys[index])
        for i in range(len(y_keys[index])):
            ax.plot(x_location, y_list[i], MARKERS[i], label=label_names[i], color=COLORS[i])

        # Sets the legend: placed lower when there are more series, so it stays below the sub-plot title
        leg = ax.legend(loc=(0.02, 0.88 - 0.05 * len(y_keys[index])))
        leg.draw_frame(False)

    # Saves the plot to disk
    fig.savefig(file_name, bbox_inches='tight')
    plt.show()
diff --git a/scripts/benchmark/settings.py b/scripts/benchmark/settings.py
new file mode 100644
index 00000000..0243832f
--- /dev/null
+++ b/scripts/benchmark/settings.py
@@ -0,0 +1,321 @@
+#!/usr/bin/env python
+
+# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This file follows the
+# PEP8 Python style guide and uses a max-width of 120 characters per line.
+#
+# Author(s):
+# Cedric Nugteren <www.cedricnugteren.nl>
+
+import utils
+
+
def _axpy_arguments(n, step, num_steps, inc_x=1, inc_y=1):
    """Returns the client options of a single AXPY invocation."""
    return {"n": n, "incx": inc_x, "incy": inc_y, "step": step, "num_steps": num_steps}


def _axpy_subplot(title, arguments, x_label="vector sizes (n)", x_keys=None):
    """Describes one AXPY sub-plot; bandwidth in GB/s of both libraries is on the y-axis."""
    return {
        "name": "axpy",
        "title": title,
        "x_label": x_label, "x_keys": ["n"] if x_keys is None else x_keys,
        "y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
        "arguments": arguments,
    }


# The AXPY experiment: six sub-plots on a 2x3 grid
AXPY = {
    "label_names": ["CLBlast", "clBLAS"],
    "num_rows": 2, "num_cols": 3,
    "benchmarks": [
        _axpy_subplot("multiples of 256K", [_axpy_arguments(utils.k(256), utils.k(256), 16)]),
        _axpy_subplot("multiples of 256K+1", [_axpy_arguments(utils.k(256) + 1, utils.k(256) + 1, 16)]),
        _axpy_subplot("around n=1M", [_axpy_arguments(utils.m(1), 1, 16)]),
        _axpy_subplot("around n=16M", [_axpy_arguments(utils.m(16), 1, 16)]),
        _axpy_subplot("strides (n=8M)",
                      [_axpy_arguments(utils.m(8), 0, 1, inc_x=inc_x, inc_y=inc_y)
                       for inc_x in [1, 2, 4] for inc_y in [1, 2, 4]],
                      x_label="increments/strides for x,y", x_keys=["incx", "incy"]),
        _axpy_subplot("powers of 2",
                      [_axpy_arguments(n, 0, 1) for n in utils.powers_of_2(utils.k(32), utils.m(64))]),
    ]
}
+
def _gemv_arguments(size, layout, step, num_steps, inc_x=1, inc_y=1):
    """Returns the client options of a single GEMV invocation on a square (n=m) problem."""
    return {"n": size, "m": size, "incx": inc_x, "incy": inc_y, "layout": layout,
            "step": step, "num_steps": num_steps}


def _gemv_subplot(title, arguments, x_label="matrix/vector sizes (n=m)", x_keys=None):
    """Describes one GEMV sub-plot; bandwidth in GB/s of both libraries is on the y-axis."""
    return {
        "name": "gemv",
        "title": title,
        "x_label": x_label, "x_keys": ["n"] if x_keys is None else x_keys,
        "y_label": "GB/s (higher is better)", "y_keys": ["GBs_1", "GBs_2"],
        "arguments": arguments,
    }


# The GEMV experiment: six sub-plots on a 2x3 grid; the 'rotated' variants use layout 101 instead of 102
GEMV = {
    "label_names": ["CLBlast", "clBLAS"],
    "num_rows": 2, "num_cols": 3,
    "benchmarks": [
        _gemv_subplot("multiples of 256", [_gemv_arguments(256, 102, 256, 20)]),
        _gemv_subplot("multiples of 257", [_gemv_arguments(257, 102, 257, 20)]),
        _gemv_subplot("around n=m=4K", [_gemv_arguments(4096, 102, 1, 16)]),
        _gemv_subplot("multiples of 256 rotated", [_gemv_arguments(256, 101, 256, 20)]),
        _gemv_subplot("multiples of 257 rotated", [_gemv_arguments(257, 101, 257, 20)]),
        _gemv_subplot("strides (n=m=4K)",
                      [_gemv_arguments(4096, 102, 0, 1, inc_x=inc_x, inc_y=inc_y)
                       for inc_x in [1, 2, 4] for inc_y in [1, 2, 4]],
                      x_label="increments/strides for x,y", x_keys=["incx", "incy"]),
    ]
}
+
def _gemm_arguments(size, step, num_steps, layout=102, trans_a=111, trans_b=111):
    """Returns the client options of a single GEMM invocation on a cubic (m=n=k) problem."""
    return {"m": size, "n": size, "k": size, "layout": layout,
            "transA": trans_a, "transB": trans_b, "step": step, "num_steps": num_steps}


def _gemm_subplot(title, arguments, x_label="matrix sizes (m=n=k)", x_keys=None):
    """Describes one GEMM sub-plot; GFLOPS of both libraries is on the y-axis."""
    return {
        "name": "gemm",
        "title": title,
        "x_label": x_label, "x_keys": ["m"] if x_keys is None else x_keys,
        "y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
        "arguments": arguments,
    }


# The GEMM experiment: six sub-plots on a 2x3 grid
GEMM = {
    "label_names": ["CLBlast", "clBLAS"],
    "num_rows": 2, "num_cols": 3,
    "benchmarks": [
        _gemm_subplot("multiples of 128", [_gemm_arguments(128, 128, 20)]),
        _gemm_subplot("multiples of 129", [_gemm_arguments(129, 129, 20)]),
        _gemm_subplot("around m=n=k=512", [_gemm_arguments(512, 1, 16)]),
        _gemm_subplot("around m=n=k=2048", [_gemm_arguments(2048, 1, 16)]),
        _gemm_subplot("layouts/transposing (m=n=k=1K)",
                      [_gemm_arguments(1024, 0, 1, layout=layout, trans_a=transA, trans_b=transB)
                       for layout in [101, 102] for transA in [111, 112] for transB in [111, 112]],
                      x_label="layout, transA, transB", x_keys=["layout", "transA", "transB"]),
        _gemm_subplot("powers of 2",
                      [_gemm_arguments(n, 0, 1) for n in utils.powers_of_2(8, utils.k(4))]),
    ]
}
+
def _gemm_small_subplot(title, step, num_steps):
    """Describes one small-matrix GEMM sub-plot: a sweep starting at m=n=k=128 with the given step and length."""
    return {
        "name": "gemm",
        "title": title,
        "x_label": "matrix sizes (m=n=k)", "x_keys": ["m"],
        "y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
        "arguments": [{"m": 128, "n": 128, "k": 128, "layout": 102,
                       "transA": 111, "transB": 111, "step": step, "num_steps": num_steps}],
    }


# The small-matrix GEMM experiment: two sub-plots below each other
GEMM_SMALL = {
    "label_names": ["CLBlast", "clBLAS"],
    "num_rows": 2, "num_cols": 1,
    "benchmarks": [
        _gemm_small_subplot("small matrices in steps of 16", 16, 57),
        _gemm_small_subplot("small matrices in steps of 1", 1, 385),
    ]
}
+
def _symm_arguments(size, step, num_steps, layout=102, side=141, triangle=121):
    """Returns the client options of a single SYMM invocation on a square (m=n) problem."""
    return {"m": size, "n": size, "layout": layout,
            "side": side, "triangle": triangle, "step": step, "num_steps": num_steps}


def _symm_subplot(title, arguments, x_label="matrix sizes (m=n)", x_keys=None):
    """Describes one SYMM sub-plot; GFLOPS of both libraries is on the y-axis."""
    return {
        "name": "symm",
        "title": title,
        "x_label": x_label, "x_keys": ["m"] if x_keys is None else x_keys,
        "y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
        "arguments": arguments,
    }


# The SYMM experiment: six sub-plots on a 2x3 grid
SYMM = {
    "label_names": ["CLBlast", "clBLAS"],
    "num_rows": 2, "num_cols": 3,
    "benchmarks": [
        _symm_subplot("multiples of 128", [_symm_arguments(128, 128, 20)]),
        _symm_subplot("multiples of 129", [_symm_arguments(129, 129, 20)]),
        _symm_subplot("around m=n=512", [_symm_arguments(512, 1, 16)]),
        _symm_subplot("around m=n=2048", [_symm_arguments(2048, 1, 16)]),
        _symm_subplot("layouts/sides/triangles (m=n=1K)",
                      [_symm_arguments(1024, 0, 1, layout=layout, side=side, triangle=triangle)
                       for layout in [101, 102] for side in [141, 142] for triangle in [121, 122]],
                      x_label="layout, side, triangle", x_keys=["layout", "side", "triangle"]),
        _symm_subplot("powers of 2",
                      [_symm_arguments(n, 0, 1) for n in utils.powers_of_2(8, utils.k(4))]),
    ]
}
+
def _syrk_arguments(size, step, num_steps):
    """Returns the client options of a single SYRK invocation on a square (n=k) problem."""
    # NOTE(review): 'side' is passed here although the layouts sub-plot below varies 'triangle' and 'transA'
    # instead - confirm that the syrk client accepts/ignores a '-side' option.
    return {"n": size, "k": size, "layout": 102,
            "side": 141, "triangle": 121, "step": step, "num_steps": num_steps}


def _syrk_subplot(title, arguments, x_label="matrix sizes (n=k)", x_keys=None):
    """Describes one SYRK sub-plot; GFLOPS of both libraries is on the y-axis."""
    return {
        "name": "syrk",
        "title": title,
        "x_label": x_label, "x_keys": ["n"] if x_keys is None else x_keys,
        "y_label": "GFLOPS (higher is better)", "y_keys": ["GFLOPS_1", "GFLOPS_2"],
        "arguments": arguments,
    }


# The SYRK experiment: six sub-plots on a 2x3 grid
SYRK = {
    "label_names": ["CLBlast", "clBLAS"],
    "num_rows": 2, "num_cols": 3,
    "benchmarks": [
        _syrk_subplot("multiples of 128", [_syrk_arguments(128, 128, 20)]),
        _syrk_subplot("multiples of 129", [_syrk_arguments(129, 129, 20)]),
        _syrk_subplot("around n=k=512", [_syrk_arguments(512, 1, 16)]),
        _syrk_subplot("around n=k=2048", [_syrk_arguments(2048, 1, 16)]),
        # NOTE(review): the title mentions 'sides' while the x-axis shows layout/triangle/transA - confirm wording
        _syrk_subplot("layouts/sides/triangles (n=k=1K)",
                      [{"n": 1024, "k": 1024, "layout": layout,
                        "triangle": triangle, "transA": transA, "step": 0, "num_steps": 1}
                       for layout in [101, 102] for triangle in [121, 122] for transA in [111, 112]],
                      x_label="layout, triangle, transA", x_keys=["layout", "triangle", "transA"]),
        _syrk_subplot("powers of 2",
                      [_syrk_arguments(n, 0, 1) for n in utils.powers_of_2(8, utils.k(4))]),
    ]
}
+
# The summary experiment: the first two sub-plots of AXPY, GEMV, GEMM and SYMM (in that order) on a 4x2 grid.
# The benchmark descriptions are shared with (not copied from) the individual experiments.
SUMMARY = {
    "label_names": ["CLBlast", "clBLAS"],
    "num_rows": 4, "num_cols": 2,
    "benchmarks": [experiment["benchmarks"][position]
                   for experiment in (AXPY, GEMV, GEMM, SYMM) for position in (0, 1)],
}
diff --git a/scripts/benchmark/utils.py b/scripts/benchmark/utils.py
new file mode 100644
index 00000000..62e18de2
--- /dev/null
+++ b/scripts/benchmark/utils.py
@@ -0,0 +1,66 @@
+# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This file follows the
+# PEP8 Python style guide and uses a max-width of 120 characters per line.
+#
+# Author(s):
+# Cedric Nugteren <www.cedricnugteren.nl>
+
+import csv
+import subprocess
+
+
def k(value):
    """Returns 'value' multiplied by 1024 (i.e. 'value' kilo)."""
    kilo = 1024
    return value * kilo
+
+
def m(value):
    """Returns 'value' multiplied by 1024 * 1024 (i.e. 'value' mega)."""
    kilo = 1024
    return value * kilo * kilo
+
+
def float_to_kilo_mega(value):
    """Formats a number without decimals, abbreviated with 'K' or 'M' where possible.

    Exact multiples of 1024 * 1024 that are larger than 1024 * 1024 get an 'M' suffix, other exact multiples of 1024
    that are larger than 1024 get a 'K' suffix, and everything else (including 1024 itself) is printed in full.
    """
    kilo = 1024
    mega = kilo * kilo
    if value > mega and value % mega == 0:
        return "%.0fM" % (value / (1024.0 * 1024.0))
    if value > kilo and value % kilo == 0:
        return "%.0fK" % (value / 1024.0)
    return "%.0f" % value
+
+
def powers_of_2(start, stop):
    """Yields 'start', 2 * 'start', 4 * 'start', ... for as long as the value does not exceed 'stop'.

    Args:
        start: The first value to yield; must be positive whenever start <= stop.
        stop: The inclusive upper bound of the yielded values.

    Raises:
        ValueError: If 'start' is zero or negative while start <= stop: doubling such a value never exceeds 'stop',
            so the sequence would be infinite. Raised when the generator is first advanced.
    """
    if start <= stop and start <= 0:
        raise ValueError("powers_of_2 requires a positive start value, got %s" % str(start))
    while start <= stop:
        yield start
        start *= 2
+
+
def precision_to_letter(precision):
    """Returns the routine-name prefix letter for a precision value, or "X" for an unknown precision."""
    known_precisions = ((16, "H"), (32, "S"), (64, "D"), (3232, "C"), (6464, "Z"))
    for bits, letter in known_precisions:
        if precision == bits:
            return letter
    return "X"
+
+
def run_binary(command, arguments):
    """Runs a binary through the shell and returns everything it wrote to standard output.

    Args:
        command: The binary to run (e.g. "./clblast_client_xgemm").
        arguments: List of argument strings; they are joined with spaces and appended to the command.

    Returns:
        The captured standard output as text, or False if the process could not be started.
    """
    full_command = command + " " + " ".join(arguments)
    print("[benchmark] Calling binary: %s" % str(full_command))
    try:
        # NOTE(review): the command line is interpreted by the shell, so only pass trusted commands/arguments.
        # universal_newlines=True yields text (not bytes) on both Python 2 and 3, which the CSV parsing relies on.
        process = subprocess.Popen(full_command, shell=True, stdout=subprocess.PIPE, universal_newlines=True)
        # communicate() reads until end-of-file and waits for the process, so no child process is left behind
        output, _ = process.communicate()
        return output
    except OSError as e:
        print("[benchmark] Error while running the binary, got exception: %s" % str(e))
        return False
+
+
def _to_number(text):
    """Converts a CSV field to a float if it contains a '.', else to an int (or a float for e.g. '1e-05')."""
    if "." in text:
        return float(text)
    try:
        return int(text)
    except ValueError:
        return float(text)  # covers exponent notation without a decimal point, 'inf' and 'nan'


def parse_results(csv_data):
    """Parses the semicolon-separated output of a client binary into a list of dictionaries.

    Args:
        csv_data: The text to parse, with the column names on the first line. Bytes are decoded as UTF-8 first; an
            empty or false value (such as the False that run_binary returns on failure) gives an empty list.

    Returns:
        A list with one dictionary per data row, mapping each column name to an int or a float.

    Raises:
        ValueError: If a field is not a valid number.
    """
    if not csv_data:
        return []
    if not isinstance(csv_data, str):
        csv_data = csv_data.decode("utf-8")
    rows = list(csv.DictReader(csv_data.split("\n"), delimiter=";", skipinitialspace=True))
    for row in rows:
        for key in row:
            row[key] = _to_number(row[key])
    return rows
diff --git a/scripts/graphs/common.r b/scripts/graphs/common.r
deleted file mode 100644
index 2c437a9f..00000000
--- a/scripts/graphs/common.r
+++ /dev/null
@@ -1,262 +0,0 @@
-
-# ==================================================================================================
-# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-# project uses a tab-size of two spaces and a max-width of 100 characters per line.
-#
-# Author(s):
-# Cedric Nugteren <www.cedricnugteren.nl>
-#
-# This file implements the common performance scripts, such as creating a graph.
-#
-# ==================================================================================================
-
-# Colours
-black = "#000000"
-grey = "#888888"
-purplish = "#550077" # [ 85, 0,119] lumi=26
-blueish = "#4765b1" # [ 71,101,177] lumi=100
-redish = "#d67568" # [214,117,104] lumi=136
-greenish = "#9bd4ca" # [155,212,202] lumi=199
-
-# Sets the graph markers (circles, triangles, etc.)
-pchs = c(15, 18, 17, 12)
-
-# Other constants
-kilo = 1024
-mega = 1024*1024
-
-# R options
-options("width"=170)
-
-# ==================================================================================================
-
-# Settings
-num_runs <- 5
-num_runs_short <- 50
-xtics_subset_threshold <- 100
-xtics_subset_stepsize <- 8
-
-devices <- c("-platform","-device")
-options_string <- "-q -no_abbrv -cblas 0"
-
-# Command-line arguments
-command_line <- commandArgs(trailingOnly=TRUE)
-if (length(command_line) != 2) {
- print("Usage for device Z on platform Y: Rscript xxxxx.r Y Z")
- quit()
-}
-platform_id <- command_line[1]
-device_id <- command_line[2]
-
-# Selects the device
-devices_values <- c(platform_id, device_id)
-devices_string <- paste(devices, devices_values, collapse=" ")
-
-
-# Filter the string: only lines containing a ";" can be valid lines
-filter_string <- function(raw_result_string) {
- result_string <- c()
- for (line in raw_result_string) {
- if (grepl(";",line)) {
- result_string <-
- c(result_string, line)
- }
- }
- return(result_string)
-}
-
-# ==================================================================================================
-
-# The main function
-main <- function(routine_name, precision, test_names, test_values,
- test_xlabels, test_xaxis, metric_gflops) {
-
- # Names
- display_name <- toupper(routine_name)
- if (precision == 16) { display_name <- gsub("^X","H",display_name); }
- if (precision == 32) { display_name <- gsub("^X","S",display_name); }
- if (precision == 64) { display_name <- gsub("^X","D",display_name); }
- if (precision == 3232) { display_name <- gsub("^X","C",display_name); }
- if (precision == 6464) { display_name <- gsub("^X","Z",display_name); }
- executable <- paste("./clblast_client_", routine_name, sep="")
-
- # Display
- library_names <- c("CLBlast", "clBLAS")
- if (precision == 16) { library_names <- c("CLBlast FP16", "CLBlast FP32", "clBLAS FP32"); }
- colourset <- c(blueish, redish)
- if (precision == 16) { colourset <- c(blueish, purplish, redish); }
-
- # Configures the outputfile
- file_name <- paste(display_name, ".pdf", sep="")
- if (length(test_names) == 6) {
- pdf(file_name, height=8, width=13)
- par(mfrow=c(2, 3))
- par(oma=c(0, 0, 0, 0))
- par(mar=c(4.6, 4.4, 1.5, 0)) # bottom, left, top, right [c(5.1, 4.1, 4.1, 2.1)]
- par(mgp=c(2.8, 0.6, 0)) # location of xlab/ylab, tick-mark labels, tick marks [c(3, 1, 0)]
- }
- else { # length(test_names) == 2
- pdf(file_name, height=8, width=13)
- par(mfrow=c(2, 1))
- par(oma=c(0, 0, 0, 0))
- par(mar=c(4.6, 4.4, 1.5, 0)) # bottom, left, top, right [c(5.1, 4.1, 4.1, 2.1)]
- par(mgp=c(2.8, 0.6, 0)) # location of xlab/ylab, tick-mark labels, tick marks [c(3, 1, 0)]
- }
-
- # Loops over the test-cases
- for (test_id in 1:length(test_names)) {
- params_values <- test_values[[test_id]]
-
- # Loops over the commands within a single list (within a case)
- for (command_id in 1:length(params_values)) {
-
- # Runs the client and captures the result
- params_string <- paste(parameters, params_values[[command_id]], collapse=" ")
- arguments <- paste(devices_string, params_string, options_string, sep=" ")
- print(paste("Running", executable, arguments, sep=" "))
- raw_result_string <- system2(command=executable, args=arguments, stdout=TRUE)
- result_string <- filter_string(raw_result_string)
-
- # Reads the result into a dataframe
- command_db <- read.csv(text=result_string, sep=";")
-
- # For half-precision: also runs the FP32 version for comparison
- if (precision == 16) {
- params_string <- gsub("-precision 16", "-precision 32", params_string)
- arguments <- paste(devices_string, params_string, options_string, sep=" ")
- print(paste("Running", executable, arguments, sep=" "))
- raw_result_string <- system2(command=executable, args=arguments, stdout=TRUE)
- result_string <- filter_string(raw_result_string)
-
- # Reads the result into a dataframe
- command_db_32 <- read.csv(text=result_string, sep=";")
- stopifnot(nrow(command_db) == nrow(command_db_32))
-
- # Combines the results
- command_db["ms_FP32_1"] = command_db_32$ms_1
- command_db["GFLOPS_FP32_1"] = command_db_32$GFLOPS_1
- command_db["GBs_FP32_1"] = command_db_32$GBs_1
- command_db["ms_FP32_2"] = command_db_32$ms_2
- command_db["GFLOPS_FP32_2"] = command_db_32$GFLOPS_2
- command_db["GBs_FP32_2"] = command_db_32$GBs_2
- }
-
- # Append the results to the final dataframe
- if (command_id == 1) {
- db <- command_db
- } else {
- db <- rbind(db, command_db)
- }
- }
- print(db)
-
- # Sets the values on the x-axis and their labels (test dependent)
- if (is.character(test_xaxis[[test_id]][[1]])) {
- xdata <- db[,test_xaxis[[test_id]][[1]]]
- xtics <- xdata
- log_scale <- test_xaxis[[test_id]][[2]]
- }
- else {
- xdata <- test_xaxis[[test_id]][[1]]
- xtics <- test_xaxis[[test_id]][[2]]
- log_scale <- ""
- }
-
- # Plots the graph with GFLOPS on the Y-axis
- if (metric_gflops) {
- if (precision == 16) {
- ydata = list(db$GFLOPS_1, db$GFLOPS_FP32_1, db$GFLOPS_FP32_2)
- ymax = max(max(db$GFLOPS_1), max(db$GFLOPS_FP32_1), max(db$GFLOPS_FP32_2))
- } else {
- ydata = list(db$GFLOPS_1, db$GFLOPS_2)
- ymax = max(max(db$GFLOPS_1), max(db$GFLOPS_2))
- }
- plot_graph(xdata=xdata, ydata=ydata, log_setting=log_scale,
- xmin=min(xdata), xmax=max(xdata),
- ymin=0, ymax=ymax,
- xtics=xtics,
- xlabel=test_xlabels[[test_id]], ylabel="GFLOPS (higher is better)",
- graph_title=paste(display_name, test_names[[test_id]], sep=" "),
- multiple=50, experiment_names=library_names, colourset=colourset)
- # Plots the graph with GB/s on the Y-axis
- } else {
- if (precision == 16) {
- ydata = list(db$GBs_1, db$GBs_FP32_1, db$GBs_FP32_2)
- ymax = max(max(db$GBs_1), max(db$GBs_FP32_1), max(db$GBs_FP32_2))
- } else {
- ydata = list(db$GBs_1, db$GBs_2)
- ymax = max(max(db$GBs_1), max(db$GBs_2))
- }
- plot_graph(xdata=xdata, ydata=ydata, log_setting=log_scale,
- xmin=min(xdata), xmax=max(xdata),
- ymin=0, ymax=ymax,
- xtics=xtics,
- xlabel=test_xlabels[[test_id]], ylabel="GB/s (higher is better)",
- graph_title=paste(display_name, test_names[[test_id]], sep=" "),
- multiple=10, experiment_names=library_names, colourset=colourset)
- }
- }
-}
-
-# ==================================================================================================
-
-# Plots data
-plot_graph <- function(xdata, ydata, log_setting,
- xmin, xmax, ymin, ymax,
- xtics, xlabel, ylabel,
- graph_title,
- multiple, experiment_names, colourset) {
-
- # Update the ymax to the next multiple of something
- ymax <- multiple*ceiling(ymax/multiple)
-
- # Add kilo or mega to the x-labels
- for (i in 1:length(xtics)) {
- if (!is.na(as.numeric(xtics[i]))) {
- if (as.numeric(xtics[i])%%mega == 0) {
- xtics[i] <- paste(as.character(as.numeric(xtics[i])/mega), "M", sep="")
- } else if (as.numeric(xtics[i])%%kilo == 0) {
- xtics[i] <- paste(as.character(as.numeric(xtics[i])/kilo), "K", sep="")
- }
- }
- }
-
- # Creates an initial graph with axis but without data
- par(new=F)
- plot(x=xmin:xmax, y=rep(1, length(xmin:xmax)), log=log_setting,
- main="", xlab="", ylab="",
- ylim=c(ymin, ymax), xlim=c(xmin, xmax), axes=F, "n")
- axis(side=2, las=2)
- if (length(xdata) > xtics_subset_threshold) { # Too many indices to print, plot only every Nth
- subset <- seq(from=1, to=length(xdata), by=xtics_subset_stepsize)
- axis(side=1, at=xdata[subset], labels=xtics[subset], las=2)
- } else {
- axis(side=1, at=xdata, labels=xtics, las=2)
- }
- title(xlab=xlabel, line=-1)
- title(ylab=ylabel, line=2)
- title(graph_title, line=-2)
- par(new=T)
-
- # Loops over all experiments
- num_experiments <- length(ydata)
- for (id in 1:num_experiments) {
-
- # Plots the data for this experiment
- plot(x=xdata, y=ydata[[id]], log=log_setting,
- col=colourset[id], pch=pchs[id], lty=1, lwd=1, cex=1,
- xlab="", ylab="", ylim=c(ymin, ymax), xlim=c(xmin, xmax),
- axes=F, "b", xpd=T)
- par(new=T)
- }
-
- # Add a legend
- legend("bottomright", experiment_names,
- lwd=1, ncol=1, col=colourset, pch=pchs, lty=1, cex=1,
- bty="n", xpd=T)
-
- # Done
- par(new=F)
-}
-
-# ==================================================================================================
diff --git a/scripts/graphs/xaxpy.r b/scripts/graphs/xaxpy.r
deleted file mode 100644
index 187590aa..00000000
--- a/scripts/graphs/xaxpy.r
+++ /dev/null
@@ -1,96 +0,0 @@
-
-# ==================================================================================================
-# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-# project uses a tab-size of two spaces and a max-width of 100 characters per line.
-#
-# Author(s):
-# Cedric Nugteren <www.cedricnugteren.nl>
-#
-# This file implements the performance script for the Xaxpy routine
-#
-# ==================================================================================================
-
-# Includes the common functions
-args <- commandArgs(trailingOnly = FALSE)
-thisfile <- (normalizePath(sub("--file=", "", args[grep("--file=", args)])))
-source(file.path(dirname(thisfile), "common.r"))
-
-# ==================================================================================================
-
-# Settings
-routine_name <- "xaxpy"
-parameters <- c("-n","-incx","-incy",
- "-num_steps","-step","-runs","-precision")
-precision <- 32
-
-# Sets the names of the test-cases
-test_names <- list(
- "multiples of 256K",
- "multiples of 256K (+1)",
- "around n=1M",
- "around n=16M",
- "strides (n=8M)",
- "powers of 2"
-)
-
-# Defines the test-cases
-test_values <- list(
- list(c(256*kilo, 1, 1, 16, 256*kilo, num_runs, precision)),
- list(c(256*kilo+1, 1, 1, 16, 256*kilo, num_runs, precision)),
- list(c(1*mega, 1, 1, 16, 1, num_runs, precision)),
- list(c(16*mega, 1, 1, 16, 1, num_runs, precision)),
- list(
- c(8*mega, 1, 1, 1, 0, num_runs, precision),
- c(8*mega, 2, 1, 1, 0, num_runs, precision),
- c(8*mega, 4, 1, 1, 0, num_runs, precision),
- c(8*mega, 8, 1, 1, 0, num_runs, precision),
- c(8*mega, 1, 2, 1, 0, num_runs, precision),
- c(8*mega, 1, 4, 1, 0, num_runs, precision),
- c(8*mega, 1, 8, 1, 0, num_runs, precision),
- c(8*mega, 2, 2, 1, 0, num_runs, precision),
- c(8*mega, 4, 4, 1, 0, num_runs, precision),
- c(8*mega, 8, 8, 1, 0, num_runs, precision)
- ),
- list(
- c(32*kilo, 1, 1, 1, 0, num_runs, precision),
- c(64*kilo, 1, 1, 1, 0, num_runs, precision),
- c(128*kilo, 1, 1, 1, 0, num_runs, precision),
- c(256*kilo, 1, 1, 1, 0, num_runs, precision),
- c(512*kilo, 1, 1, 1, 0, num_runs, precision),
- c(1*mega, 1, 1, 1, 0, num_runs, precision),
- c(2*mega, 1, 1, 1, 0, num_runs, precision),
- c(4*mega, 1, 1, 1, 0, num_runs, precision),
- c(8*mega, 1, 1, 1, 0, num_runs, precision),
- c(16*mega, 1, 1, 1, 0, num_runs, precision),
- c(32*mega, 1, 1, 1, 0, num_runs, precision),
- c(64*mega, 1, 1, 1, 0, num_runs, precision)
- )
-)
-
-# Defines the x-labels corresponding to the test-cases
-test_xlabels <- list(
- "vector sizes (n)",
- "vector sizes (n)",
- "vector sizes (n)",
- "vector sizes (n)",
- "increments/strides for x and y",
- "vector sizes (n)"
-)
-
-# Defines the x-axis of the test-cases
-test_xaxis <- list(
- c("n", ""),
- c("n", ""),
- c("n", ""),
- c("n", ""),
- list(1:10, c("x1y1", "x2y1", "x4y1", "x8y1", "x1y2", "x1y4", "x1y8", "x2y2", "x4y4", "x8y8")),
- c("n", "x")
-)
-
-# ==================================================================================================
-
-# Start the script
-main(routine_name=routine_name, precision=precision, test_names=test_names, test_values=test_values,
- test_xlabels=test_xlabels, test_xaxis=test_xaxis, metric_gflops=FALSE)
-
-# ================================================================================================== \ No newline at end of file
diff --git a/scripts/graphs/xgemm.r b/scripts/graphs/xgemm.r
deleted file mode 100755
index e758f460..00000000
--- a/scripts/graphs/xgemm.r
+++ /dev/null
@@ -1,94 +0,0 @@
-
-# ==================================================================================================
-# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-# project uses a tab-size of two spaces and a max-width of 100 characters per line.
-#
-# Author(s):
-# Cedric Nugteren <www.cedricnugteren.nl>
-#
-# This file implements the performance script for the Xgemm routine
-#
-# ==================================================================================================
-
-# Includes the common functions
-args <- commandArgs(trailingOnly = FALSE)
-thisfile <- (normalizePath(sub("--file=", "", args[grep("--file=", args)])))
-source(file.path(dirname(thisfile), "common.r"))
-
-# ==================================================================================================
-
-# Settings
-routine_name <- "xgemm"
-parameters <- c("-m","-n","-k","-layout","-transA","-transB",
- "-num_steps","-step","-runs","-precision")
-precision <- 32
-
-# Sets the names of the test-cases
-test_names <- list(
- "multiples of 128",
- "multiples of 128 (+1)",
- "around m=n=k=512",
- "around m=n=k=2048",
- "layouts and transposing (m=n=k=1024)",
- "powers of 2"
-)
-
-# Defines the test-cases
-test_values <- list(
- list(c( 128, 128, 128, 102, 111, 111, 16, 128, num_runs, precision)),
- list(c( 129, 129, 129, 102, 111, 111, 16, 128, num_runs, precision)),
- list(c( 512, 512, 512, 102, 111, 111, 16, 1, num_runs, precision)),
- list(c(2048, 2048, 2048, 102, 111, 111, 16, 1, num_runs, precision)),
- list(
- c(1024, 1024, 1024, 101, 111, 111, 1, 0, num_runs, precision),
- c(1024, 1024, 1024, 101, 111, 112, 1, 0, num_runs, precision),
- c(1024, 1024, 1024, 101, 112, 111, 1, 0, num_runs, precision),
- c(1024, 1024, 1024, 101, 112, 112, 1, 0, num_runs, precision),
- c(1024, 1024, 1024, 102, 111, 111, 1, 0, num_runs, precision),
- c(1024, 1024, 1024, 102, 111, 112, 1, 0, num_runs, precision),
- c(1024, 1024, 1024, 102, 112, 111, 1, 0, num_runs, precision),
- c(1024, 1024, 1024, 102, 112, 112, 1, 0, num_runs, precision)
- ),
- list(
- c( 8, 8, 8, 102, 111, 111, 1, 0, num_runs, precision),
- c( 16, 16, 16, 102, 111, 111, 1, 0, num_runs, precision),
- c( 32, 32, 32, 102, 111, 111, 1, 0, num_runs, precision),
- c( 64, 64, 64, 102, 111, 111, 1, 0, num_runs, precision),
- c( 128, 128, 128, 102, 111, 111, 1, 0, num_runs, precision),
- c( 256, 256, 256, 102, 111, 111, 1, 0, num_runs, precision),
- c( 512, 512, 512, 102, 111, 111, 1, 0, num_runs, precision),
- c(1024, 1024, 1024, 102, 111, 111, 1, 0, num_runs, precision),
- c(2048, 2048, 2048, 102, 111, 111, 1, 0, num_runs, precision),
- c(4096, 4096, 4096, 102, 111, 111, 1, 0, num_runs, precision),
- c(8192, 8192, 8192, 102, 111, 111, 1, 0, num_runs, precision)
- )
-)
-
-# Defines the x-labels corresponding to the test-cases
-test_xlabels <- list(
- "matrix sizes (m=n=k)",
- "matrix sizes (m=n=k)",
- "matrix sizes (m=n=k)",
- "matrix sizes (m=n=k)",
- "layout (row/col), transA (n/y), transB (n/y)",
- "matrix sizes (m=n=k)"
-)
-
-# Defines the x-axis of the test-cases
-test_xaxis <- list(
- c("m", ""),
- c("m", ""),
- c("m", ""),
- c("m", ""),
- list(1:8, c("row,n,n", "row,n,y", "row,y,n", "row,y,y",
- "col,n,n", "col,n,y", "col,y,n", "col,y,y")),
- c("m", "x")
-)
-
-# ==================================================================================================
-
-# Start the script
-main(routine_name=routine_name, precision=precision, test_names=test_names, test_values=test_values,
- test_xlabels=test_xlabels, test_xaxis=test_xaxis, metric_gflops=TRUE)
-
-# ================================================================================================== \ No newline at end of file
diff --git a/scripts/graphs/xgemm_small.r b/scripts/graphs/xgemm_small.r
deleted file mode 100644
index ef94ef20..00000000
--- a/scripts/graphs/xgemm_small.r
+++ /dev/null
@@ -1,56 +0,0 @@
-
-# ==================================================================================================
-# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-# project uses a tab-size of two spaces and a max-width of 100 characters per line.
-#
-# Author(s):
-# Cedric Nugteren <www.cedricnugteren.nl>
-#
-# This file implements the performance script for small sizes of Xgemm, testing the direct kernel
-#
-# ==================================================================================================
-
-# Includes the common functions
-args <- commandArgs(trailingOnly = FALSE)
-thisfile <- (normalizePath(sub("--file=", "", args[grep("--file=", args)])))
-source(file.path(dirname(thisfile), "common.r"))
-
-# ==================================================================================================
-
-# Settings
-routine_name <- "xgemm"
-parameters <- c("-m","-n","-k","-layout","-transA","-transB",
- "-num_steps","-step","-runs","-precision")
-precision <- 32
-
-# Sets the names of the test-cases
-test_names <- list(
- "small matrices in steps of 16",
- "small matrices in steps of 1"
-)
-
-# Defines the test-cases
-test_values <- list(
- list(c( 128, 128, 128, 102, 111, 111, 57, 16, num_runs_short, precision)),
- list(c( 128, 128, 128, 102, 111, 111, 385, 1, num_runs_short, precision))
-)
-
-# Defines the x-labels corresponding to the test-cases
-test_xlabels <- list(
- "matrix sizes (m=n=k)",
- "matrix sizes (m=n=k)"
-)
-
-# Defines the x-axis of the test-cases
-test_xaxis <- list(
- c("m", ""),
- c("m", "")
-)
-
-# ==================================================================================================
-
-# Start the script
-main(routine_name=routine_name, precision=precision, test_names=test_names, test_values=test_values,
- test_xlabels=test_xlabels, test_xaxis=test_xaxis, metric_gflops=TRUE)
-
-# ================================================================================================== \ No newline at end of file
diff --git a/scripts/graphs/xgemv.r b/scripts/graphs/xgemv.r
deleted file mode 100644
index 9a8040f7..00000000
--- a/scripts/graphs/xgemv.r
+++ /dev/null
@@ -1,83 +0,0 @@
-
-# ==================================================================================================
-# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-# project uses a tab-size of two spaces and a max-width of 100 characters per line.
-#
-# Author(s):
-# Cedric Nugteren <www.cedricnugteren.nl>
-#
-# This file implements the performance script for the Xgemv routine
-#
-# ==================================================================================================
-
-# Includes the common functions
-args <- commandArgs(trailingOnly = FALSE)
-thisfile <- (normalizePath(sub("--file=", "", args[grep("--file=", args)])))
-source(file.path(dirname(thisfile), "common.r"))
-
-# ==================================================================================================
-
-# Settings
-routine_name <- "xgemv"
-parameters <- c("-n","-m","-incx","-incy","-layout",
- "-num_steps","-step","-runs","-precision")
-precision <- 32
-
-# Sets the names of the test-cases
-test_names <- list(
- "multiples of 256",
- "multiples of 256 (+1)",
- "around n=m=2K",
- "multiples of 256 [rotated]",
- "multiples of 256 (+1) [rotated]",
- "strides (n=2K)"
-)
-
-# Defines the test-cases
-test_values <- list(
- list(c(256, 256, 1, 1, 102, 16, 256, num_runs, precision)),
- list(c(256+1, 256+1, 1, 1, 102, 16, 256, num_runs, precision)),
- list(c(2*kilo, 2*kilo, 1, 1, 102, 16, 1, num_runs, precision)),
- list(c(256, 256, 1, 1, 101, 16, 256, num_runs, precision)),
- list(c(256+1, 256+1, 1, 1, 101, 16, 256, num_runs, precision)),
- list(
- c(2*kilo, 2*kilo, 1, 1, 102, 1, 0, num_runs, precision),
- c(2*kilo, 2*kilo, 2, 1, 102, 1, 0, num_runs, precision),
- c(2*kilo, 2*kilo, 4, 1, 102, 1, 0, num_runs, precision),
- c(2*kilo, 2*kilo, 8, 1, 102, 1, 0, num_runs, precision),
- c(2*kilo, 2*kilo, 1, 2, 102, 1, 0, num_runs, precision),
- c(2*kilo, 2*kilo, 1, 4, 102, 1, 0, num_runs, precision),
- c(2*kilo, 2*kilo, 1, 8, 102, 1, 0, num_runs, precision),
- c(2*kilo, 2*kilo, 2, 2, 102, 1, 0, num_runs, precision),
- c(2*kilo, 2*kilo, 4, 4, 102, 1, 0, num_runs, precision),
- c(2*kilo, 2*kilo, 8, 8, 102, 1, 0, num_runs, precision)
- )
-)
-
-# Defines the x-labels corresponding to the test-cases
-test_xlabels <- list(
- "vector sizes (n)",
- "vector sizes (n)",
- "vector sizes (n)",
- "vector sizes (n)",
- "vector sizes (n)",
- "increments/strides for x and y"
-)
-
-# Defines the x-axis of the test-cases
-test_xaxis <- list(
- c("n", ""),
- c("n", ""),
- c("n", ""),
- c("n", ""),
- c("n", ""),
- list(1:10, c("x1y1", "x2y1", "x4y1", "x8y1", "x1y2", "x1y4", "x1y8", "x2y2", "x4y4", "x8y8"))
-)
-
-# ==================================================================================================
-
-# Start the script
-main(routine_name=routine_name, precision=precision, test_names=test_names, test_values=test_values,
- test_xlabels=test_xlabels, test_xaxis=test_xaxis, metric_gflops=FALSE)
-
-# ================================================================================================== \ No newline at end of file
diff --git a/scripts/graphs/xsymm.r b/scripts/graphs/xsymm.r
deleted file mode 100644
index 89d137d2..00000000
--- a/scripts/graphs/xsymm.r
+++ /dev/null
@@ -1,94 +0,0 @@
-
-# ==================================================================================================
-# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-# project uses a tab-size of two spaces and a max-width of 100 characters per line.
-#
-# Author(s):
-# Cedric Nugteren <www.cedricnugteren.nl>
-#
-# This file implements the performance script for the Xsymm routine
-#
-# ==================================================================================================
-
-# Includes the common functions
-args <- commandArgs(trailingOnly = FALSE)
-thisfile <- (normalizePath(sub("--file=", "", args[grep("--file=", args)])))
-source(file.path(dirname(thisfile), "common.r"))
-
-# ==================================================================================================
-
-# Settings
-routine_name <- "xsymm"
-parameters <- c("-m","-n","-layout","-side","-triangle",
- "-num_steps","-step","-runs","-precision")
-precision <- 32
-
-# Sets the names of the test-cases
-test_names <- list(
- "multiples of 128",
- "multiples of 128 (+1)",
- "around m=n=512",
- "around m=n=2048",
- "layouts and side/triangle (m=n=1024)",
- "powers of 2"
-)
-
-# Defines the test-cases
-test_values <- list(
- list(c( 128, 128, 102, 141, 121, 16, 128, num_runs, precision)),
- list(c( 129, 129, 102, 141, 121, 16, 128, num_runs, precision)),
- list(c( 512, 512, 102, 141, 121, 16, 1, num_runs, precision)),
- list(c(2048, 2048, 102, 141, 121, 16, 1, num_runs, precision)),
- list(
- c(1024, 1024, 101, 141, 121, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 141, 122, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 142, 121, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 142, 122, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 141, 121, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 141, 122, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 142, 121, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 142, 122, 1, 0, num_runs, precision)
- ),
- list(
- c( 8, 8, 102, 141, 121, 1, 0, num_runs, precision),
- c( 16, 16, 102, 141, 121, 1, 0, num_runs, precision),
- c( 32, 32, 102, 141, 121, 1, 0, num_runs, precision),
- c( 64, 64, 102, 141, 121, 1, 0, num_runs, precision),
- c( 128, 128, 102, 141, 121, 1, 0, num_runs, precision),
- c( 256, 256, 102, 141, 121, 1, 0, num_runs, precision),
- c( 512, 512, 102, 141, 121, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 141, 121, 1, 0, num_runs, precision),
- c(2048, 2048, 102, 141, 121, 1, 0, num_runs, precision),
- c(4096, 4096, 102, 141, 121, 1, 0, num_runs, precision),
- c(8192, 8192, 102, 141, 121, 1, 0, num_runs, precision)
- )
-)
-
-# Defines the x-labels corresponding to the test-cases
-test_xlabels <- list(
- "matrix sizes (m=n)",
- "matrix sizes (m=n)",
- "matrix sizes (m=n)",
- "matrix sizes (m=n)",
- "layout (row/col), side (l/r), triangle (up/lo)",
- "matrix sizes (m=n)"
-)
-
-# Defines the x-axis of the test-cases
-test_xaxis <- list(
- c("m", ""),
- c("m", ""),
- c("m", ""),
- c("m", ""),
- list(1:8, c("row,l,up", "row,r,up", "row,l,lo", "row,r,lo",
- "col,l,up", "col,r,up", "col,l,lo", "col,r,lo")),
- c("m", "x")
-)
-
-# ==================================================================================================
-
-# Start the script
-main(routine_name=routine_name, precision=precision, test_names=test_names, test_values=test_values,
- test_xlabels=test_xlabels, test_xaxis=test_xaxis, metric_gflops=TRUE)
-
-# ================================================================================================== \ No newline at end of file
diff --git a/scripts/graphs/xsyr2k.r b/scripts/graphs/xsyr2k.r
deleted file mode 100644
index 4b2dd4a0..00000000
--- a/scripts/graphs/xsyr2k.r
+++ /dev/null
@@ -1,94 +0,0 @@
-
-# ==================================================================================================
-# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-# project uses a tab-size of two spaces and a max-width of 100 characters per line.
-#
-# Author(s):
-# Cedric Nugteren <www.cedricnugteren.nl>
-#
-# This file implements the performance script for the Xsyr2k routine
-#
-# ==================================================================================================
-
-# Includes the common functions
-args <- commandArgs(trailingOnly = FALSE)
-thisfile <- (normalizePath(sub("--file=", "", args[grep("--file=", args)])))
-source(file.path(dirname(thisfile), "common.r"))
-
-# ==================================================================================================
-
-# Settings
-routine_name <- "xsyr2k"
-parameters <- c("-n","-k","-layout","-triangle","-transA",
- "-num_steps","-step","-runs","-precision")
-precision <- 32
-
-# Sets the names of the test-cases
-test_names <- list(
- "multiples of 128",
- "multiples of 128 (+1)",
- "around n=k=512",
- "around n=k=1536",
- "layouts and transposing (n=k=1024)",
- "powers of 2"
-)
-
-# Defines the test-cases
-test_values <- list(
- list(c( 128, 128, 102, 111, 111, 16, 128, num_runs, precision)),
- list(c( 129, 129, 102, 111, 111, 16, 128, num_runs, precision)),
- list(c( 512, 512, 102, 111, 111, 16, 1, num_runs, precision)),
- list(c(1536, 1536, 102, 111, 111, 16, 1, num_runs, precision)),
- list(
- c(1024, 1024, 101, 111, 111, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 111, 112, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 112, 111, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 112, 112, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 111, 111, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 111, 112, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 112, 111, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 112, 112, 1, 0, num_runs, precision)
- ),
- list(
- c( 8, 8, 102, 111, 111, 1, 0, num_runs, precision),
- c( 16, 16, 102, 111, 111, 1, 0, num_runs, precision),
- c( 32, 32, 102, 111, 111, 1, 0, num_runs, precision),
- c( 64, 64, 102, 111, 111, 1, 0, num_runs, precision),
- c( 128, 128, 102, 111, 111, 1, 0, num_runs, precision),
- c( 256, 256, 102, 111, 111, 1, 0, num_runs, precision),
- c( 512, 512, 102, 111, 111, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 111, 111, 1, 0, num_runs, precision),
- c(2048, 2048, 102, 111, 111, 1, 0, num_runs, precision),
- c(4096, 4096, 102, 111, 111, 1, 0, num_runs, precision),
- c(8192, 8192, 102, 111, 111, 1, 0, num_runs, precision)
- )
-)
-
-# Defines the x-labels corresponding to the test-cases
-test_xlabels <- list(
- "matrix sizes (n=k)",
- "matrix sizes (n=k)",
- "matrix sizes (n=k)",
- "matrix sizes (n=k)",
- "layout (row/col), triangle (u/l), transA (n/y)",
- "matrix sizes (n=k)"
-)
-
-# Defines the x-axis of the test-cases
-test_xaxis <- list(
- c("n", ""),
- c("n", ""),
- c("n", ""),
- c("n", ""),
- list(1:8, c("row,u,n", "row,u,y", "row,l,n", "row,l,y",
- "col,u,n", "col,u,y", "col,l,n", "col,l,y")),
- c("n", "x")
-)
-
-# ==================================================================================================
-
-# Start the script
-main(routine_name=routine_name, precision=precision, test_names=test_names, test_values=test_values,
- test_xlabels=test_xlabels, test_xaxis=test_xaxis, metric_gflops=TRUE)
-
-# ================================================================================================== \ No newline at end of file
diff --git a/scripts/graphs/xsyrk.r b/scripts/graphs/xsyrk.r
deleted file mode 100644
index 754c93e2..00000000
--- a/scripts/graphs/xsyrk.r
+++ /dev/null
@@ -1,94 +0,0 @@
-
-# ==================================================================================================
-# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-# project uses a tab-size of two spaces and a max-width of 100 characters per line.
-#
-# Author(s):
-# Cedric Nugteren <www.cedricnugteren.nl>
-#
-# This file implements the performance script for the Xsyrk routine
-#
-# ==================================================================================================
-
-# Includes the common functions
-args <- commandArgs(trailingOnly = FALSE)
-thisfile <- (normalizePath(sub("--file=", "", args[grep("--file=", args)])))
-source(file.path(dirname(thisfile), "common.r"))
-
-# ==================================================================================================
-
-# Settings
-routine_name <- "xsyrk"
-parameters <- c("-n","-k","-layout","-triangle","-transA",
- "-num_steps","-step","-runs","-precision")
-precision <- 32
-
-# Sets the names of the test-cases
-test_names <- list(
- "multiples of 128",
- "multiples of 128 (+1)",
- "around n=k=512",
- "around n=k=2048",
- "layouts and transposing (n=k=1024)",
- "powers of 2"
-)
-
-# Defines the test-cases
-test_values <- list(
- list(c( 128, 128, 102, 121, 111, 16, 128, num_runs, precision)),
- list(c( 129, 129, 102, 121, 111, 16, 128, num_runs, precision)),
- list(c( 512, 512, 102, 121, 111, 16, 1, num_runs, precision)),
- list(c(2048, 2048, 102, 121, 111, 16, 1, num_runs, precision)),
- list(
- c(1024, 1024, 101, 121, 111, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 121, 112, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 122, 111, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 122, 112, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 121, 111, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 121, 112, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 122, 111, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 122, 112, 1, 0, num_runs, precision)
- ),
- list(
- c( 8, 8, 102, 121, 111, 1, 0, num_runs, precision),
- c( 16, 16, 102, 121, 111, 1, 0, num_runs, precision),
- c( 32, 32, 102, 121, 111, 1, 0, num_runs, precision),
- c( 64, 64, 102, 121, 111, 1, 0, num_runs, precision),
- c( 128, 128, 102, 121, 111, 1, 0, num_runs, precision),
- c( 256, 256, 102, 121, 111, 1, 0, num_runs, precision),
- c( 512, 512, 102, 121, 111, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 121, 111, 1, 0, num_runs, precision),
- c(2048, 2048, 102, 121, 111, 1, 0, num_runs, precision),
- c(4096, 4096, 102, 121, 111, 1, 0, num_runs, precision),
- c(8192, 8192, 102, 121, 111, 1, 0, num_runs, precision)
- )
-)
-
-# Defines the x-labels corresponding to the test-cases
-test_xlabels <- list(
- "matrix sizes (n=k)",
- "matrix sizes (n=k)",
- "matrix sizes (n=k)",
- "matrix sizes (n=k)",
- "layout (row/col), triangle (u/l), transA (n/y)",
- "matrix sizes (n=k)"
-)
-
-# Defines the x-axis of the test-cases
-test_xaxis <- list(
- c("n", ""),
- c("n", ""),
- c("n", ""),
- c("n", ""),
- list(1:8, c("row,u,n", "row,u,y", "row,l,n", "row,l,y",
- "col,u,n", "col,u,y", "col,l,n", "col,l,y")),
- c("n", "x")
-)
-
-# ==================================================================================================
-
-# Start the script
-main(routine_name=routine_name, precision=precision, test_names=test_names, test_values=test_values,
- test_xlabels=test_xlabels, test_xaxis=test_xaxis, metric_gflops=TRUE)
-
-# ================================================================================================== \ No newline at end of file
diff --git a/scripts/graphs/xtrmm.r b/scripts/graphs/xtrmm.r
deleted file mode 100644
index c2faaa8b..00000000
--- a/scripts/graphs/xtrmm.r
+++ /dev/null
@@ -1,127 +0,0 @@
-
-# ==================================================================================================
-# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-# project uses a tab-size of two spaces and a max-width of 100 characters per line.
-#
-# Author(s):
-# Cedric Nugteren <www.cedricnugteren.nl>
-#
-# This file implements the performance script for the Xtrmm routine
-#
-# ==================================================================================================
-
-# Includes the common functions
-args <- commandArgs(trailingOnly = FALSE)
-thisfile <- (normalizePath(sub("--file=", "", args[grep("--file=", args)])))
-source(file.path(dirname(thisfile), "common.r"))
-
-# ==================================================================================================
-
-# Settings
-routine_name <- "xtrmm"
-parameters <- c("-m","-n","-layout","-side","-triangle","-transA","-diagonal",
- "-num_steps","-step","-runs","-precision")
-precision <- 32
-
-# Sets the names of the test-cases
-test_names <- list(
- "multiples of 128",
- "multiples of 128 (+1)",
- "around m=n=512",
- "around m=n=2048",
- "layouts and side/triangle (m=n=1024)",
- "powers of 2"
-)
-
-# Defines the test-cases
-test_values <- list(
- list(c( 128, 128, 102, 141, 121, 111, 131, 16, 128, num_runs, precision)),
- list(c( 129, 129, 102, 141, 121, 111, 131, 16, 128, num_runs, precision)),
- list(c( 512, 512, 102, 141, 121, 111, 131, 16, 1, num_runs, precision)),
- list(c(2048, 2048, 102, 141, 121, 111, 131, 16, 1, num_runs, precision)),
- list(
- c(1024, 1024, 101, 141, 121, 111, 131, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 141, 121, 111, 132, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 141, 121, 112, 131, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 141, 121, 112, 132, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 141, 122, 111, 131, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 141, 122, 111, 132, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 141, 122, 112, 131, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 141, 122, 112, 132, 1, 0, num_runs, precision),
-
- c(1024, 1024, 101, 142, 121, 111, 131, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 142, 121, 111, 132, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 142, 121, 112, 131, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 142, 121, 112, 132, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 142, 122, 111, 131, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 142, 122, 111, 132, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 142, 122, 112, 131, 1, 0, num_runs, precision),
- c(1024, 1024, 101, 142, 122, 112, 132, 1, 0, num_runs, precision),
-
- c(1024, 1024, 102, 141, 121, 111, 131, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 141, 121, 111, 132, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 141, 121, 112, 131, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 141, 121, 112, 132, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 141, 122, 111, 131, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 141, 122, 111, 132, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 141, 122, 112, 131, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 141, 122, 112, 132, 1, 0, num_runs, precision),
-
- c(1024, 1024, 102, 142, 121, 111, 131, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 142, 121, 111, 132, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 142, 121, 112, 131, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 142, 121, 112, 132, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 142, 122, 111, 131, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 142, 122, 111, 132, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 142, 122, 112, 131, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 142, 122, 112, 132, 1, 0, num_runs, precision)
- ),
- list(
- c( 8, 8, 102, 141, 121, 111, 131, 1, 0, num_runs, precision),
- c( 16, 16, 102, 141, 121, 111, 131, 1, 0, num_runs, precision),
- c( 32, 32, 102, 141, 121, 111, 131, 1, 0, num_runs, precision),
- c( 64, 64, 102, 141, 121, 111, 131, 1, 0, num_runs, precision),
- c( 128, 128, 102, 141, 121, 111, 131, 1, 0, num_runs, precision),
- c( 256, 256, 102, 141, 121, 111, 131, 1, 0, num_runs, precision),
- c( 512, 512, 102, 141, 121, 111, 131, 1, 0, num_runs, precision),
- c(1024, 1024, 102, 141, 121, 111, 131, 1, 0, num_runs, precision),
- c(2048, 2048, 102, 141, 121, 111, 131, 1, 0, num_runs, precision),
- c(4096, 4096, 102, 141, 121, 111, 131, 1, 0, num_runs, precision),
- c(8192, 8192, 102, 141, 121, 111, 131, 1, 0, num_runs, precision)
- )
-)
-
-# Defines the x-labels corresponding to the test-cases
-test_xlabels <- list(
- "matrix sizes (m=n)",
- "matrix sizes (m=n)",
- "matrix sizes (m=n)",
- "matrix sizes (m=n)",
- "layout (row/col), side (l/r), triangle (up/lo), transA (n/y), diag (u/nu)",
- "matrix sizes (m=n)"
-)
-
-# Defines the x-axis of the test-cases
-test_xaxis <- list(
- c("m", ""),
- c("m", ""),
- c("m", ""),
- c("m", ""),
- list(1:32, c("row,l,up,n,u", "row,l,up,n,nu", "row,l,up,y,u", "row,l,up,y,nu",
- "row,r,up,n,u", "row,r,up,n,nu", "row,r,up,y,u", "row,r,up,y,nu",
- "row,l,lo,n,u", "row,l,lo,n,nu", "row,l,lo,y,u", "row,l,lo,y,nu",
- "row,r,lo,n,u", "row,r,lo,n,nu", "row,r,lo,y,u", "row,r,lo,y,nu",
- "col,l,up,n,u", "col,l,up,n,nu", "col,l,up,y,u", "col,l,up,y,nu",
- "col,r,up,n,u", "col,r,up,n,nu", "col,r,up,y,u", "col,r,up,y,nu",
- "col,l,lo,n,u", "col,l,lo,n,nu", "col,l,lo,y,u", "col,l,lo,y,nu",
- "col,r,lo,n,u", "col,r,lo,n,nu", "col,r,lo,y,u", "col,r,lo,y,nu")),
- c("m", "x")
-)
-
-# ==================================================================================================
-
-# Start the script
-main(routine_name=routine_name, precision=precision, test_names=test_names, test_values=test_values,
- test_xlabels=test_xlabels, test_xaxis=test_xaxis, metric_gflops=TRUE)
-
-# ================================================================================================== \ No newline at end of file