diff options
author | CNugteren <web@cedricnugteren.nl> | 2015-06-24 07:50:18 +0200 |
---|---|---|
committer | CNugteren <web@cedricnugteren.nl> | 2015-06-24 07:50:18 +0200 |
commit | 60a88aac8672d360eb05ba25b1c4ffbf53a78dff (patch) | |
tree | 25b6c8d59b293b3c7e0d7fb48bb8b6ca64a1f2d9 /test/performance | |
parent | a17297937d757d9747adde600f832d1e0c2753c1 (diff) |
Added the SYRK routine, tester, and client
Diffstat (limited to 'test/performance')
-rw-r--r-- | test/performance/graphs/xsyrk.r | 94 | ||||
-rw-r--r-- | test/performance/routines/xsyrk.cc | 113 |
2 files changed, 207 insertions, 0 deletions
# diff --git a/test/performance/graphs/xsyrk.r b/test/performance/graphs/xsyrk.r new file mode 100644 index 00000000..fe8598e9 --- /dev/null +++ b/test/performance/graphs/xsyrk.r @@ -0,0 +1,94 @@

# ==================================================================================================
# This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
# project uses a tab-size of two spaces and a max-width of 100 characters per line.
#
# Author(s):
#   Cedric Nugteren <www.cedricnugteren.nl>
#
# This file implements the performance script for the Xsyrk routine
#
# ==================================================================================================

# Includes the common functions: locates this script through the "--file=" command-line argument
# and sources "common.r" from the same directory
args <- commandArgs(trailingOnly = FALSE)
thisfile <- (normalizePath(sub("--file=", "", args[grep("--file=", args)])))
source(file.path(dirname(thisfile), "common.r"))

# ==================================================================================================

# Settings
# NOTE(review): 'parameters' is not passed to main() below, so it is presumably read as a global by
# common.r - confirm before renaming. Each vector in 'test_values' has nine entries, presumably one
# per flag listed here and in the same order - confirm against common.r.
routine_name <- "xsyrk"
parameters <- c("-n","-k","-layout","-triangle","-transA",
                "-num_steps","-step","-runs","-precision")
precision <- 32

# Sets the names of the test-cases (one entry per test-case, six in total)
test_names <- list(
  "multiples of 128",
  "multiples of 128 (+1)",
  "around n=k=512",
  "around n=k=2048",
  "layouts and transposing (n=k=1024)",
  "powers of 2"
)

# Defines the test-cases: one inner list per test-case, holding one vector per client invocation.
# NOTE(review): 'num_runs' is not defined in this file; presumably it is provided by common.r.
test_values <- list(
  list(c(128, 128, 0, 0, 0, 16, 128, num_runs, precision)),
  list(c(129, 129, 0, 0, 0, 16, 128, num_runs, precision)),
  list(c(512, 512, 0, 0, 0, 16, 1, num_runs, precision)),
  list(c(2048, 2048, 0, 0, 0, 16, 1, num_runs, precision)),
  # All eight 0/1 combinations of the third, fourth and fifth entries at a fixed size of 1024; the
  # order matches the eight labels of the fifth entry in 'test_xaxis' below
  list(
    c(1024, 1024, 0, 0, 0, 1, 0, num_runs, precision),
    c(1024, 1024, 0, 0, 1, 1, 0, num_runs, precision),
    c(1024, 1024, 0, 1, 0, 1, 0, num_runs, precision),
    c(1024, 1024, 0, 1, 1, 1, 0, num_runs, precision),
    c(1024, 1024, 1, 0, 0, 1, 0, num_runs, precision),
    c(1024, 1024, 1, 0, 1, 1, 0, num_runs, precision),
    c(1024, 1024, 1, 1, 0, 1, 0, num_runs, precision),
    c(1024, 1024, 1, 1, 1, 1, 0, num_runs, precision)
  ),
  # One invocation per power of two, from 8 up to 8192
  list(
    c(8, 8, 0, 0, 0, 1, 0, num_runs, precision),
    c(16, 16, 0, 0, 0, 1, 0, num_runs, precision),
    c(32, 32, 0, 0, 0, 1, 0, num_runs, precision),
    c(64, 64, 0, 0, 0, 1, 0, num_runs, precision),
    c(128, 128, 0, 0, 0, 1, 0, num_runs, precision),
    c(256, 256, 0, 0, 0, 1, 0, num_runs, precision),
    c(512, 512, 0, 0, 0, 1, 0, num_runs, precision),
    c(1024, 1024, 0, 0, 0, 1, 0, num_runs, precision),
    c(2048, 2048, 0, 0, 0, 1, 0, num_runs, precision),
    c(4096, 4096, 0, 0, 0, 1, 0, num_runs, precision),
    c(8192, 8192, 0, 0, 0, 1, 0, num_runs, precision)
  )
)

# Defines the x-labels corresponding to the test-cases
test_xlabels <- list(
  "matrix sizes (n=k)",
  "matrix sizes (n=k)",
  "matrix sizes (n=k)",
  "matrix sizes (n=k)",
  "layout (row/col), triangle (u/l), transA (n/y)",
  "matrix sizes (n=k)"
)

# Defines the x-axis of the test-cases
# NOTE(review): how common.r interprets these entries is not visible here; presumably the first
# element names the plotted field and the second ("" or "x") selects an axis option - confirm.
test_xaxis <- list(
  c("n", ""),
  c("n", ""),
  c("n", ""),
  c("n", ""),
  list(1:8, c("row,u,n", "row,u,y", "row,l,n", "row,l,y",
              "col,u,n", "col,u,y", "col,l,n", "col,l,y")),
  c("n", "x")
)

# ==================================================================================================

# Start the script
main(routine_name=routine_name, precision=precision, test_names=test_names, test_values=test_values,
     test_xlabels=test_xlabels, test_xaxis=test_xaxis, metric_gflops=TRUE)

# ==================================================================================================
\ No newline at end of file diff --git a/test/performance/routines/xsyrk.cc b/test/performance/routines/xsyrk.cc new file mode 100644 index 00000000..f36d665a --- /dev/null +++ b/test/performance/routines/xsyrk.cc @@ -0,0 +1,113 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the Xsyrk command-line interface tester. +// +// ================================================================================================= + +#include <string> +#include <vector> +#include <exception> + +#include "wrapper_clblas.h" +#include "performance/client.h" + +namespace clblast { +// ================================================================================================= + +// The client, used for performance testing. It contains the function calls to CLBlast and to other +// libraries to compare against. 
+template <typename T> +void PerformanceXsyrk(const Arguments<T> &args, + const Buffer &a_mat, const Buffer &c_mat, + CommandQueue &queue) { + + // Creates the CLBlast lambda + auto clblast_lambda = [&args, &a_mat, &c_mat, &queue]() { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Syrk(args.layout, args.triangle, args.a_transpose, + args.n, args.k, + args.alpha, + a_mat(), args.a_offset, args.a_ld, + args.beta, + c_mat(), args.c_offset, args.c_ld, + &queue_plain, &event); + clWaitForEvents(1, &event); + if (status != StatusCode::kSuccess) { + throw std::runtime_error("CLBlast error: "+ToString(static_cast<int>(status))); + } + }; + + // Creates the clBLAS lambda (for comparison) + auto clblas_lambda = [&args, &a_mat, &c_mat, &queue]() { + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = clblasXsyrk(static_cast<clblasOrder>(args.layout), + static_cast<clblasUplo>(args.triangle), + static_cast<clblasTranspose>(args.a_transpose), + args.n, args.k, + args.alpha, + a_mat(), args.a_offset, args.a_ld, + args.beta, + c_mat(), args.c_offset, args.c_ld, + 1, &queue_plain, 0, nullptr, &event); + clWaitForEvents(1, &event); + if (status != CL_SUCCESS) { + throw std::runtime_error("clBLAS error: "+ToString(static_cast<int>(status))); + } + }; + + // Runs the routines and collect the timings + auto ms_clblast = TimedExecution(args.num_runs, clblast_lambda); + auto ms_clblas = TimedExecution(args.num_runs, clblas_lambda); + + // Prints the performance of both libraries + const auto flops = args.n * args.n * args.k; + const auto bytes = (args.n*args.k + args.n*args.n) * sizeof(T); + const auto output_ints = std::vector<size_t>{args.n, args.k, + static_cast<size_t>(args.layout), + static_cast<size_t>(args.triangle), + static_cast<size_t>(args.a_transpose), + args.a_ld, args.c_ld, + args.a_offset, args.c_offset}; + const auto output_strings = std::vector<std::string>{ToString(args.alpha), + ToString(args.beta)}; + 
PrintTableRow(output_ints, output_strings, args.no_abbrv, + ms_clblast, ms_clblas, flops, bytes); +} + +// ================================================================================================= + +// Main function which calls the common client code with the routine-specific function as argument. +void ClientXsyrk(int argc, char *argv[]) { + const auto o = std::vector<std::string>{kArgN, kArgK, + kArgLayout, kArgTriangle, kArgATransp, + kArgALeadDim, kArgCLeadDim, + kArgAOffset, kArgCOffset, + kArgAlpha, kArgBeta}; + switch(GetPrecision(argc, argv)) { + case Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case Precision::kSingle: ClientAC<float>(argc, argv, PerformanceXsyrk<float>, o); break; + case Precision::kDouble: ClientAC<double>(argc, argv, PerformanceXsyrk<double>, o); break; + case Precision::kComplexSingle: ClientAC<float2>(argc, argv, PerformanceXsyrk<float2>, o); break; + case Precision::kComplexDouble: ClientAC<double2>(argc, argv, PerformanceXsyrk<double2>, o); break; + } +} + +// ================================================================================================= +} // namespace clblast + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + clblast::ClientXsyrk(argc, argv); + return 0; +} + +// ================================================================================================= |