summary | refs | log | tree | commit | diff
path: root/src/routines
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2016-05-14 18:06:00 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2016-05-14 18:06:00 +0200
commit 5e1b2e021f4b746a77619e0ad5ab35d9a0d4df54 (patch)
tree 4c41089bda681a26b677d946eef40b916e80afac /src/routines
parent 120c31a30f933eea12d4dfffd4951fa22102ef5f (diff)
Set kernel arguments for AXPY as constant memory buffers, making it possible to transfer half-precision values as well
Diffstat (limited to 'src/routines')
-rw-r--r-- src/routines/level1/xaxpy.cc 8
1 files changed, 6 insertions, 2 deletions
diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc
index b7956bf2..66aa2336 100644
--- a/src/routines/level1/xaxpy.cc
+++ b/src/routines/level1/xaxpy.cc
@@ -68,16 +68,20 @@ StatusCode Xaxpy<T>::DoAxpy(const size_t n, const T alpha,
const auto program = GetProgramFromCache();
auto kernel = Kernel(program, kernel_name);
+ // Upload the scalar argument as a constant buffer to the device (needed for half-precision)
+ auto alpha_buffer = Buffer<T>(context_, 1);
+ alpha_buffer.Write(queue_, 1, &alpha);
+
// Sets the kernel arguments
if (use_fast_kernel) {
kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, static_cast<typename RealArg<T>::Type>(alpha));
+ kernel.SetArgument(1, alpha_buffer());
kernel.SetArgument(2, x_buffer());
kernel.SetArgument(3, y_buffer());
}
else {
kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, static_cast<typename RealArg<T>::Type>(alpha));
+ kernel.SetArgument(1, alpha_buffer());
kernel.SetArgument(2, x_buffer());
kernel.SetArgument(3, static_cast<int>(x_offset));
kernel.SetArgument(4, static_cast<int>(x_inc));