summary refs log tree commit diff
path: root/src/routines/level2/xger.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/routines/level2/xger.cc')
-rw-r--r-- src/routines/level2/xger.cc 8
1 files changed, 6 insertions, 2 deletions
diff --git a/src/routines/level2/xger.cc b/src/routines/level2/xger.cc
index 686c7e60..47d7abe2 100644
--- a/src/routines/level2/xger.cc
+++ b/src/routines/level2/xger.cc
@@ -64,7 +64,11 @@ StatusCode Xger<T>::DoGer(const Layout layout,
status = TestVectorY(n, y_buffer, y_offset, y_inc, sizeof(T));
if (ErrorIn(status)) { return status; }
- // Retrieves the Xgemv kernel from the compiled binary
+ // Upload the scalar argument as a constant buffer to the device (needed for half-precision)
+ auto alpha_buffer = Buffer<T>(context_, 1);
+ alpha_buffer.Write(queue_, 1, &alpha);
+
+ // Retrieves the kernel from the compiled binary
try {
const auto program = GetProgramFromCache();
auto kernel = Kernel(program, "Xger");
@@ -72,7 +76,7 @@ StatusCode Xger<T>::DoGer(const Layout layout,
// Sets the kernel arguments
kernel.SetArgument(0, static_cast<int>(a_one));
kernel.SetArgument(1, static_cast<int>(a_two));
- kernel.SetArgument(2, alpha);
+ kernel.SetArgument(2, alpha_buffer());
kernel.SetArgument(3, x_buffer());
kernel.SetArgument(4, static_cast<int>(x_offset));
kernel.SetArgument(5, static_cast<int>(x_inc));