summary | refs | log | tree | commit | diff
path: root/src/routines/level2
diff options
context:
space:
mode:
Diffstat (limited to 'src/routines/level2')
-rw-r--r-- src/routines/level2/xger.cc | 8
-rw-r--r-- src/routines/level2/xher.cc | 14
-rw-r--r-- src/routines/level2/xher2.cc | 8
3 files changed, 21 insertions, 9 deletions
diff --git a/src/routines/level2/xger.cc b/src/routines/level2/xger.cc
index 686c7e60..47d7abe2 100644
--- a/src/routines/level2/xger.cc
+++ b/src/routines/level2/xger.cc
@@ -64,7 +64,11 @@ StatusCode Xger<T>::DoGer(const Layout layout,
status = TestVectorY(n, y_buffer, y_offset, y_inc, sizeof(T));
if (ErrorIn(status)) { return status; }
- // Retrieves the Xgemv kernel from the compiled binary
+ // Upload the scalar argument as a constant buffer to the device (needed for half-precision)
+ auto alpha_buffer = Buffer<T>(context_, 1);
+ alpha_buffer.Write(queue_, 1, &alpha);
+
+ // Retrieves the kernel from the compiled binary
try {
const auto program = GetProgramFromCache();
auto kernel = Kernel(program, "Xger");
@@ -72,7 +76,7 @@ StatusCode Xger<T>::DoGer(const Layout layout,
// Sets the kernel arguments
kernel.SetArgument(0, static_cast<int>(a_one));
kernel.SetArgument(1, static_cast<int>(a_two));
- kernel.SetArgument(2, alpha);
+ kernel.SetArgument(2, alpha_buffer());
kernel.SetArgument(3, x_buffer());
kernel.SetArgument(4, static_cast<int>(x_offset));
kernel.SetArgument(5, static_cast<int>(x_inc));
diff --git a/src/routines/level2/xher.cc b/src/routines/level2/xher.cc
index a7116213..852e3f15 100644
--- a/src/routines/level2/xher.cc
+++ b/src/routines/level2/xher.cc
@@ -63,9 +63,6 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle,
(triangle == Triangle::kLower && layout == Layout::kRowMajor));
const auto is_rowmajor = (layout == Layout::kRowMajor);
- // Creates a matching version of alpha
- const auto matching_alpha = GetAlpha(alpha);
-
// Tests the matrix and the vectors for validity
auto status = StatusCode::kSuccess;
if (packed) { status = TestMatrixAP(n, a_buffer, a_offset, sizeof(T)); }
@@ -77,14 +74,21 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle,
// If alpha is zero an update is not required
if (alpha == U{0}) { return StatusCode::kSuccess; }
- // Retrieves the Xgemv kernel from the compiled binary
+ // Creates a matching version of alpha
+ const auto matching_alpha = GetAlpha(alpha);
+
+ // Upload the scalar argument as a constant buffer to the device (needed for half-precision)
+ auto alpha_buffer = Buffer<T>(context_, 1);
+ alpha_buffer.Write(queue_, 1, &matching_alpha);
+
+ // Retrieves the kernel from the compiled binary
try {
const auto program = GetProgramFromCache();
auto kernel = Kernel(program, "Xher");
// Sets the kernel arguments
kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, matching_alpha);
+ kernel.SetArgument(1, alpha_buffer());
kernel.SetArgument(2, x_buffer());
kernel.SetArgument(3, static_cast<int>(x_offset));
kernel.SetArgument(4, static_cast<int>(x_inc));
diff --git a/src/routines/level2/xher2.cc b/src/routines/level2/xher2.cc
index 3fd1a961..82052187 100644
--- a/src/routines/level2/xher2.cc
+++ b/src/routines/level2/xher2.cc
@@ -66,14 +66,18 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle,
status = TestVectorY(n, y_buffer, y_offset, y_inc, sizeof(T));
if (ErrorIn(status)) { return status; }
- // Retrieves the Xgemv kernel from the compiled binary
+ // Upload the scalar argument as a constant buffer to the device (needed for half-precision)
+ auto alpha_buffer = Buffer<T>(context_, 1);
+ alpha_buffer.Write(queue_, 1, &alpha);
+
+ // Retrieves the kernel from the compiled binary
try {
const auto program = GetProgramFromCache();
auto kernel = Kernel(program, "Xher2");
// Sets the kernel arguments
kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, alpha);
+ kernel.SetArgument(1, alpha_buffer());
kernel.SetArgument(2, x_buffer());
kernel.SetArgument(3, static_cast<int>(x_offset));
kernel.SetArgument(4, static_cast<int>(x_inc));