diff options
| field | value | date |
|---|---|---|
| author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-05-22 16:18:08 +0200 |
| committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-05-22 16:18:08 +0200 |
| commit | c8ff3f143fe94c87b23fd1bf36c1a4f91d305f01 (patch) | |
| tree | 9c32e1a944c58ffcd711a7a69903fe6f9f95911d /src/routines/level2 | |
| parent | 95b828da124b9c5c101d95cb51a12e9d387d1a34 (diff) | |
Prepared the GER kernels and tuner for half-precision support
Diffstat (limited to 'src/routines/level2')
-rw-r--r-- | src/routines/level2/xger.cc | 8 | ||||
-rw-r--r-- | src/routines/level2/xher.cc | 14 | ||||
-rw-r--r-- | src/routines/level2/xher2.cc | 8 |
3 files changed, 21 insertions, 9 deletions
diff --git a/src/routines/level2/xger.cc b/src/routines/level2/xger.cc index 686c7e60..47d7abe2 100644 --- a/src/routines/level2/xger.cc +++ b/src/routines/level2/xger.cc @@ -64,7 +64,11 @@ StatusCode Xger<T>::DoGer(const Layout layout, status = TestVectorY(n, y_buffer, y_offset, y_inc, sizeof(T)); if (ErrorIn(status)) { return status; } - // Retrieves the Xgemv kernel from the compiled binary + // Upload the scalar argument as a constant buffer to the device (needed for half-precision) + auto alpha_buffer = Buffer<T>(context_, 1); + alpha_buffer.Write(queue_, 1, &alpha); + + // Retrieves the kernel from the compiled binary try { const auto program = GetProgramFromCache(); auto kernel = Kernel(program, "Xger"); @@ -72,7 +76,7 @@ StatusCode Xger<T>::DoGer(const Layout layout, // Sets the kernel arguments kernel.SetArgument(0, static_cast<int>(a_one)); kernel.SetArgument(1, static_cast<int>(a_two)); - kernel.SetArgument(2, alpha); + kernel.SetArgument(2, alpha_buffer()); kernel.SetArgument(3, x_buffer()); kernel.SetArgument(4, static_cast<int>(x_offset)); kernel.SetArgument(5, static_cast<int>(x_inc)); diff --git a/src/routines/level2/xher.cc b/src/routines/level2/xher.cc index a7116213..852e3f15 100644 --- a/src/routines/level2/xher.cc +++ b/src/routines/level2/xher.cc @@ -63,9 +63,6 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle, (triangle == Triangle::kLower && layout == Layout::kRowMajor)); const auto is_rowmajor = (layout == Layout::kRowMajor); - // Creates a matching version of alpha - const auto matching_alpha = GetAlpha(alpha); - // Tests the matrix and the vectors for validity auto status = StatusCode::kSuccess; if (packed) { status = TestMatrixAP(n, a_buffer, a_offset, sizeof(T)); } @@ -77,14 +74,21 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle, // If alpha is zero an update is not required if (alpha == U{0}) { return StatusCode::kSuccess; } - // Retrieves the Xgemv kernel from the compiled binary + 
// Creates a matching version of alpha + const auto matching_alpha = GetAlpha(alpha); + + // Upload the scalar argument as a constant buffer to the device (needed for half-precision) + auto alpha_buffer = Buffer<T>(context_, 1); + alpha_buffer.Write(queue_, 1, &matching_alpha); + + // Retrieves the kernel from the compiled binary try { const auto program = GetProgramFromCache(); auto kernel = Kernel(program, "Xher"); // Sets the kernel arguments kernel.SetArgument(0, static_cast<int>(n)); - kernel.SetArgument(1, matching_alpha); + kernel.SetArgument(1, alpha_buffer()); kernel.SetArgument(2, x_buffer()); kernel.SetArgument(3, static_cast<int>(x_offset)); kernel.SetArgument(4, static_cast<int>(x_inc)); diff --git a/src/routines/level2/xher2.cc b/src/routines/level2/xher2.cc index 3fd1a961..82052187 100644 --- a/src/routines/level2/xher2.cc +++ b/src/routines/level2/xher2.cc @@ -66,14 +66,18 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle, status = TestVectorY(n, y_buffer, y_offset, y_inc, sizeof(T)); if (ErrorIn(status)) { return status; } - // Retrieves the Xgemv kernel from the compiled binary + // Upload the scalar argument as a constant buffer to the device (needed for half-precision) + auto alpha_buffer = Buffer<T>(context_, 1); + alpha_buffer.Write(queue_, 1, &alpha); + + // Retrieves the kernel from the compiled binary try { const auto program = GetProgramFromCache(); auto kernel = Kernel(program, "Xher2"); // Sets the kernel arguments kernel.SetArgument(0, static_cast<int>(n)); - kernel.SetArgument(1, alpha); + kernel.SetArgument(1, alpha_buffer()); kernel.SetArgument(2, x_buffer()); kernel.SetArgument(3, static_cast<int>(x_offset)); kernel.SetArgument(4, static_cast<int>(x_inc)); |