diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-07-10 20:32:01 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-07-10 20:32:01 +0200 |
commit | c87e877bf23d2fe38a7da2898e1734a3cdeaf48c (patch) | |
tree | d091dfdd826dd11e5c9e533eb46b22aeb7f6f823 /src/routines/level2 | |
parent | 57f09178d89a1cf4f38a0bb338c864ed850d5470 (diff) |
Now passing alpha/beta to the kernel as arguments again, as was done before fp16 support was added; in the fp16 case, the arguments are cast on the host and in the kernel
Diffstat (limited to 'src/routines/level2')
-rw-r--r-- | src/routines/level2/xgemv.cpp | 10 | ||||
-rw-r--r-- | src/routines/level2/xger.cpp | 6 | ||||
-rw-r--r-- | src/routines/level2/xher.cpp | 6 | ||||
-rw-r--r-- | src/routines/level2/xher2.cpp | 6 |
4 files changed, 5 insertions, 23 deletions
diff --git a/src/routines/level2/xgemv.cpp b/src/routines/level2/xgemv.cpp index 21fb397c..2842ef07 100644 --- a/src/routines/level2/xgemv.cpp +++ b/src/routines/level2/xgemv.cpp @@ -126,12 +126,6 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose, local_size = db_["WGS3"]; } - // Upload the scalar arguments as constant buffers to the device (needed for half-precision) - auto alpha_buffer = Buffer<T>(context_, 1); - auto beta_buffer = Buffer<T>(context_, 1); - alpha_buffer.Write(queue_, 1, &alpha); - beta_buffer.Write(queue_, 1, &beta); - // Retrieves the Xgemv kernel from the compiled binary try { const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); @@ -140,8 +134,8 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose, // Sets the kernel arguments kernel.SetArgument(0, static_cast<int>(m_real)); kernel.SetArgument(1, static_cast<int>(n_real)); - kernel.SetArgument(2, alpha_buffer()); - kernel.SetArgument(3, beta_buffer()); + kernel.SetArgument(2, GetRealArg(alpha)); + kernel.SetArgument(3, GetRealArg(beta)); kernel.SetArgument(4, static_cast<int>(a_rotated)); kernel.SetArgument(5, a_buffer()); kernel.SetArgument(6, static_cast<int>(a_offset)); diff --git a/src/routines/level2/xger.cpp b/src/routines/level2/xger.cpp index 353047d2..29cffe0c 100644 --- a/src/routines/level2/xger.cpp +++ b/src/routines/level2/xger.cpp @@ -56,10 +56,6 @@ StatusCode Xger<T>::DoGer(const Layout layout, status = TestVectorY(n, y_buffer, y_offset, y_inc); if (ErrorIn(status)) { return status; } - // Upload the scalar argument as a constant buffer to the device (needed for half-precision) - auto alpha_buffer = Buffer<T>(context_, 1); - alpha_buffer.Write(queue_, 1, &alpha); - // Retrieves the kernel from the compiled binary try { const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); @@ -68,7 +64,7 @@ StatusCode Xger<T>::DoGer(const Layout layout, // Sets the kernel 
arguments kernel.SetArgument(0, static_cast<int>(a_one)); kernel.SetArgument(1, static_cast<int>(a_two)); - kernel.SetArgument(2, alpha_buffer()); + kernel.SetArgument(2, GetRealArg(alpha)); kernel.SetArgument(3, x_buffer()); kernel.SetArgument(4, static_cast<int>(x_offset)); kernel.SetArgument(5, static_cast<int>(x_inc)); diff --git a/src/routines/level2/xher.cpp b/src/routines/level2/xher.cpp index ed8ba9e9..6dd95938 100644 --- a/src/routines/level2/xher.cpp +++ b/src/routines/level2/xher.cpp @@ -70,10 +70,6 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle, // Creates a matching version of alpha const auto matching_alpha = GetAlpha(alpha); - // Upload the scalar argument as a constant buffer to the device (needed for half-precision) - auto alpha_buffer = Buffer<T>(context_, 1); - alpha_buffer.Write(queue_, 1, &matching_alpha); - // Retrieves the kernel from the compiled binary try { const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); @@ -81,7 +77,7 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle, // Sets the kernel arguments kernel.SetArgument(0, static_cast<int>(n)); - kernel.SetArgument(1, alpha_buffer()); + kernel.SetArgument(1, GetRealArg(matching_alpha)); kernel.SetArgument(2, x_buffer()); kernel.SetArgument(3, static_cast<int>(x_offset)); kernel.SetArgument(4, static_cast<int>(x_inc)); diff --git a/src/routines/level2/xher2.cpp b/src/routines/level2/xher2.cpp index 50572cea..3d57a9b9 100644 --- a/src/routines/level2/xher2.cpp +++ b/src/routines/level2/xher2.cpp @@ -58,10 +58,6 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle, status = TestVectorY(n, y_buffer, y_offset, y_inc); if (ErrorIn(status)) { return status; } - // Upload the scalar argument as a constant buffer to the device (needed for half-precision) - auto alpha_buffer = Buffer<T>(context_, 1); - alpha_buffer.Write(queue_, 1, &alpha); - // Retrieves the kernel from the compiled 
binary try { const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_); @@ -69,7 +65,7 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle, // Sets the kernel arguments kernel.SetArgument(0, static_cast<int>(n)); - kernel.SetArgument(1, alpha_buffer()); + kernel.SetArgument(1, GetRealArg(alpha)); kernel.SetArgument(2, x_buffer()); kernel.SetArgument(3, static_cast<int>(x_offset)); kernel.SetArgument(4, static_cast<int>(x_inc)); |