summary refs log tree commit diff
path: root/src/routines/level2
diff options
context:
space:
mode:
Diffstat (limited to 'src/routines/level2')
-rw-r--r-- src/routines/level2/xgemv.cpp 10
-rw-r--r-- src/routines/level2/xger.cpp 6
-rw-r--r-- src/routines/level2/xher.cpp 6
-rw-r--r-- src/routines/level2/xher2.cpp 6
4 files changed, 5 insertions, 23 deletions
diff --git a/src/routines/level2/xgemv.cpp b/src/routines/level2/xgemv.cpp
index 21fb397c..2842ef07 100644
--- a/src/routines/level2/xgemv.cpp
+++ b/src/routines/level2/xgemv.cpp
@@ -126,12 +126,6 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
local_size = db_["WGS3"];
}
- // Upload the scalar arguments as constant buffers to the device (needed for half-precision)
- auto alpha_buffer = Buffer<T>(context_, 1);
- auto beta_buffer = Buffer<T>(context_, 1);
- alpha_buffer.Write(queue_, 1, &alpha);
- beta_buffer.Write(queue_, 1, &beta);
-
// Retrieves the Xgemv kernel from the compiled binary
try {
const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
@@ -140,8 +134,8 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
// Sets the kernel arguments
kernel.SetArgument(0, static_cast<int>(m_real));
kernel.SetArgument(1, static_cast<int>(n_real));
- kernel.SetArgument(2, alpha_buffer());
- kernel.SetArgument(3, beta_buffer());
+ kernel.SetArgument(2, GetRealArg(alpha));
+ kernel.SetArgument(3, GetRealArg(beta));
kernel.SetArgument(4, static_cast<int>(a_rotated));
kernel.SetArgument(5, a_buffer());
kernel.SetArgument(6, static_cast<int>(a_offset));
diff --git a/src/routines/level2/xger.cpp b/src/routines/level2/xger.cpp
index 353047d2..29cffe0c 100644
--- a/src/routines/level2/xger.cpp
+++ b/src/routines/level2/xger.cpp
@@ -56,10 +56,6 @@ StatusCode Xger<T>::DoGer(const Layout layout,
status = TestVectorY(n, y_buffer, y_offset, y_inc);
if (ErrorIn(status)) { return status; }
- // Upload the scalar argument as a constant buffer to the device (needed for half-precision)
- auto alpha_buffer = Buffer<T>(context_, 1);
- alpha_buffer.Write(queue_, 1, &alpha);
-
// Retrieves the kernel from the compiled binary
try {
const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
@@ -68,7 +64,7 @@ StatusCode Xger<T>::DoGer(const Layout layout,
// Sets the kernel arguments
kernel.SetArgument(0, static_cast<int>(a_one));
kernel.SetArgument(1, static_cast<int>(a_two));
- kernel.SetArgument(2, alpha_buffer());
+ kernel.SetArgument(2, GetRealArg(alpha));
kernel.SetArgument(3, x_buffer());
kernel.SetArgument(4, static_cast<int>(x_offset));
kernel.SetArgument(5, static_cast<int>(x_inc));
diff --git a/src/routines/level2/xher.cpp b/src/routines/level2/xher.cpp
index ed8ba9e9..6dd95938 100644
--- a/src/routines/level2/xher.cpp
+++ b/src/routines/level2/xher.cpp
@@ -70,10 +70,6 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle,
// Creates a matching version of alpha
const auto matching_alpha = GetAlpha(alpha);
- // Upload the scalar argument as a constant buffer to the device (needed for half-precision)
- auto alpha_buffer = Buffer<T>(context_, 1);
- alpha_buffer.Write(queue_, 1, &matching_alpha);
-
// Retrieves the kernel from the compiled binary
try {
const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
@@ -81,7 +77,7 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle,
// Sets the kernel arguments
kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, alpha_buffer());
+ kernel.SetArgument(1, GetRealArg(matching_alpha));
kernel.SetArgument(2, x_buffer());
kernel.SetArgument(3, static_cast<int>(x_offset));
kernel.SetArgument(4, static_cast<int>(x_inc));
diff --git a/src/routines/level2/xher2.cpp b/src/routines/level2/xher2.cpp
index 50572cea..3d57a9b9 100644
--- a/src/routines/level2/xher2.cpp
+++ b/src/routines/level2/xher2.cpp
@@ -58,10 +58,6 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle,
status = TestVectorY(n, y_buffer, y_offset, y_inc);
if (ErrorIn(status)) { return status; }
- // Upload the scalar argument as a constant buffer to the device (needed for half-precision)
- auto alpha_buffer = Buffer<T>(context_, 1);
- alpha_buffer.Write(queue_, 1, &alpha);
-
// Retrieves the kernel from the compiled binary
try {
const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
@@ -69,7 +65,7 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle,
// Sets the kernel arguments
kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, alpha_buffer());
+ kernel.SetArgument(1, GetRealArg(alpha));
kernel.SetArgument(2, x_buffer());
kernel.SetArgument(3, static_cast<int>(x_offset));
kernel.SetArgument(4, static_cast<int>(x_inc));