summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- CHANGELOG 1
-rw-r--r-- src/routines/level2/xtbmv.cpp 4
-rw-r--r-- src/routines/level2/xtpmv.cpp 4
-rw-r--r-- src/routines/level2/xtrmv.cpp 4
-rw-r--r-- src/routines/level3/xsyr2k.cpp 2
-rw-r--r-- src/routines/level3/xtrmm.cpp 4
6 files changed, 10 insertions, 9 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 38bbaa07..f7cae9b8 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -4,6 +4,7 @@ Development version (next release)
- Fixed a bug having to re-create the binary even if it was in the cache
- Fixed a bug when using offsets in the direct version of the GEMM kernels
- Fixed a missing cl_khr_fp64 when running double-precision on Intel CPUs
+- Fixed bugs in the half-precision routines HTBMV/HTPMV/HTRMV/HSYR2K/HTRMM
- Tests now also exit with an error code when OpenCL errors or compilation errors occur
- Added the OverrideParameters function to the API to be able to supply custom tuning parameters
- Various minor fixes and enhancements
diff --git a/src/routines/level2/xtbmv.cpp b/src/routines/level2/xtbmv.cpp
index f4a58ed2..117d26e0 100644
--- a/src/routines/level2/xtbmv.cpp
+++ b/src/routines/level2/xtbmv.cpp
@@ -52,9 +52,9 @@ void Xtbmv<T>::DoTbmv(const Layout layout, const Triangle triangle,
auto fast_kernels = false;
try {
MatVec(layout, a_transpose,
- n, n, static_cast<T>(1),
+ n, n, ConstantOne<T>(),
a_buffer, a_offset, a_ld,
- scratch_buffer, x_offset, x_inc, static_cast<T>(0),
+ scratch_buffer, x_offset, x_inc, ConstantZero<T>(),
x_buffer, x_offset, x_inc,
fast_kernels, fast_kernels,
parameter, false, k, 0);
diff --git a/src/routines/level2/xtpmv.cpp b/src/routines/level2/xtpmv.cpp
index c0d26699..00282378 100644
--- a/src/routines/level2/xtpmv.cpp
+++ b/src/routines/level2/xtpmv.cpp
@@ -52,9 +52,9 @@ void Xtpmv<T>::DoTpmv(const Layout layout, const Triangle triangle,
auto fast_kernels = false;
try {
MatVec(layout, a_transpose,
- n, n, static_cast<T>(1),
+ n, n, ConstantOne<T>(),
ap_buffer, ap_offset, n,
- scratch_buffer, x_offset, x_inc, static_cast<T>(0),
+ scratch_buffer, x_offset, x_inc, ConstantZero<T>(),
x_buffer, x_offset, x_inc,
fast_kernels, fast_kernels,
parameter, true, 0, 0);
diff --git a/src/routines/level2/xtrmv.cpp b/src/routines/level2/xtrmv.cpp
index 5fff9b31..80e29009 100644
--- a/src/routines/level2/xtrmv.cpp
+++ b/src/routines/level2/xtrmv.cpp
@@ -52,9 +52,9 @@ void Xtrmv<T>::DoTrmv(const Layout layout, const Triangle triangle,
auto fast_kernels = false;
try {
MatVec(layout, a_transpose,
- n, n, static_cast<T>(1),
+ n, n, ConstantOne<T>(),
a_buffer, a_offset, a_ld,
- scratch_buffer, x_offset, x_inc, static_cast<T>(0),
+ scratch_buffer, x_offset, x_inc, ConstantZero<T>(),
x_buffer, x_offset, x_inc,
fast_kernels, fast_kernels,
parameter, false, 0, 0);
diff --git a/src/routines/level3/xsyr2k.cpp b/src/routines/level3/xsyr2k.cpp
index fdef43dc..7900eb74 100644
--- a/src/routines/level3/xsyr2k.cpp
+++ b/src/routines/level3/xsyr2k.cpp
@@ -149,7 +149,7 @@ void Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, const Tran
eventWaitList.push_back(eventKernel1);
// Swaps the arguments for matrices A and B, and sets 'beta' to 1
- auto one = static_cast<T>(1);
+ auto one = ConstantOne<T>();
kernel.SetArgument(3, GetRealArg(one));
kernel.SetArgument(4, b_temp());
kernel.SetArgument(5, a_temp());
diff --git a/src/routines/level3/xtrmm.cpp b/src/routines/level3/xtrmm.cpp
index 02c295ac..26ef2a5e 100644
--- a/src/routines/level3/xtrmm.cpp
+++ b/src/routines/level3/xtrmm.cpp
@@ -101,7 +101,7 @@ void Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle trian
alpha,
temp_triangular, 0, k,
b_buffer_copy, b_offset, b_ld,
- static_cast<T>(0.0),
+ ConstantZero<T>(),
b_buffer, b_offset, b_ld);
}
@@ -113,7 +113,7 @@ void Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle trian
alpha,
b_buffer_copy, b_offset, b_ld,
temp_triangular, 0, k,
- static_cast<T>(0.0),
+ ConstantZero<T>(),
b_buffer, b_offset, b_ld);
} catch (BLASError &e) {
// A and B are now reversed, so also reverse the error codes returned from the Xgemm routine