diff options
-rw-r--r-- | CHANGELOG | 1 | ||||
-rw-r--r-- | src/routines/level2/xtbmv.cpp | 4 | ||||
-rw-r--r-- | src/routines/level2/xtpmv.cpp | 4 | ||||
-rw-r--r-- | src/routines/level2/xtrmv.cpp | 4 | ||||
-rw-r--r-- | src/routines/level3/xsyr2k.cpp | 2 | ||||
-rw-r--r-- | src/routines/level3/xtrmm.cpp | 4 |
6 files changed, 10 insertions, 9 deletions
@@ -4,6 +4,7 @@ Development version (next release) - Fixed a bug having to re-create the binary even if it was in the cache - Fixed a bug when using offsets in the direct version of the GEMM kernels - Fixed a missing cl_khr_fp64 when running double-precision on Intel CPUs +- Fixed bugs in the half-precision routines HTBMV/HTPMV/HTRMV/HSYR2K/HTRMM - Tests now also exit with an error code when OpenCL errors or compilation errors occur - Added the OverrideParameters function to the API to be able to supply custom tuning parameters - Various minor fixes and enhancements diff --git a/src/routines/level2/xtbmv.cpp b/src/routines/level2/xtbmv.cpp index f4a58ed2..117d26e0 100644 --- a/src/routines/level2/xtbmv.cpp +++ b/src/routines/level2/xtbmv.cpp @@ -52,9 +52,9 @@ void Xtbmv<T>::DoTbmv(const Layout layout, const Triangle triangle, auto fast_kernels = false; try { MatVec(layout, a_transpose, - n, n, static_cast<T>(1), + n, n, ConstantOne<T>(), a_buffer, a_offset, a_ld, - scratch_buffer, x_offset, x_inc, static_cast<T>(0), + scratch_buffer, x_offset, x_inc, ConstantZero<T>(), x_buffer, x_offset, x_inc, fast_kernels, fast_kernels, parameter, false, k, 0); diff --git a/src/routines/level2/xtpmv.cpp b/src/routines/level2/xtpmv.cpp index c0d26699..00282378 100644 --- a/src/routines/level2/xtpmv.cpp +++ b/src/routines/level2/xtpmv.cpp @@ -52,9 +52,9 @@ void Xtpmv<T>::DoTpmv(const Layout layout, const Triangle triangle, auto fast_kernels = false; try { MatVec(layout, a_transpose, - n, n, static_cast<T>(1), + n, n, ConstantOne<T>(), ap_buffer, ap_offset, n, - scratch_buffer, x_offset, x_inc, static_cast<T>(0), + scratch_buffer, x_offset, x_inc, ConstantZero<T>(), x_buffer, x_offset, x_inc, fast_kernels, fast_kernels, parameter, true, 0, 0); diff --git a/src/routines/level2/xtrmv.cpp b/src/routines/level2/xtrmv.cpp index 5fff9b31..80e29009 100644 --- a/src/routines/level2/xtrmv.cpp +++ b/src/routines/level2/xtrmv.cpp @@ -52,9 +52,9 @@ void Xtrmv<T>::DoTrmv(const Layout layout, 
const Triangle triangle, auto fast_kernels = false; try { MatVec(layout, a_transpose, - n, n, static_cast<T>(1), + n, n, ConstantOne<T>(), a_buffer, a_offset, a_ld, - scratch_buffer, x_offset, x_inc, static_cast<T>(0), + scratch_buffer, x_offset, x_inc, ConstantZero<T>(), x_buffer, x_offset, x_inc, fast_kernels, fast_kernels, parameter, false, 0, 0); diff --git a/src/routines/level3/xsyr2k.cpp b/src/routines/level3/xsyr2k.cpp index fdef43dc..7900eb74 100644 --- a/src/routines/level3/xsyr2k.cpp +++ b/src/routines/level3/xsyr2k.cpp @@ -149,7 +149,7 @@ void Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, const Tran eventWaitList.push_back(eventKernel1); // Swaps the arguments for matrices A and B, and sets 'beta' to 1 - auto one = static_cast<T>(1); + auto one = ConstantOne<T>(); kernel.SetArgument(3, GetRealArg(one)); kernel.SetArgument(4, b_temp()); kernel.SetArgument(5, a_temp()); diff --git a/src/routines/level3/xtrmm.cpp b/src/routines/level3/xtrmm.cpp index 02c295ac..26ef2a5e 100644 --- a/src/routines/level3/xtrmm.cpp +++ b/src/routines/level3/xtrmm.cpp @@ -101,7 +101,7 @@ void Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle trian alpha, temp_triangular, 0, k, b_buffer_copy, b_offset, b_ld, - static_cast<T>(0.0), + ConstantZero<T>(), b_buffer, b_offset, b_ld); } @@ -113,7 +113,7 @@ void Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle trian alpha, b_buffer_copy, b_offset, b_ld, temp_triangular, 0, k, - static_cast<T>(0.0), + ConstantZero<T>(), b_buffer, b_offset, b_ld); } catch (BLASError &e) { // A and B are now reversed, so also reverse the error codes returned from the Xgemm routine |