summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl>, 2023-05-10 12:48:25 +0200
committer: GitHub <noreply@github.com>, 2023-05-10 12:48:25 +0200
commit: d94d086d6f92ff1f73bd2a8595a974f6802b3f24 (patch)
tree: c06a19431635f2477578199cf9d87ff8050b901a
parent: 4f24d927302078a416dea1bc29d714a95732b8e9 (diff)
TBMV/TPMV/TRSV: Use the minimum x buffer size for copying to a temp buffer (#461)
-rw-r--r-- CHANGELOG | 4
-rw-r--r-- src/routines/level2/xtbmv.cpp | 5
-rw-r--r-- src/routines/level2/xtpmv.cpp | 5
-rw-r--r-- src/routines/level2/xtrsv.cpp | 2
4 files changed, 9 insertions, 7 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 39813f94..345bab2a 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -1,6 +1,4 @@
Development version (next version)
-- Fixes a minor issue with the expected input buffer size in the TRMV routine
-- Fixes two small issues in the plotting script
- Modifications to improve performance on Qualcomm Adreno GPUs:
* Unique database entries for specific Adreno devices
* Toggle OpenCL kernel compilation options for Adreno
@@ -9,6 +7,8 @@ Development version (next version)
- Fixed a bug in XAMAX/XAMIN routines related to inadvertently including the increment and offset in the result
- Fixed a bug in XAMAX/XAMIN routines that would cause only the real part of a complex number to be taken into account
- Fixed a bug that caused tests to not properly do integer-output testing (for XAMAX/XAMIN)
+- Fixes a minor issue with the expected input buffer size in the TRMV/TBMV/TPMV/TRSV routines
+- Fixes two small issues in the plotting script
- Fixed a documentation bug in the 'ld' requirements
- Added tuned parameters for various devices (see doc/tuning.md)
diff --git a/src/routines/level2/xtbmv.cpp b/src/routines/level2/xtbmv.cpp
index 117d26e0..87053deb 100644
--- a/src/routines/level2/xtbmv.cpp
+++ b/src/routines/level2/xtbmv.cpp
@@ -36,8 +36,9 @@ void Xtbmv<T>::DoTbmv(const Layout layout, const Triangle triangle,
const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
// Creates a copy of X: a temporary scratch buffer
- auto scratch_buffer = Buffer<T>(context_, n*x_inc + x_offset);
- x_buffer.CopyTo(queue_, n*x_inc + x_offset, scratch_buffer);
+ const auto x_size = (1 + (n - 1) * x_inc) + x_offset;
+ auto scratch_buffer = Buffer<T>(context_, x_size);
+ x_buffer.CopyTo(queue_, x_size, scratch_buffer);
// The data is either in the upper or lower triangle
size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) ||
diff --git a/src/routines/level2/xtpmv.cpp b/src/routines/level2/xtpmv.cpp
index 00282378..2190a6f5 100644
--- a/src/routines/level2/xtpmv.cpp
+++ b/src/routines/level2/xtpmv.cpp
@@ -36,8 +36,9 @@ void Xtpmv<T>::DoTpmv(const Layout layout, const Triangle triangle,
const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
// Creates a copy of X: a temporary scratch buffer
- auto scratch_buffer = Buffer<T>(context_, n*x_inc + x_offset);
- x_buffer.CopyTo(queue_, n*x_inc + x_offset, scratch_buffer);
+ const auto x_size = (1 + (n - 1) * x_inc) + x_offset;
+ auto scratch_buffer = Buffer<T>(context_, x_size);
+ x_buffer.CopyTo(queue_, x_size, scratch_buffer);
// The data is either in the upper or lower triangle
size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) ||
diff --git a/src/routines/level2/xtrsv.cpp b/src/routines/level2/xtrsv.cpp
index 2a5a5664..b50b259b 100644
--- a/src/routines/level2/xtrsv.cpp
+++ b/src/routines/level2/xtrsv.cpp
@@ -99,7 +99,7 @@ void Xtrsv<T>::DoTrsv(const Layout layout, const Triangle triangle,
// TODO: Make x with 0 offset and unit increment by creating custom copy-to and copy-from kernels
const auto x_offset = b_offset;
const auto x_inc = b_inc;
- const auto x_size = n*x_inc + x_offset;
+ const auto x_size = (1 + (n - 1) * x_inc) + x_offset;
auto x_buffer = Buffer<T>(context_, x_size);
b_buffer.CopyTo(queue_, x_size, x_buffer);