summary | refs | log | tree | commit | diff
path: root/src/kernels
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2016-07-23 16:58:11 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2016-07-23 16:58:11 +0200
commit: 40a72259eba491631d8875aae465c5a93d7fed02 (patch)
tree: a192d68c4c7331334721da58401a88a21fa9a88b /src/kernels
parent: 7a4f9637639ce83191bc2d6e8485f9a9dfd949af (diff)
Fixed a bug in the new XgemvFastRot kernel related to local memory size
Diffstat (limited to 'src/kernels')
-rw-r--r-- src/kernels/level2/xgemv_fast.opencl 10
1 file changed, 6 insertions, 4 deletions
diff --git a/src/kernels/level2/xgemv_fast.opencl b/src/kernels/level2/xgemv_fast.opencl
index 359c3770..210c42c1 100644
--- a/src/kernels/level2/xgemv_fast.opencl
+++ b/src/kernels/level2/xgemv_fast.opencl
@@ -97,7 +97,7 @@ __kernel void XgemvFast(const int m, const int n,
const __global real* restrict xgm, const int x_offset, const int x_inc,
__global real* ygm, const int y_offset, const int y_inc,
const int do_conjugate, const int parameter,
- const int kl, const int ku) {
+ const int kl_unused, const int ku_unused) {
const real alpha = GetRealArg(arg_alpha);
const real beta = GetRealArg(arg_beta);
@@ -199,7 +199,7 @@ __kernel void XgemvFastRot(const int m, const int n,
const __global real* restrict xgm, const int x_offset, const int x_inc,
__global real* ygm, const int y_offset, const int y_inc,
const int do_conjugate, const int parameter,
- const int kl, const int ku) {
+ const int kl_unused, const int ku_unused) {
const real alpha = GetRealArg(arg_alpha);
const real beta = GetRealArg(arg_beta);
@@ -216,11 +216,13 @@ __kernel void XgemvFastRot(const int m, const int n,
real acc;
SetToZero(acc);
- // Loops over work-group sized portions of the work
+ // Loops over tile-sized portions of the work
for (int kwg=0; kwg<n; kwg+=WPT3) {
// Loads the vector X into local memory
- xlm[lid] = xgm[(kwg + lid) * x_inc + x_offset];
+ if (lid < WPT3) {
+ xlm[lid] = xgm[(kwg + lid) * x_inc + x_offset];
+ }
// Loads the matrix A into local memory
#pragma unroll