summary | refs | log | tree | commit | diff
path: root/src/kernels/level2/xgemv_fast.opencl
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2016-06-28 22:32:25 +0200
committer: GitHub <noreply@github.com> 2016-06-28 22:32:25 +0200
commit: 7c13bacf129291e3e295ecb6e833788477085fa0 (patch)
tree: d114eeca418444d0b1c70cc9cce983de041235c9 /src/kernels/level2/xgemv_fast.opencl
parent: 181eb20bbf15cf11baaf6112b6965050c49dd543 (diff)
parent: 577f0ee1179014ece853af39d6f0ff0c87316eb3 (diff)
Merge pull request #70 from CNugteren/development
Update to version 0.8.0
Diffstat (limited to 'src/kernels/level2/xgemv_fast.opencl')
-rw-r--r-- src/kernels/level2/xgemv_fast.opencl | 14
1 file changed, 12 insertions, 2 deletions
diff --git a/src/kernels/level2/xgemv_fast.opencl b/src/kernels/level2/xgemv_fast.opencl
index 61fdffa3..6a494e84 100644
--- a/src/kernels/level2/xgemv_fast.opencl
+++ b/src/kernels/level2/xgemv_fast.opencl
@@ -95,13 +95,18 @@ inline realVFR LoadMatrixAVFR(const __global realVFR* restrict agm, const int x,
// --> 'a_rotated' is 0
// --> 'do_conjugate' is 0
__attribute__((reqd_work_group_size(WGS2, 1, 1)))
-__kernel void XgemvFast(const int m, const int n, const real alpha, const real beta,
+__kernel void XgemvFast(const int m, const int n,
+ const __constant real* restrict arg_alpha,
+ const __constant real* restrict arg_beta,
const int a_rotated,
const __global realVF* restrict agm, const int a_offset, const int a_ld,
const __global real* restrict xgm, const int x_offset, const int x_inc,
__global real* ygm, const int y_offset, const int y_inc,
const int do_conjugate, const int parameter,
const int kl, const int ku) {
+ const real alpha = arg_alpha[0];
+ const real beta = arg_beta[0];
+
// Local memory for the vector X
__local real xlm[WGS2];
@@ -192,13 +197,18 @@ __kernel void XgemvFast(const int m, const int n, const real alpha, const real b
// --> 'a_rotated' is 1
// --> 'do_conjugate' is 0
__attribute__((reqd_work_group_size(WGS3, 1, 1)))
-__kernel void XgemvFastRot(const int m, const int n, const real alpha, const real beta,
+__kernel void XgemvFastRot(const int m, const int n,
+ const __constant real* restrict arg_alpha,
+ const __constant real* restrict arg_beta,
const int a_rotated,
const __global realVFR* restrict agm, const int a_offset, const int a_ld,
const __global real* restrict xgm, const int x_offset, const int x_inc,
__global real* ygm, const int y_offset, const int y_inc,
const int do_conjugate, const int parameter,
const int kl, const int ku) {
+ const real alpha = arg_alpha[0];
+ const real beta = arg_beta[0];
+
// Local memory for the vector X
__local real xlm[WGS3];