summary | refs | log | tree | commit | diff
path: root/src/kernels/level2/xgemv_fast.opencl
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2016-07-10 20:32:01 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2016-07-10 20:32:01 +0200
commit c87e877bf23d2fe38a7da2898e1734a3cdeaf48c (patch)
tree d091dfdd826dd11e5c9e533eb46b22aeb7f6f823 /src/kernels/level2/xgemv_fast.opencl
parent 57f09178d89a1cf4f38a0bb338c864ed850d5470 (diff)
Now passing alpha/beta to the kernel as arguments again, as was done before fp16 support; in the fp16 case the arguments are cast on the host and in the kernel
Diffstat (limited to 'src/kernels/level2/xgemv_fast.opencl')
-rw-r--r-- src/kernels/level2/xgemv_fast.opencl 16
1 files changed, 8 insertions, 8 deletions
diff --git a/src/kernels/level2/xgemv_fast.opencl b/src/kernels/level2/xgemv_fast.opencl
index 6a494e84..1127a0b6 100644
--- a/src/kernels/level2/xgemv_fast.opencl
+++ b/src/kernels/level2/xgemv_fast.opencl
@@ -96,16 +96,16 @@ inline realVFR LoadMatrixAVFR(const __global realVFR* restrict agm, const int x,
// --> 'do_conjugate' is 0
__attribute__((reqd_work_group_size(WGS2, 1, 1)))
__kernel void XgemvFast(const int m, const int n,
- const __constant real* restrict arg_alpha,
- const __constant real* restrict arg_beta,
+ const real_arg arg_alpha,
+ const real_arg arg_beta,
const int a_rotated,
const __global realVF* restrict agm, const int a_offset, const int a_ld,
const __global real* restrict xgm, const int x_offset, const int x_inc,
__global real* ygm, const int y_offset, const int y_inc,
const int do_conjugate, const int parameter,
const int kl, const int ku) {
- const real alpha = arg_alpha[0];
- const real beta = arg_beta[0];
+ const real alpha = GetRealArg(arg_alpha);
+ const real beta = GetRealArg(arg_beta);
// Local memory for the vector X
__local real xlm[WGS2];
@@ -198,16 +198,16 @@ __kernel void XgemvFast(const int m, const int n,
// --> 'do_conjugate' is 0
__attribute__((reqd_work_group_size(WGS3, 1, 1)))
__kernel void XgemvFastRot(const int m, const int n,
- const __constant real* restrict arg_alpha,
- const __constant real* restrict arg_beta,
+ const real_arg arg_alpha,
+ const real_arg arg_beta,
const int a_rotated,
const __global realVFR* restrict agm, const int a_offset, const int a_ld,
const __global real* restrict xgm, const int x_offset, const int x_inc,
__global real* ygm, const int y_offset, const int y_inc,
const int do_conjugate, const int parameter,
const int kl, const int ku) {
- const real alpha = arg_alpha[0];
- const real beta = arg_beta[0];
+ const real alpha = GetRealArg(arg_alpha);
+ const real beta = GetRealArg(arg_beta);
// Local memory for the vector X
__local real xlm[WGS3];