summaryrefslogtreecommitdiff
path: root/src/kernels/level2/xgemv_fast.opencl
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2016-05-22 15:22:54 +0200
committerCedric Nugteren <web@cedricnugteren.nl>2016-05-22 15:22:54 +0200
commit88551b40055a51800118865940626020d3fc064e (patch)
treed9ced489d62d21b26cc45b5c2d400b6ed3e7ce4a /src/kernels/level2/xgemv_fast.opencl
parent803aaf3070a6b04095b29100e628a4308bb9dcf7 (diff)
Prepared the GEMV kernels and tuner for half-precision support
Diffstat (limited to 'src/kernels/level2/xgemv_fast.opencl')
-rw-r--r--src/kernels/level2/xgemv_fast.opencl14
1 files changed, 12 insertions, 2 deletions
diff --git a/src/kernels/level2/xgemv_fast.opencl b/src/kernels/level2/xgemv_fast.opencl
index 61fdffa3..6a494e84 100644
--- a/src/kernels/level2/xgemv_fast.opencl
+++ b/src/kernels/level2/xgemv_fast.opencl
@@ -95,13 +95,18 @@ inline realVFR LoadMatrixAVFR(const __global realVFR* restrict agm, const int x,
// --> 'a_rotated' is 0
// --> 'do_conjugate' is 0
__attribute__((reqd_work_group_size(WGS2, 1, 1)))
-__kernel void XgemvFast(const int m, const int n, const real alpha, const real beta,
+__kernel void XgemvFast(const int m, const int n,
+ const __constant real* restrict arg_alpha,
+ const __constant real* restrict arg_beta,
const int a_rotated,
const __global realVF* restrict agm, const int a_offset, const int a_ld,
const __global real* restrict xgm, const int x_offset, const int x_inc,
__global real* ygm, const int y_offset, const int y_inc,
const int do_conjugate, const int parameter,
const int kl, const int ku) {
+ const real alpha = arg_alpha[0];
+ const real beta = arg_beta[0];
+
// Local memory for the vector X
__local real xlm[WGS2];
@@ -192,13 +197,18 @@ __kernel void XgemvFast(const int m, const int n, const real alpha, const real b
// --> 'a_rotated' is 1
// --> 'do_conjugate' is 0
__attribute__((reqd_work_group_size(WGS3, 1, 1)))
-__kernel void XgemvFastRot(const int m, const int n, const real alpha, const real beta,
+__kernel void XgemvFastRot(const int m, const int n,
+ const __constant real* restrict arg_alpha,
+ const __constant real* restrict arg_beta,
const int a_rotated,
const __global realVFR* restrict agm, const int a_offset, const int a_ld,
const __global real* restrict xgm, const int x_offset, const int x_inc,
__global real* ygm, const int y_offset, const int y_inc,
const int do_conjugate, const int parameter,
const int kl, const int ku) {
+ const real alpha = arg_alpha[0];
+ const real beta = arg_beta[0];
+
// Local memory for the vector X
__local real xlm[WGS3];