diff options
| Field | Value | Date |
|---|---|---|
| author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-05-22 15:22:54 +0200 |
| committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-05-22 15:22:54 +0200 |
| commit | 88551b40055a51800118865940626020d3fc064e (patch) | |
| tree | d9ced489d62d21b26cc45b5c2d400b6ed3e7ce4a /src/kernels | |
| parent | 803aaf3070a6b04095b29100e628a4308bb9dcf7 (diff) | |
Prepared the GEMV kernels and tuner for half-precision support
Diffstat (limited to 'src/kernels')
-rw-r--r-- | src/kernels/level2/xgemv.opencl | 6 |
-rw-r--r-- | src/kernels/level2/xgemv_fast.opencl | 14 |
2 files changed, 17 insertions, 3 deletions
```diff
diff --git a/src/kernels/level2/xgemv.opencl b/src/kernels/level2/xgemv.opencl
index 30b131b4..65b4291f 100644
--- a/src/kernels/level2/xgemv.opencl
+++ b/src/kernels/level2/xgemv.opencl
@@ -211,13 +211,17 @@ inline real LoadMatrixA(const __global real* restrict agm, const int x, const in
 
 // Full version of the kernel
 __attribute__((reqd_work_group_size(WGS1, 1, 1)))
-__kernel void Xgemv(const int m, const int n, const real alpha, const real beta,
+__kernel void Xgemv(const int m, const int n,
+                    const __constant real* restrict arg_alpha,
+                    const __constant real* restrict arg_beta,
                     const int a_rotated,
                     const __global real* restrict agm, const int a_offset, const int a_ld,
                     const __global real* restrict xgm, const int x_offset, const int x_inc,
                     __global real* ygm, const int y_offset, const int y_inc,
                     const int do_conjugate, const int parameter,
                     const int kl, const int ku) {
+  const real alpha = arg_alpha[0];
+  const real beta = arg_beta[0];
 
   // Local memory for the vector X
   __local real xlm[WGS1];
diff --git a/src/kernels/level2/xgemv_fast.opencl b/src/kernels/level2/xgemv_fast.opencl
index 61fdffa3..6a494e84 100644
--- a/src/kernels/level2/xgemv_fast.opencl
+++ b/src/kernels/level2/xgemv_fast.opencl
@@ -95,13 +95,18 @@ inline realVFR LoadMatrixAVFR(const __global realVFR* restrict agm, const int x,
 // --> 'a_rotated' is 0
 // --> 'do_conjugate' is 0
 __attribute__((reqd_work_group_size(WGS2, 1, 1)))
-__kernel void XgemvFast(const int m, const int n, const real alpha, const real beta,
+__kernel void XgemvFast(const int m, const int n,
+                        const __constant real* restrict arg_alpha,
+                        const __constant real* restrict arg_beta,
                         const int a_rotated,
                         const __global realVF* restrict agm, const int a_offset, const int a_ld,
                         const __global real* restrict xgm, const int x_offset, const int x_inc,
                         __global real* ygm, const int y_offset, const int y_inc,
                         const int do_conjugate, const int parameter,
                         const int kl, const int ku) {
+  const real alpha = arg_alpha[0];
+  const real beta = arg_beta[0];
+
   // Local memory for the vector X
   __local real xlm[WGS2];
 
@@ -192,13 +197,18 @@ __kernel void XgemvFast(const int m, const int n, const real alpha, const real b
 // --> 'a_rotated' is 1
 // --> 'do_conjugate' is 0
 __attribute__((reqd_work_group_size(WGS3, 1, 1)))
-__kernel void XgemvFastRot(const int m, const int n, const real alpha, const real beta,
+__kernel void XgemvFastRot(const int m, const int n,
+                           const __constant real* restrict arg_alpha,
+                           const __constant real* restrict arg_beta,
                            const int a_rotated,
                            const __global realVFR* restrict agm, const int a_offset, const int a_ld,
                            const __global real* restrict xgm, const int x_offset, const int x_inc,
                            __global real* ygm, const int y_offset, const int y_inc,
                            const int do_conjugate, const int parameter,
                            const int kl, const int ku) {
+  const real alpha = arg_alpha[0];
+  const real beta = arg_beta[0];
+
   // Local memory for the vector X
   __local real xlm[WGS3];
 
```