summary | refs | log | tree | commit | diff
path: root/src/kernels/level2
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl>, 2016-05-22 16:18:08 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl>, 2016-05-22 16:18:08 +0200
commit: c8ff3f143fe94c87b23fd1bf36c1a4f91d305f01 (patch)
tree: 9c32e1a944c58ffcd711a7a69903fe6f9f95911d /src/kernels/level2
parent: 95b828da124b9c5c101d95cb51a12e9d387d1a34 (diff)
Prepared the GER kernels and tuner for half-precision support
Diffstat (limited to 'src/kernels/level2')
-rw-r--r-- src/kernels/level2/xger.opencl | 4
-rw-r--r-- src/kernels/level2/xher.opencl | 4
-rw-r--r-- src/kernels/level2/xher2.opencl | 4
3 files changed, 9 insertions, 3 deletions
diff --git a/src/kernels/level2/xger.opencl b/src/kernels/level2/xger.opencl
index d377fbb0..63817afb 100644
--- a/src/kernels/level2/xger.opencl
+++ b/src/kernels/level2/xger.opencl
@@ -19,11 +19,13 @@ R"(
// Regular version of the rank-1 matrix update kernel (GER, GERU, GERC)
__attribute__((reqd_work_group_size(WGS1, WGS2, 1)))
-__kernel void Xger(const int max1, const int max2, const real alpha,
+__kernel void Xger(const int max1, const int max2,
+ const __constant real* restrict arg_alpha,
const __global real* restrict xgm, const int x_offset, const int x_inc,
const __global real* ygm, const int y_offset, const int y_inc,
__global real* restrict agm, const int a_offset, const int a_ld,
const int is_rowmajor) {
+ const real alpha = arg_alpha[0];
// Register storage for X and Y
real xvalues[WPT];
diff --git a/src/kernels/level2/xher.opencl b/src/kernels/level2/xher.opencl
index edb94ca8..fc635f2e 100644
--- a/src/kernels/level2/xher.opencl
+++ b/src/kernels/level2/xher.opencl
@@ -19,10 +19,12 @@ R"(
// Symmetric version of the rank-1 matrix update kernel (HER, HPR, SYR, SPR)
__attribute__((reqd_work_group_size(WGS1, WGS2, 1)))
-__kernel void Xher(const int n, const real alpha,
+__kernel void Xher(const int n,
+ const __constant real* restrict arg_alpha,
const __global real* restrict xgm, const int x_offset, const int x_inc,
__global real* restrict agm, const int a_offset, const int a_ld,
const int is_upper, const int is_rowmajor) {
+ const real alpha = arg_alpha[0];
// Register storage for X and XT
real xvalues[WPT];
diff --git a/src/kernels/level2/xher2.opencl b/src/kernels/level2/xher2.opencl
index 4a2edce8..a66f255f 100644
--- a/src/kernels/level2/xher2.opencl
+++ b/src/kernels/level2/xher2.opencl
@@ -19,11 +19,13 @@ R"(
// Symmetric version of the rank-2 matrix update kernel (HER2, HPR2, SYR2, SPR2)
__attribute__((reqd_work_group_size(WGS1, WGS2, 1)))
-__kernel void Xher2(const int n, const real alpha,
+__kernel void Xher2(const int n,
+ const __constant real* restrict arg_alpha,
const __global real* restrict xgm, const int x_offset, const int x_inc,
const __global real* restrict ygm, const int y_offset, const int y_inc,
__global real* restrict agm, const int a_offset, const int a_ld,
const int is_upper, const int is_rowmajor) {
+ const real alpha = arg_alpha[0];
// Register storage for X and Y
real xvalues[WPT];