diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-09-21 21:32:18 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-09-21 21:32:18 +0200 |
commit | 6aa652d6ea2389744195ae5cd19321325b2d71aa (patch) | |
tree | 58243cb4fbebb094c543028124412718cdeb7c97 /src/kernels/level2 | |
parent | 5004a435ff984bba0dff0147a5c4f6a04d703562 (diff) | |
parent | b1929d8ce7022cacbd1812d62098ebd0681bc1ef (diff) |
Merge branch 'development' into gemm_direct
Diffstat (limited to 'src/kernels/level2')
-rw-r--r-- | src/kernels/level2/xgemv.opencl | 4 | ||||
-rw-r--r-- | src/kernels/level2/xgemv_fast.opencl | 40 | ||||
-rw-r--r-- | src/kernels/level2/xger.opencl | 14 | ||||
-rw-r--r-- | src/kernels/level2/xher.opencl | 12 | ||||
-rw-r--r-- | src/kernels/level2/xher2.opencl | 14 |
5 files changed, 42 insertions, 42 deletions
diff --git a/src/kernels/level2/xgemv.opencl b/src/kernels/level2/xgemv.opencl index 83b6b15d..ff011acd 100644 --- a/src/kernels/level2/xgemv.opencl +++ b/src/kernels/level2/xgemv.opencl @@ -210,8 +210,8 @@ inline real LoadMatrixA(const __global real* restrict agm, const int x, const in // ================================================================================================= // Full version of the kernel -__attribute__((reqd_work_group_size(WGS1, 1, 1))) -__kernel void Xgemv(const int m, const int n, +__kernel __attribute__((reqd_work_group_size(WGS1, 1, 1))) +void Xgemv(const int m, const int n, const real_arg arg_alpha, const real_arg arg_beta, const int a_rotated, diff --git a/src/kernels/level2/xgemv_fast.opencl b/src/kernels/level2/xgemv_fast.opencl index 210c42c1..02a1f956 100644 --- a/src/kernels/level2/xgemv_fast.opencl +++ b/src/kernels/level2/xgemv_fast.opencl @@ -88,16 +88,16 @@ inline realVF LoadMatrixAVF(const __global realVF* restrict agm, const int x, co // --> 'a_ld' is a multiple of VW2 // --> 'a_rotated' is 0 // --> 'do_conjugate' is 0 -__attribute__((reqd_work_group_size(WGS2, 1, 1))) -__kernel void XgemvFast(const int m, const int n, - const real_arg arg_alpha, - const real_arg arg_beta, - const int a_rotated, - const __global realVF* restrict agm, const int a_offset, const int a_ld, - const __global real* restrict xgm, const int x_offset, const int x_inc, - __global real* ygm, const int y_offset, const int y_inc, - const int do_conjugate, const int parameter, - const int kl_unused, const int ku_unused) { +__kernel __attribute__((reqd_work_group_size(WGS2, 1, 1))) +void XgemvFast(const int m, const int n, + const real_arg arg_alpha, + const real_arg arg_beta, + const int a_rotated, + const __global realVF* restrict agm, const int a_offset, const int a_ld, + const __global real* restrict xgm, const int x_offset, const int x_inc, + __global real* ygm, const int y_offset, const int y_inc, + const int do_conjugate, const int parameter, + const int kl_unused, const int ku_unused) { const real alpha = GetRealArg(arg_alpha); const real beta = GetRealArg(arg_beta); @@ -190,16 +190,16 @@ __kernel void XgemvFast(const int m, const int n, // --> 'a_ld' is a multiple of VW3 // --> 'a_rotated' is 1 // --> 'do_conjugate' is 0 -__attribute__((reqd_work_group_size(WGS3, 1, 1))) -__kernel void XgemvFastRot(const int m, const int n, - const real_arg arg_alpha, - const real_arg arg_beta, - const int a_rotated, - const __global realVFR* restrict agm, const int a_offset, const int a_ld, - const __global real* restrict xgm, const int x_offset, const int x_inc, - __global real* ygm, const int y_offset, const int y_inc, - const int do_conjugate, const int parameter, - const int kl_unused, const int ku_unused) { +__kernel __attribute__((reqd_work_group_size(WGS3, 1, 1))) +void XgemvFastRot(const int m, const int n, + const real_arg arg_alpha, + const real_arg arg_beta, + const int a_rotated, + const __global realVFR* restrict agm, const int a_offset, const int a_ld, + const __global real* restrict xgm, const int x_offset, const int x_inc, + __global real* ygm, const int y_offset, const int y_inc, + const int do_conjugate, const int parameter, + const int kl_unused, const int ku_unused) { const real alpha = GetRealArg(arg_alpha); const real beta = GetRealArg(arg_beta); diff --git a/src/kernels/level2/xger.opencl b/src/kernels/level2/xger.opencl index f218a346..1b9ded12 100644 --- a/src/kernels/level2/xger.opencl +++ b/src/kernels/level2/xger.opencl @@ -18,13 +18,13 @@ R"( // ================================================================================================= // Regular version of the rank-1 matrix update kernel (GER, GERU, GERC) -__attribute__((reqd_work_group_size(WGS1, WGS2, 1))) -__kernel void Xger(const int max1, const int max2, - const real_arg arg_alpha, - const __global real* restrict xgm, const int x_offset, const int x_inc, - const __global real* ygm, const int y_offset, const int y_inc, - __global real* restrict agm, const int a_offset, const int a_ld, - const int is_rowmajor) { +__kernel __attribute__((reqd_work_group_size(WGS1, WGS2, 1))) +void Xger(const int max1, const int max2, + const real_arg arg_alpha, + const __global real* restrict xgm, const int x_offset, const int x_inc, + const __global real* ygm, const int y_offset, const int y_inc, + __global real* restrict agm, const int a_offset, const int a_ld, + const int is_rowmajor) { const real alpha = GetRealArg(arg_alpha); // Register storage for X and Y diff --git a/src/kernels/level2/xher.opencl b/src/kernels/level2/xher.opencl index 1200ee63..b0772218 100644 --- a/src/kernels/level2/xher.opencl +++ b/src/kernels/level2/xher.opencl @@ -18,12 +18,12 @@ R"( // ================================================================================================= // Symmetric version of the rank-1 matrix update kernel (HER, HPR, SYR, SPR) -__attribute__((reqd_work_group_size(WGS1, WGS2, 1))) -__kernel void Xher(const int n, - const real_arg arg_alpha, - const __global real* restrict xgm, const int x_offset, const int x_inc, - __global real* restrict agm, const int a_offset, const int a_ld, - const int is_upper, const int is_rowmajor) { +__kernel __attribute__((reqd_work_group_size(WGS1, WGS2, 1))) +void Xher(const int n, + const real_arg arg_alpha, + const __global real* restrict xgm, const int x_offset, const int x_inc, + __global real* restrict agm, const int a_offset, const int a_ld, + const int is_upper, const int is_rowmajor) { const real alpha = GetRealArg(arg_alpha); // Register storage for X and XT diff --git a/src/kernels/level2/xher2.opencl b/src/kernels/level2/xher2.opencl index d0f41571..00a756c9 100644 --- a/src/kernels/level2/xher2.opencl +++ b/src/kernels/level2/xher2.opencl @@ -18,13 +18,13 @@ R"( // ================================================================================================= // Symmetric version of the rank-2 matrix update kernel (HER2, HPR2, SYR2, SPR2) -__attribute__((reqd_work_group_size(WGS1, WGS2, 1))) -__kernel void Xher2(const int n, - const real_arg arg_alpha, - const __global real* restrict xgm, const int x_offset, const int x_inc, - const __global real* restrict ygm, const int y_offset, const int y_inc, - __global real* restrict agm, const int a_offset, const int a_ld, - const int is_upper, const int is_rowmajor) { +__kernel __attribute__((reqd_work_group_size(WGS1, WGS2, 1))) +void Xher2(const int n, + const real_arg arg_alpha, + const __global real* restrict xgm, const int x_offset, const int x_inc, + const __global real* restrict ygm, const int y_offset, const int y_inc, + __global real* restrict agm, const int a_offset, const int a_ld, + const int is_upper, const int is_rowmajor) { const real alpha = GetRealArg(arg_alpha); // Register storage for X and Y |