Merge branch 'development' into gemm_direct

author: Cedric Nugteren <web@cedricnugteren.nl> 2016-09-21 21:32:18 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2016-09-21 21:32:18 +0200
commit: 6aa652d6ea2389744195ae5cd19321325b2d71aa (patch)
tree: 58243cb4fbebb094c543028124412718cdeb7c97 /src/kernels/level2
parent: 5004a435ff984bba0dff0147a5c4f6a04d703562 (diff)
parent: b1929d8ce7022cacbd1812d62098ebd0681bc1ef (diff)
5 files changed, 42 insertions, 42 deletions
diff --git a/src/kernels/level2/xgemv.opencl b/src/kernels/level2/xgemv.opencl
index 83b6b15d..ff011acd 100644
--- a/src/kernels/level2/xgemv.opencl
+++ b/src/kernels/level2/xgemv.opencl
@@ -210,8 +210,8 @@ inline real LoadMatrixA(const __global real* restrict agm, const int x, const in
 // =================================================================================================
 
 // Full version of the kernel
-__attribute__((reqd_work_group_size(WGS1, 1, 1)))
-__kernel void Xgemv(const int m, const int n,
+__kernel __attribute__((reqd_work_group_size(WGS1, 1, 1)))
+void Xgemv(const int m, const int n,
                     const real_arg arg_alpha,
                     const real_arg arg_beta,
                     const int a_rotated,
diff --git a/src/kernels/level2/xgemv_fast.opencl b/src/kernels/level2/xgemv_fast.opencl
index 210c42c1..02a1f956 100644
--- a/src/kernels/level2/xgemv_fast.opencl
+++ b/src/kernels/level2/xgemv_fast.opencl
@@ -88,16 +88,16 @@ inline realVF LoadMatrixAVF(const __global realVF* restrict agm, const int x, co
 // --> 'a_ld' is a multiple of VW2
 // --> 'a_rotated' is 0
 // --> 'do_conjugate' is 0
-__attribute__((reqd_work_group_size(WGS2, 1, 1)))
-__kernel void XgemvFast(const int m, const int n,
-                        const real_arg arg_alpha,
-                        const real_arg arg_beta,
-                        const int a_rotated,
-                        const __global realVF* restrict agm, const int a_offset, const int a_ld,
-                        const __global real* restrict xgm, const int x_offset, const int x_inc,
-                        __global real* ygm, const int y_offset, const int y_inc,
-                        const int do_conjugate, const int parameter,
-                        const int kl_unused, const int ku_unused) {
+__kernel __attribute__((reqd_work_group_size(WGS2, 1, 1)))
+void XgemvFast(const int m, const int n,
+               const real_arg arg_alpha,
+               const real_arg arg_beta,
+               const int a_rotated,
+               const __global realVF* restrict agm, const int a_offset, const int a_ld,
+               const __global real* restrict xgm, const int x_offset, const int x_inc,
+               __global real* ygm, const int y_offset, const int y_inc,
+               const int do_conjugate, const int parameter,
+               const int kl_unused, const int ku_unused) {
   const real alpha = GetRealArg(arg_alpha);
   const real beta = GetRealArg(arg_beta);
 
@@ -190,16 +190,16 @@ __kernel void XgemvFast(const int m, const int n,
 // --> 'a_ld' is a multiple of VW3
 // --> 'a_rotated' is 1
 // --> 'do_conjugate' is 0
-__attribute__((reqd_work_group_size(WGS3, 1, 1)))
-__kernel void XgemvFastRot(const int m, const int n,
-                           const real_arg arg_alpha,
-                           const real_arg arg_beta,
-                           const int a_rotated,
-                           const __global realVFR* restrict agm, const int a_offset, const int a_ld,
-                           const __global real* restrict xgm, const int x_offset, const int x_inc,
-                           __global real* ygm, const int y_offset, const int y_inc,
-                           const int do_conjugate, const int parameter,
-                           const int kl_unused, const int ku_unused) {
+__kernel __attribute__((reqd_work_group_size(WGS3, 1, 1)))
+void XgemvFastRot(const int m, const int n,
+                  const real_arg arg_alpha,
+                  const real_arg arg_beta,
+                  const int a_rotated,
+                  const __global realVFR* restrict agm, const int a_offset, const int a_ld,
+                  const __global real* restrict xgm, const int x_offset, const int x_inc,
+                  __global real* ygm, const int y_offset, const int y_inc,
+                  const int do_conjugate, const int parameter,
+                  const int kl_unused, const int ku_unused) {
   const real alpha = GetRealArg(arg_alpha);
   const real beta = GetRealArg(arg_beta);
 
diff --git a/src/kernels/level2/xger.opencl b/src/kernels/level2/xger.opencl
index f218a346..1b9ded12 100644
--- a/src/kernels/level2/xger.opencl
+++ b/src/kernels/level2/xger.opencl
@@ -18,13 +18,13 @@ R"(
 // =================================================================================================
 
 // Regular version of the rank-1 matrix update kernel (GER, GERU, GERC)
-__attribute__((reqd_work_group_size(WGS1, WGS2, 1)))
-__kernel void Xger(const int max1, const int max2,
-                   const real_arg arg_alpha,
-                   const __global real* restrict xgm, const int x_offset, const int x_inc,
-                   const __global real* ygm, const int y_offset, const int y_inc,
-                   __global real* restrict agm, const int a_offset, const int a_ld,
-                   const int is_rowmajor) {
+__kernel __attribute__((reqd_work_group_size(WGS1, WGS2, 1)))
+void Xger(const int max1, const int max2,
+          const real_arg arg_alpha,
+          const __global real* restrict xgm, const int x_offset, const int x_inc,
+          const __global real* ygm, const int y_offset, const int y_inc,
+          __global real* restrict agm, const int a_offset, const int a_ld,
+          const int is_rowmajor) {
   const real alpha = GetRealArg(arg_alpha);
 
   // Register storage for X and Y
diff --git a/src/kernels/level2/xher.opencl b/src/kernels/level2/xher.opencl
index 1200ee63..b0772218 100644
--- a/src/kernels/level2/xher.opencl
+++ b/src/kernels/level2/xher.opencl
@@ -18,12 +18,12 @@ R"(
 // =================================================================================================
 
 // Symmetric version of the rank-1 matrix update kernel (HER, HPR, SYR, SPR)
-__attribute__((reqd_work_group_size(WGS1, WGS2, 1)))
-__kernel void Xher(const int n,
-                   const real_arg arg_alpha,
-                   const __global real* restrict xgm, const int x_offset, const int x_inc,
-                   __global real* restrict agm, const int a_offset, const int a_ld,
-                   const int is_upper, const int is_rowmajor) {
+__kernel __attribute__((reqd_work_group_size(WGS1, WGS2, 1)))
+void Xher(const int n,
+          const real_arg arg_alpha,
+          const __global real* restrict xgm, const int x_offset, const int x_inc,
+          __global real* restrict agm, const int a_offset, const int a_ld,
+          const int is_upper, const int is_rowmajor) {
   const real alpha = GetRealArg(arg_alpha);
 
   // Register storage for X and XT
diff --git a/src/kernels/level2/xher2.opencl b/src/kernels/level2/xher2.opencl
index d0f41571..00a756c9 100644
--- a/src/kernels/level2/xher2.opencl
+++ b/src/kernels/level2/xher2.opencl
@@ -18,13 +18,13 @@ R"(
 // =================================================================================================
 
 // Symmetric version of the rank-2 matrix update kernel (HER2, HPR2, SYR2, SPR2)
-__attribute__((reqd_work_group_size(WGS1, WGS2, 1)))
-__kernel void Xher2(const int n,
-                    const real_arg arg_alpha,
-                    const __global real* restrict xgm, const int x_offset, const int x_inc,
-                    const __global real* restrict ygm, const int y_offset, const int y_inc,
-                    __global real* restrict agm, const int a_offset, const int a_ld,
-                    const int is_upper, const int is_rowmajor) {
+__kernel __attribute__((reqd_work_group_size(WGS1, WGS2, 1)))
+void Xher2(const int n,
+           const real_arg arg_alpha,
+           const __global real* restrict xgm, const int x_offset, const int x_inc,
+           const __global real* restrict ygm, const int y_offset, const int y_inc,
+           __global real* restrict agm, const int a_offset, const int a_ld,
+           const int is_upper, const int is_rowmajor) {
   const real alpha = GetRealArg(arg_alpha);
 
   // Register storage for X and Y
author	Cedric Nugteren <web@cedricnugteren.nl>	2016-09-21 21:32:18 +0200
committer	Cedric Nugteren <web@cedricnugteren.nl>	2016-09-21 21:32:18 +0200
commit	6aa652d6ea2389744195ae5cd19321325b2d71aa (patch)
tree	58243cb4fbebb094c543028124412718cdeb7c97 /src/kernels/level2
parent	5004a435ff984bba0dff0147a5c4f6a04d703562 (diff)
parent	b1929d8ce7022cacbd1812d62098ebd0681bc1ef (diff)