Completed the kernel modifications needed by the pre-processor for all remaining kernels

author: Cedric Nugteren <web@cedricnugteren.nl> 2017-12-09 20:44:21 +0100
committer: Cedric Nugteren <web@cedricnugteren.nl> 2017-12-09 20:44:21 +0100
commit: 9f02fb542ca659bf58d1efefdb334ea386ef10e8 (patch)
tree: 8669e215f38e8b00a4ee73a436479207f2afdbf5 /src/kernels/level3/convert_hermitian.opencl
parent: ca5dbcd2bd31fb0a0e3f6c2f81b3c0fff6250738 (diff)
1 files changed, 11 insertions, 9 deletions
diff --git a/src/kernels/level3/convert_hermitian.opencl b/src/kernels/level3/convert_hermitian.opencl
index ed2ded98..0e89b78b 100644
--- a/src/kernels/level3/convert_hermitian.opencl
+++ b/src/kernels/level3/convert_hermitian.opencl
@@ -16,7 +16,8 @@
 R"(
 
 // =================================================================================================
-#if defined(ROUTINE_HEMM) && (PRECISION == 3232 || PRECISION == 6464)
+#if defined(ROUTINE_HEMM)
+#if PRECISION == 3232 || PRECISION == 6464
 
 // Kernel to populate a squared hermitian matrix, given that the triangle which holds the data is
 // stored as the lower-triangle of the input matrix. This uses the padding kernel's parameters.
@@ -30,11 +31,11 @@ void HermLowerToSquared(const int src_dim,
 
   // Loops over the work per thread in both dimensions
   #pragma unroll
-  for (int w_one=0; w_one<PAD_WPTX; ++w_one) {
-    const int id_one = (get_group_id(0)*PAD_WPTX + w_one) * PAD_DIMX + get_local_id(0);
+  for (int _w_one = 0; _w_one < PAD_WPTX; _w_one += 1) {
+    const int id_one = (get_group_id(0)*PAD_WPTX + _w_one) * PAD_DIMX + get_local_id(0);
     #pragma unroll
-    for (int w_two=0; w_two<PAD_WPTY; ++w_two) {
-      const int id_two = (get_group_id(1)*PAD_WPTY + w_two) * PAD_DIMY + get_local_id(1);
+    for (int _w_two = 0; _w_two < PAD_WPTY; _w_two += 1) {
+      const int id_two = (get_group_id(1)*PAD_WPTY + _w_two) * PAD_DIMY + get_local_id(1);
       if (id_two < dest_dim && id_one < dest_dim) {
 
         // Loads data from the lower-hermitian matrix
@@ -69,11 +70,11 @@ void HermUpperToSquared(const int src_dim,
 
   // Loops over the work per thread in both dimensions
   #pragma unroll
-  for (int w_one=0; w_one<PAD_WPTX; ++w_one) {
-    const int id_one = (get_group_id(0)*PAD_WPTX + w_one) * PAD_DIMX + get_local_id(0);
+  for (int _w_one = 0; _w_one < PAD_WPTX; _w_one += 1) {
+    const int id_one = (get_group_id(0)*PAD_WPTX + _w_one) * PAD_DIMX + get_local_id(0);
     #pragma unroll
-    for (int w_two=0; w_two<PAD_WPTY; ++w_two) {
-      const int id_two = (get_group_id(1)*PAD_WPTY + w_two) * PAD_DIMY + get_local_id(1);
+    for (int _w_two = 0; _w_two < PAD_WPTY; _w_two += 1) {
+      const int id_two = (get_group_id(1)*PAD_WPTY + _w_two) * PAD_DIMY + get_local_id(1);
       if (id_two < dest_dim && id_one < dest_dim) {
 
         // Loads data from the upper-hermitian matrix
@@ -98,6 +99,7 @@ void HermUpperToSquared(const int src_dim,
 }
 
 #endif
+#endif
 // =================================================================================================
 
 // End of the C++11 raw string literal
author	Cedric Nugteren <web@cedricnugteren.nl>	2017-12-09 20:44:21 +0100
committer	Cedric Nugteren <web@cedricnugteren.nl>	2017-12-09 20:44:21 +0100
commit	9f02fb542ca659bf58d1efefdb334ea386ef10e8 (patch)
tree	8669e215f38e8b00a4ee73a436479207f2afdbf5 /src/kernels/level3/convert_hermitian.opencl
parent	ca5dbcd2bd31fb0a0e3f6c2f81b3c0fff6250738 (diff)