summary | refs | log | tree | commit | diff
path: root/src/kernels/level3/convert_hermitian.opencl
diff options
context:
space:
mode:
Diffstat (limited to 'src/kernels/level3/convert_hermitian.opencl')
-rw-r--r-- src/kernels/level3/convert_hermitian.opencl 20
1 files changed, 11 insertions, 9 deletions
diff --git a/src/kernels/level3/convert_hermitian.opencl b/src/kernels/level3/convert_hermitian.opencl
index ed2ded98..0e89b78b 100644
--- a/src/kernels/level3/convert_hermitian.opencl
+++ b/src/kernels/level3/convert_hermitian.opencl
@@ -16,7 +16,8 @@
R"(
// =================================================================================================
-#if defined(ROUTINE_HEMM) && (PRECISION == 3232 || PRECISION == 6464)
+#if defined(ROUTINE_HEMM)
+#if PRECISION == 3232 || PRECISION == 6464
// Kernel to populate a squared hermitian matrix, given that the triangle which holds the data is
// stored as the lower-triangle of the input matrix. This uses the padding kernel's parameters.
@@ -30,11 +31,11 @@ void HermLowerToSquared(const int src_dim,
// Loops over the work per thread in both dimensions
#pragma unroll
- for (int w_one=0; w_one<PAD_WPTX; ++w_one) {
- const int id_one = (get_group_id(0)*PAD_WPTX + w_one) * PAD_DIMX + get_local_id(0);
+ for (int _w_one = 0; _w_one < PAD_WPTX; _w_one += 1) {
+ const int id_one = (get_group_id(0)*PAD_WPTX + _w_one) * PAD_DIMX + get_local_id(0);
#pragma unroll
- for (int w_two=0; w_two<PAD_WPTY; ++w_two) {
- const int id_two = (get_group_id(1)*PAD_WPTY + w_two) * PAD_DIMY + get_local_id(1);
+ for (int _w_two = 0; _w_two < PAD_WPTY; _w_two += 1) {
+ const int id_two = (get_group_id(1)*PAD_WPTY + _w_two) * PAD_DIMY + get_local_id(1);
if (id_two < dest_dim && id_one < dest_dim) {
// Loads data from the lower-hermitian matrix
@@ -69,11 +70,11 @@ void HermUpperToSquared(const int src_dim,
// Loops over the work per thread in both dimensions
#pragma unroll
- for (int w_one=0; w_one<PAD_WPTX; ++w_one) {
- const int id_one = (get_group_id(0)*PAD_WPTX + w_one) * PAD_DIMX + get_local_id(0);
+ for (int _w_one = 0; _w_one < PAD_WPTX; _w_one += 1) {
+ const int id_one = (get_group_id(0)*PAD_WPTX + _w_one) * PAD_DIMX + get_local_id(0);
#pragma unroll
- for (int w_two=0; w_two<PAD_WPTY; ++w_two) {
- const int id_two = (get_group_id(1)*PAD_WPTY + w_two) * PAD_DIMY + get_local_id(1);
+ for (int _w_two = 0; _w_two < PAD_WPTY; _w_two += 1) {
+ const int id_two = (get_group_id(1)*PAD_WPTY + _w_two) * PAD_DIMY + get_local_id(1);
if (id_two < dest_dim && id_one < dest_dim) {
// Loads data from the upper-hermitian matrix
@@ -98,6 +99,7 @@ void HermUpperToSquared(const int src_dim,
}
#endif
+#endif
// =================================================================================================
// End of the C++11 raw string literal