diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-12-09 20:44:21 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-12-09 20:44:21 +0100 |
commit | 9f02fb542ca659bf58d1efefdb334ea386ef10e8 (patch) | |
tree | 8669e215f38e8b00a4ee73a436479207f2afdbf5 /src/kernels/level3/convert_hermitian.opencl | |
parent | ca5dbcd2bd31fb0a0e3f6c2f81b3c0fff6250738 (diff) |
Completed kernel modifications for pre-processor of all other kernels
Diffstat (limited to 'src/kernels/level3/convert_hermitian.opencl')
-rw-r--r-- | src/kernels/level3/convert_hermitian.opencl | 20 |
1 files changed, 11 insertions, 9 deletions
```diff
diff --git a/src/kernels/level3/convert_hermitian.opencl b/src/kernels/level3/convert_hermitian.opencl
index ed2ded98..0e89b78b 100644
--- a/src/kernels/level3/convert_hermitian.opencl
+++ b/src/kernels/level3/convert_hermitian.opencl
@@ -16,7 +16,8 @@ R"(
 
 // =================================================================================================
 
-#if defined(ROUTINE_HEMM) && (PRECISION == 3232 || PRECISION == 6464)
+#if defined(ROUTINE_HEMM)
+#if PRECISION == 3232 || PRECISION == 6464
 
 // Kernel to populate a squared hermitian matrix, given that the triangle which holds the data is
 // stored as the lower-triangle of the input matrix. This uses the padding kernel's parameters.
@@ -30,11 +31,11 @@ void HermLowerToSquared(const int src_dim,
 
   // Loops over the work per thread in both dimensions
   #pragma unroll
-  for (int w_one=0; w_one<PAD_WPTX; ++w_one) {
-    const int id_one = (get_group_id(0)*PAD_WPTX + w_one) * PAD_DIMX + get_local_id(0);
+  for (int _w_one = 0; _w_one < PAD_WPTX; _w_one += 1) {
+    const int id_one = (get_group_id(0)*PAD_WPTX + _w_one) * PAD_DIMX + get_local_id(0);
     #pragma unroll
-    for (int w_two=0; w_two<PAD_WPTY; ++w_two) {
-      const int id_two = (get_group_id(1)*PAD_WPTY + w_two) * PAD_DIMY + get_local_id(1);
+    for (int _w_two = 0; _w_two < PAD_WPTY; _w_two += 1) {
+      const int id_two = (get_group_id(1)*PAD_WPTY + _w_two) * PAD_DIMY + get_local_id(1);
       if (id_two < dest_dim && id_one < dest_dim) {
 
         // Loads data from the lower-hermitian matrix
@@ -69,11 +70,11 @@ void HermUpperToSquared(const int src_dim,
 
   // Loops over the work per thread in both dimensions
   #pragma unroll
-  for (int w_one=0; w_one<PAD_WPTX; ++w_one) {
-    const int id_one = (get_group_id(0)*PAD_WPTX + w_one) * PAD_DIMX + get_local_id(0);
+  for (int _w_one = 0; _w_one < PAD_WPTX; _w_one += 1) {
+    const int id_one = (get_group_id(0)*PAD_WPTX + _w_one) * PAD_DIMX + get_local_id(0);
     #pragma unroll
-    for (int w_two=0; w_two<PAD_WPTY; ++w_two) {
-      const int id_two = (get_group_id(1)*PAD_WPTY + w_two) * PAD_DIMY + get_local_id(1);
+    for (int _w_two = 0; _w_two < PAD_WPTY; _w_two += 1) {
+      const int id_two = (get_group_id(1)*PAD_WPTY + _w_two) * PAD_DIMY + get_local_id(1);
       if (id_two < dest_dim && id_one < dest_dim) {
 
         // Loads data from the upper-hermitian matrix
@@ -98,6 +99,7 @@ void HermUpperToSquared(const int src_dim,
 }
 
 #endif
+#endif
 // =================================================================================================
 
 // End of the C++11 raw string literal
```