diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-10-14 10:49:25 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-10-14 10:49:25 +0200 |
commit | 2d7b648a243a97d18899677a51c9e441d6edf508 (patch) | |
tree | 5e440ccdb1985d619e878a04a44826068b0147e0 /src | |
parent | cc5b4754250b3c03b9b0f8d72f32d1eacac15b18 (diff) |
Added OpenCL to CUDA translation header for the kernels
Diffstat (limited to 'src')
-rw-r--r-- | src/kernels/opencl_to_cuda.h | 51 | ||||
-rw-r--r-- | src/routine.cpp | 7 |
2 files changed, 58 insertions, 0 deletions
diff --git a/src/kernels/opencl_to_cuda.h b/src/kernels/opencl_to_cuda.h new file mode 100644 index 00000000..43a26a2f --- /dev/null +++ b/src/kernels/opencl_to_cuda.h @@ -0,0 +1,51 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file contains an (incomplete) header to interpret OpenCL kernels as CUDA kernels. +// +// ================================================================================================= + +// Replaces the OpenCL keywords with CUDA equivalent +#define __kernel __placeholder__ +#define __global +#define __placeholder__ extern "C" __global__ +#define __local __shared__ +#define restrict __restrict__ +#define __constant const +#define inline __device__ inline // assumes all device functions are annotated with inline in OpenCL + +// Replaces OpenCL synchronisation with CUDA synchronisation +#define barrier(x) __syncthreads() + +// Replaces the OpenCL get_xxx_ID with CUDA equivalents +__device__ int get_local_id(int x) { + if (x == 0) { return threadIdx.x; } + if (x == 1) { return threadIdx.y; } + return threadIdx.z; +} +__device__ int get_group_id(int x) { + if (x == 0) { return blockIdx.x; } + if (x == 1) { return blockIdx.y;} + return blockIdx.z; +} +__device__ int get_global_id(int x) { + if (x == 0) { return blockIdx.x*blockDim.x + threadIdx.x; } + if (y == 0) { return blockIdx.y*blockDim.y + threadIdx.y; } + return blockIdx.z*blockDim.z + threadIdx.z; +} + +// Adds the data-types which are not available natively under CUDA +typedef struct { float s0; float s1; float s2; float s3; + float s4; float s5; float s6; float s7; } float8; +typedef struct { float s0; float s1; float s2; float s3; + float s4; float s5; float s6; float s7; + float s8; float s9; float s10; float s11; + float s12; float s13; float s14; float s15; } float16; + +// ================================================================================================= diff --git a/src/routine.cpp b/src/routine.cpp index aaa85fde..0f9fe360 100644 --- a/src/routine.cpp +++ b/src/routine.cpp @@ -167,6 +167,13 @@ void Routine::InitProgram(std::initializer_list<const char *> source) { source_string += "#define GLOBAL_MEM_FENCE 1\n"; } + // Optionally adds a translation header from OpenCL kernels to CUDA kernels + #ifdef CUDA_API + source_string += + #include "kernels/opencl_to_cuda.h" + ; + #endif + // Loads the common header (typedefs and defines and such) source_string += #include "kernels/common.opencl" |