diff options
Diffstat (limited to 'src/kernels/common.opencl')
-rw-r--r-- | src/kernels/common.opencl | 120 |
1 files changed, 120 insertions, 0 deletions
diff --git a/src/kernels/common.opencl b/src/kernels/common.opencl new file mode 100644 index 00000000..154265e4 --- /dev/null +++ b/src/kernels/common.opencl @@ -0,0 +1,120 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file contains the common defines and type-defs for the CLBlast OpenCL kernels. +// +// ================================================================================================= + +// Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string +// literal). Comment-out this line for syntax-highlighting when developing. +R"( +// ================================================================================================= + +// Parameters set by the tuner or by the database. Here they are given a basic default value in case +// this file is used outside of the CLBlast library. 
#if !defined(PRECISION)
  #define PRECISION 32      // One of: 32 (float), 64 (double), 3232 or 6464 (complex variants)
#endif

// =================================================================================================

// Both double-precision variants (regular and complex) request the cl_khr_fp64 extension
// explicitly when compiling for OpenCL 1.1 or older
#if PRECISION == 6464 || PRECISION == 64
  #if __OPENCL_VERSION__ <= CL_VERSION_1_1
    #pragma OPENCL EXTENSION cl_khr_fp64: enable
  #endif
#endif

// Per precision: the element type 'real', its 2/4/8/16-wide companions and the matching ZERO
// literal. The complex element types are plain structs holding an (x, y) pair; their wider
// companions mimic the member names of the built-in vector types (x/y/z/w and s0..sF).

// Single-precision
#if PRECISION == 32
  typedef float     real;
  typedef float2    real2;
  typedef float4    real4;
  typedef float8    real8;
  typedef float16   real16;
  #define ZERO 0.0f

// Double-precision
#elif PRECISION == 64
  typedef double    real;
  typedef double2   real2;
  typedef double4   real4;
  typedef double8   real8;
  typedef double16  real16;
  #define ZERO 0.0

// Complex single-precision
#elif PRECISION == 3232
  typedef struct cfloat {
    float x, y;
  } real;
  typedef struct cfloat2 {
    real x, y;
  } real2;
  typedef struct cfloat4 {
    real x, y, z, w;
  } real4;
  typedef struct cfloat8 {
    real s0, s1, s2, s3, s4, s5, s6, s7;
  } real8;
  typedef struct cfloat16 {
    real s0, s1, s2, s3, s4, s5, s6, s7;
    real s8, s9, sA, sB, sC, sD, sE, sF;
  } real16;
  #define ZERO 0.0f

// Complex double-precision
#elif PRECISION == 6464
  typedef struct cdouble {
    double x, y;
  } real;
  typedef struct cdouble2 {
    real x, y;
  } real2;
  typedef struct cdouble4 {
    real x, y, z, w;
  } real4;
  typedef struct cdouble8 {
    real s0, s1, s2, s3, s4, s5, s6, s7;
  } real8;
  typedef struct cdouble16 {
    real s0, s1, s2, s3, s4, s5, s6, s7;
    real s8, s9, sA, sB, sC, sD, sE, sF;
  } real16;
  #define ZERO 0.0
#endif

// =================================================================================================

// Don't use the
// non-IEEE754 compliant OpenCL built-in mad() instruction by default. The value can be overridden
// by pre-defining USE_CL_MAD (e.g. as a compiler option) before this file is processed.
#ifndef USE_CL_MAD
  #define USE_CL_MAD 0
#endif

// Notes on the macros below:
// - Every argument is parenthesised, so arbitrary expressions can be passed in.
// - The complex variants expand to a single comma-expression rather than to two ';'-separated
//   statements, so they stay one statement when used as the unbraced body of an if/else/for.
// - Arguments may be evaluated more than once and should therefore be free of side-effects.
// - The complex variants write the '.x' member of the destination before computing its '.y'
//   member: the destination of AXPBY must not alias one of its inputs, and the accumulator of
//   MultiplyAdd must not alias one of its factors.

// Sets a variable to zero
#if PRECISION == 3232 || PRECISION == 6464
  #define SetToZero(a) ((a).x = ZERO, (a).y = ZERO)
#else
  #define SetToZero(a) ((a) = ZERO)
#endif

// Real and imaginary part of the product of two complex variables (used in the defines below).
// The expansions are fully parenthesised so they can be embedded in larger expressions.
#if PRECISION == 3232 || PRECISION == 6464
  #define MulReal(a, b) ((a).x*(b).x - (a).y*(b).y)
  #define MulImag(a, b) ((a).x*(b).y + (a).y*(b).x)
#endif

// The scalar multiply-add function: c += a * b
#if PRECISION == 3232 || PRECISION == 6464
  #define MultiplyAdd(c, a, b) ((c).x += MulReal(a,b), (c).y += MulImag(a,b))
#else
  #if USE_CL_MAD == 1
    #define MultiplyAdd(c, a, b) ((c) = mad((a), (b), (c)))
  #else
    #define MultiplyAdd(c, a, b) ((c) += (a) * (b))
  #endif
#endif

// The scalar AXPBY function: e = a*b + c*d
#if PRECISION == 3232 || PRECISION == 6464
  #define AXPBY(e, a, b, c, d) ((e).x = MulReal(a,b) + MulReal(c,d), \
                                (e).y = MulImag(a,b) + MulImag(c,d))
#else
  #define AXPBY(e, a, b, c, d) ((e) = (a)*(b) + (c)*(d))
#endif

// =================================================================================================

// End of the C++11 raw string literal
)";

// =================================================================================================