1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
// =================================================================================================
// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
// width of 100 characters per line.
//
// Author(s):
// Cedric Nugteren <www.cedricnugteren.nl>
//
// This file contains the common defines and type-defs for the CLBlast OpenCL kernels.
//
// =================================================================================================
// Enables loading of this file using the C++ pre-processor's #include (C++11 standard raw string
// literal). Comment-out this line for syntax-highlighting when developing.
R"(
// =================================================================================================
// Parameters set by the tuner or by the database. Here they are given a basic default value in case
// this file is used outside of the CLBlast library.
#ifndef PRECISION
#define PRECISION 32 // Data-types: single or double precision, complex or regular
#endif
// =================================================================================================
// Enable support for double-precision
#if PRECISION == 64 || PRECISION == 6464
#if __OPENCL_VERSION__ <= CL_VERSION_1_1
#pragma OPENCL EXTENSION cl_khr_fp64: enable
#endif
#endif
// Single-precision
#if PRECISION == 32
typedef float real;
typedef float2 real2;
typedef float4 real4;
typedef float8 real8;
typedef float16 real16;
#define ZERO 0.0f
// Double-precision
#elif PRECISION == 64
typedef double real;
typedef double2 real2;
typedef double4 real4;
typedef double8 real8;
typedef double16 real16;
#define ZERO 0.0
// Complex single-precision
#elif PRECISION == 3232
typedef struct cfloat {float x; float y;} real;
typedef struct cfloat2 {real x; real y;} real2;
typedef struct cfloat4 {real x; real y; real z; real w;} real4;
typedef struct cfloat8 {real s0; real s1; real s2; real s3;
real s4; real s5; real s6; real s7;} real8;
typedef struct cfloat16 {real s0; real s1; real s2; real s3;
real s4; real s5; real s6; real s7;
real s8; real s9; real sA; real sB;
real sC; real sD; real sE; real sF;} real16;
#define ZERO 0.0f
// Complex Double-precision
#elif PRECISION == 6464
typedef struct cdouble {double x; double y;} real;
typedef struct cdouble2 {real x; real y;} real2;
typedef struct cdouble4 {real x; real y; real z; real w;} real4;
typedef struct cdouble8 {real s0; real s1; real s2; real s3;
real s4; real s5; real s6; real s7;} real8;
typedef struct cdouble16 {real s0; real s1; real s2; real s3;
real s4; real s5; real s6; real s7;
real s8; real s9; real sA; real sB;
real sC; real sD; real sE; real sF;} real16;
#define ZERO 0.0
#endif
// =================================================================================================
// Don't use the non-IEEE754 compliant OpenCL built-in mad() instruction
#define USE_CL_MAD 0
// Sets a variable to zero
#if PRECISION == 3232 || PRECISION == 6464
#define SetToZero(a) a.x = ZERO; a.y = ZERO
#else
#define SetToZero(a) a = ZERO
#endif
// Multiply two complex variables (used in the define below)
#if PRECISION == 3232 || PRECISION == 6464
#define MulReal(a, b) a.x*b.x - a.y*b.y
#define MulImag(a, b) a.x*b.y + a.y*b.x
#endif
// The scalar multiply-add function
#if PRECISION == 3232 || PRECISION == 6464
#define MultiplyAdd(c, a, b) c.x += MulReal(a,b); c.y += MulImag(a,b)
#else
#if USE_CL_MAD == 1
#define MultiplyAdd(c, a, b) c = mad(a, b, c)
#else
#define MultiplyAdd(c, a, b) c += a * b
#endif
#endif
// The scalar AXPBY function
#if PRECISION == 3232 || PRECISION == 6464
#define AXPBY(e, a, b, c, d) e.x = MulReal(a,b) + MulReal(c,d); e.y = MulImag(a,b) + MulImag(c,d)
#else
#define AXPBY(e, a, b, c, d) e = a*b + c*d
#endif
// The complex conjugate operation for complex transforms
#if PRECISION == 3232 || PRECISION == 6464
#define COMPLEX_CONJUGATE(value) value.x = value.x; value.y = -value.y
#else
#define COMPLEX_CONJUGATE(value) value = value
#endif
// =================================================================================================
// End of the C++11 raw string literal
)";
// =================================================================================================
|