/* ************************************************************************
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ************************************************************************/
static const char *SYMM_HEMM_HELPER = "
#ifdef DOUBLE_PRECISION
#ifdef cl_khr_fp64
#pragma OPENCL EXTENSION cl_khr_fp64 : enable
#else
#pragma OPENCL EXTENSION cl_amd_fp64 : enable
#endif
#endif
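
/* Scalar load of one element of a symmetric/Hermitian matrix stored in a single
   triangle: if (row, col) falls inside the stored triangle the element is read
   directly; otherwise the mirrored element (col, row) is read and, for HEMM,
   conjugated. For HEMM the imaginary part of a diagonal element is forced to 0. */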
%TYPE SYMM_SCALAR_LOAD(__global %TYPE const * restrict A, uint M, uint lda, uint row, uint col)
{
    %TYPE retval;

    //PENDING: Remove this Check for M. This will never happen
    if (((row) < M) && (col < M))
    {
        #ifdef __SYMM_UPPER__
        if ((row) <= col)
        #else
        if ((row) >= col)
        #endif
        {
            retval = A[(col)*lda + row];
            #ifdef __HEMM__
            if (row == col) { retval.odd = 0; }
            #endif
        } else {
            retval = A[(row)*lda + col];
            #ifdef __HEMM__
            %CONJUGATE(1, retval);
            #endif
        }
    } else {
        retval = (%TYPE) 0;
    }
    return retval;
}
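
/* Vector variant: builds a %V-wide result by applying the same per-element
   symmetric/Hermitian load to rows row .. row+%V-1 of column col, using the
   %VFOR / %VFORINDEX / %VFORSUFFIX substitutions to unroll over the vector lanes. */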
%TYPE%V SYMM_VECTOR_LOAD_USING_SCALAR(__global %TYPE const * restrict A, uint M, uint lda, uint row, uint col)
{
    //%TYPE symm_vec_load_temp[%V];
    %TYPE%V symm_vec_retval;

    //#pragma unroll %V
    //for(uint index_i=0; index_i< (%V); index_i++)
    %VFOR
    {
        //PENDING: Remove this Check for M. This will never happen
        if (((row + %VFORINDEX) < M) && (col < M))
        {
            #ifdef __SYMM_UPPER__
            if ((row + %VFORINDEX) <= col)
            #else
            if ((row + %VFORINDEX) >= col)
            #endif
            {
                //symm_vec_load_temp[index_i] = A[(col)*(lda) + ((row) + index_i)];
                symm_vec_retval%VFORSUFFIX = A[(col)*(lda) + ((row) + %VFORINDEX)];
                #ifdef __HEMM__
                //if ((row + index_i) == col) { symm_vec_load_temp[index_i].odd = 0; }
                if ((row + %VFORINDEX) == col) { (symm_vec_retval%VFORSUFFIX).odd = 0; }
                #endif
            } else {
                //symm_vec_load_temp[index_i] = A[((row)+index_i)*(lda) + (col)];
                symm_vec_retval%VFORSUFFIX = A[((row)+ %VFORINDEX )*(lda) + (col)];
                #ifdef __HEMM__
                //CONJUGATE(1, (symm_vec_load_temp[index_i]));
                {
                    %TYPE SCALAR;
                    SCALAR = symm_vec_retval%VFORSUFFIX;
                    %CONJUGATE(1, SCALAR);
                    symm_vec_retval%VFORSUFFIX = SCALAR;
                }
                #endif
            }
        } else {
            //symm_vec_load_temp[index_i] = (%TYPE) 0;
            symm_vec_retval%VFORSUFFIX = (%TYPE) 0;
        }
    }
    //%VLOADWITHINCX(symm_vec_retval, symm_vec_load_temp, 1 );
    //symm_vec_retval = *(__private %TYPE%V *)symm_vec_load_temp;
    return symm_vec_retval;
}
\n";