1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
|
/* ************************************************************************
* Copyright 2013 Advanced Micro Devices, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* ************************************************************************/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <defbool.h>
#include <clBLAS.h>
#include <clblas-internal.h>
/*
 * Make 'impl' the default memory pattern of the solver family it belongs to.
 *
 * The GEMM, TRMM and TRSM implementation identifiers are each routed to the
 * matching entry of clblasSolvers[]; the pattern index stored there is
 * obtained from the family-specific get*MemPatternIndex() lookup.
 *
 * Returns clblasSuccess when 'impl' is one of the identifiers listed below,
 * or clblasInvalidValue for any other value, in which case no solver entry
 * is modified.
 */
clblasStatus
clblasSelectImplementation(
    clblasImplementation impl)
{
    clblasStatus status = clblasSuccess;

    switch (impl) {
    /* GEMM family */
    case clblasDefaultGemm:
    case clblasLdsBlockGemm:
    case clblasImageBlockGemm:
    case clblasBlockGemmWithCaching:
        clblasSolvers[CLBLAS_GEMM].defaultPattern =
            getGemmMemPatternIndex(impl);
        break;

    /* TRMM family */
    case clblasDefaultTrmm:
    case clblasLdsBlockTrmm:
    case clblasImageBlockTrmm:
    case clblasBlockTrmmWithCaching:
        clblasSolvers[CLBLAS_TRMM].defaultPattern =
            getTrmmMemPatternIndex(impl);
        break;

    /* TRSM family */
    case clblasDefaultTrsm:
    case clblasLdsBlockTrsm:
    case clblasImageBlockTrsm:
    case clblasBlockTrsmWithCaching:
    case clblasBlockTrsmWithoutLds:
        clblasSolvers[CLBLAS_TRSM].defaultPattern =
            getTrsmMemPatternIndex(impl);
        break;

    default:
        /* Not an implementation identifier handled by this function. */
        status = clblasInvalidValue;
        break;
    }

    return status;
}
/*
 * Report whether any of the per-function implementation variables
 * (AMD_CLBLAS_GEMM_IMPLEMENTATION, AMD_CLBLAS_TRMM_IMPLEMENTATION,
 * AMD_CLBLAS_TRSM_IMPLEMENTATION) is set to exactly "1" in the environment.
 *
 * Returns 1 if at least one of them is, 0 otherwise.
 */
int
scratchImagesEnabled(void)
{
    static const char *const envNames[] = {
        "AMD_CLBLAS_GEMM_IMPLEMENTATION",
        "AMD_CLBLAS_TRMM_IMPLEMENTATION",
        "AMD_CLBLAS_TRSM_IMPLEMENTATION"
    };
    size_t i;

    for (i = 0; i < sizeof(envNames) / sizeof(envNames[0]); i++) {
        const char *value = getenv(envNames[i]);

        /* Only the exact string "1" counts; unset or other values do not. */
        if (value != NULL && strcmp(value, "1") == 0) {
            return 1;
        }
    }

    return 0;
}
/*
 * Decode a one-digit implementation selector from the environment.
 *
 * Returns the digit's value (0..9) when the variable 'varName' is set to a
 * string consisting of exactly one decimal digit, and -1 when the variable
 * is unset or holds anything else (empty string, "01", " 1", ...). This is
 * the same acceptance rule as an exact string comparison against "0", "1",
 * and so on.
 */
static int
envImplementationChoice(const char *varName)
{
    const char *value = getenv(varName);

    if (value == NULL || value[0] < '0' || value[0] > '9' ||
        value[1] != '\0') {
        return -1;
    }

    return value[0] - '0';
}

/*
 * Select the GEMM, TRMM and TRSM implementations according to the
 * AMD_CLBLAS_GEMM_IMPLEMENTATION, AMD_CLBLAS_TRMM_IMPLEMENTATION and
 * AMD_CLBLAS_TRSM_IMPLEMENTATION environment variables.
 *
 * For each family the default implementation is selected first; it is then
 * overridden only if the corresponding variable holds one of the recognised
 * selectors. Unset variables and unrecognised values leave the default in
 * place. The status returned by clblasSelectImplementation() is not checked.
 */
void
parseEnvImplementation(void)
{
    /* GEMM: "0" = LDS block, "1" = image block, "2" = block with caching. */
    clblasSelectImplementation(clblasDefaultGemm);
    switch (envImplementationChoice("AMD_CLBLAS_GEMM_IMPLEMENTATION")) {
    case 0:
        clblasSelectImplementation(clblasLdsBlockGemm);
        break;
    case 1:
        clblasSelectImplementation(clblasImageBlockGemm);
        break;
    case 2:
        clblasSelectImplementation(clblasBlockGemmWithCaching);
        break;
    default:
        break;
    }

    /* TRMM: "0" = LDS block, "1" = image block, "2" = block with caching. */
    clblasSelectImplementation(clblasDefaultTrmm);
    switch (envImplementationChoice("AMD_CLBLAS_TRMM_IMPLEMENTATION")) {
    case 0:
        clblasSelectImplementation(clblasLdsBlockTrmm);
        break;
    case 1:
        clblasSelectImplementation(clblasImageBlockTrmm);
        break;
    case 2:
        clblasSelectImplementation(clblasBlockTrmmWithCaching);
        break;
    default:
        break;
    }

    /*
     * TRSM: "0" = LDS block, "1" = image block, "2" = block without LDS,
     * "3" = block with caching. Note that "2" differs in meaning from the
     * GEMM and TRMM selectors.
     */
    clblasSelectImplementation(clblasDefaultTrsm);
    switch (envImplementationChoice("AMD_CLBLAS_TRSM_IMPLEMENTATION")) {
    case 0:
        clblasSelectImplementation(clblasLdsBlockTrsm);
        break;
    case 1:
        clblasSelectImplementation(clblasImageBlockTrsm);
        break;
    case 2:
        clblasSelectImplementation(clblasBlockTrsmWithoutLds);
        break;
    case 3:
        clblasSelectImplementation(clblasBlockTrsmWithCaching);
        break;
    default:
        break;
    }
}
|