summary | refs | log | tree | commit | diff
path: root/include
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2016-05-18 21:32:56 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2016-05-18 21:32:56 +0200
commit: 489c5d76cfe95a97542dfeaa6d8b19cd9100919a (patch)
tree: 31a7082f5847f3bd21af1f2aa5a7d1eb68d188db /include
parent: 7a3b695db70810595ae17d9d753c3b926aa738c0 (diff)
Merged in latest changes from 0.7.1 release
Diffstat (limited to 'include')
-rw-r--r-- include/internal/database/xgemm.h | 4
-rw-r--r-- include/internal/tuning.h | 19
2 files changed, 17 insertions, 6 deletions
diff --git a/include/internal/database/xgemm.h b/include/internal/database/xgemm.h
index e24adb19..9ca2bff5 100644
--- a/include/internal/database/xgemm.h
+++ b/include/internal/database/xgemm.h
@@ -18,11 +18,11 @@ const Database::DatabaseEntry Database::XgemmSingle = {
"Xgemm", Precision::kSingle, {
{ // AMD GPUs
kDeviceTypeGPU, "AMD", {
- { "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } },
+ { "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",8} } },
{ "Hawaii", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",2} } },
{ "Pitcairn", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
{ "Tahiti", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",1} } },
- { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
+ { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
}
},
{ // ARM GPUs
diff --git a/include/internal/tuning.h b/include/internal/tuning.h
index 6ba1db61..3eba6fdb 100644
--- a/include/internal/tuning.h
+++ b/include/internal/tuning.h
@@ -50,14 +50,18 @@ void Tuner(int argc, char* argv[]) {
// Tests validity of the given arguments
C::TestValidArguments(args);
- // Tests for validity of the precision
+ // Tests for validity of the precision and retrieves properties
+ auto isAMD = false;
+ auto isGPU = false;
{
- auto platform = Platform(args.platform_id);
- auto device = Device(platform, args.device_id);
+ const auto platform = Platform(args.platform_id);
+ const auto device = Device(platform, args.device_id);
if (!PrecisionSupported<T>(device)) {
printf("* Unsupported precision, skipping this tuning run\n\n");
return;
}
+ isAMD = device.Vendor() == "AMD" || device.Vendor() == "Advanced Micro Devices, Inc.";
+ isGPU = device.Type() == "GPU";
}
// Creates input buffers with random data
@@ -86,8 +90,15 @@ void Tuner(int argc, char* argv[]) {
tuner.UseRandomSearch(1.0/args.fraction);
}
+ // Set extra settings for specific defines. This mimics src/routine.cc.
+ auto defines = std::string{""};
+ if (isAMD && isGPU) {
+ defines += "#define USE_CL_MAD 1\n";
+ defines += "#define USE_STAGGERED_INDICES 1\n";
+ }
+
// Loads the kernel sources and defines the kernel to tune
- auto sources = C::GetSources();
+ auto sources = defines + C::GetSources();
auto id = tuner.AddKernelFromString(sources, C::KernelName(), C::GlobalSize(args), C::LocalSize());
tuner.SetReferenceFromString(sources, C::KernelName(), C::GlobalSizeRef(args), C::LocalSizeRef());