diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-05-18 21:32:56 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-05-18 21:32:56 +0200 |
commit | 489c5d76cfe95a97542dfeaa6d8b19cd9100919a (patch) | |
tree | 31a7082f5847f3bd21af1f2aa5a7d1eb68d188db /include | |
parent | 7a3b695db70810595ae17d9d753c3b926aa738c0 (diff) |
Merged in latest changes from 0.7.1 release
Diffstat (limited to 'include')
-rw-r--r-- | include/internal/database/xgemm.h | 4 | ||||
-rw-r--r-- | include/internal/tuning.h | 19 |
2 files changed, 17 insertions, 6 deletions
```diff
diff --git a/include/internal/database/xgemm.h b/include/internal/database/xgemm.h
index e24adb19..9ca2bff5 100644
--- a/include/internal/database/xgemm.h
+++ b/include/internal/database/xgemm.h
@@ -18,11 +18,11 @@ const Database::DatabaseEntry Database::XgemmSingle = {
   "Xgemm", Precision::kSingle, {
     { // AMD GPUs
       kDeviceTypeGPU, "AMD", {
-        { "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",1} } },
+        { "AMD Radeon R9 M370X Compute Engine", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",16}, {"MWG",64}, {"NDIMB",8}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",2}, {"VWN",8} } },
         { "Hawaii", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",32}, {"NDIMC",8}, {"NWG",64}, {"SA",1}, {"SB",1}, {"STRM",1}, {"STRN",1}, {"VWM",4}, {"VWN",2} } },
         { "Pitcairn", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",16}, {"NWG",128}, {"SA",0}, {"SB",0}, {"STRM",1}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
         { "Tahiti", { {"KWG",32}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",32}, {"MWG",128}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",4}, {"VWN",1} } },
-        { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",8}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",16}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
+        { "default", { {"KWG",16}, {"KWI",2}, {"MDIMA",16}, {"MDIMC",8}, {"MWG",32}, {"NDIMB",8}, {"NDIMC",8}, {"NWG",64}, {"SA",0}, {"SB",0}, {"STRM",0}, {"STRN",0}, {"VWM",1}, {"VWN",1} } },
       }
     },
     { // ARM GPUs
diff --git a/include/internal/tuning.h b/include/internal/tuning.h
index 6ba1db61..3eba6fdb 100644
--- a/include/internal/tuning.h
+++ b/include/internal/tuning.h
@@ -50,14 +50,18 @@ void Tuner(int argc, char* argv[]) {
   // Tests validity of the given arguments
   C::TestValidArguments(args);
 
-  // Tests for validity of the precision
+  // Tests for validity of the precision and retrieves properties
+  auto isAMD = false;
+  auto isGPU = false;
   {
-    auto platform = Platform(args.platform_id);
-    auto device = Device(platform, args.device_id);
+    const auto platform = Platform(args.platform_id);
+    const auto device = Device(platform, args.device_id);
     if (!PrecisionSupported<T>(device)) {
       printf("* Unsupported precision, skipping this tuning run\n\n");
       return;
     }
+    isAMD = device.Vendor() == "AMD" || device.Vendor() == "Advanced Micro Devices, Inc.";
+    isGPU = device.Type() == "GPU";
   }
 
   // Creates input buffers with random data
@@ -86,8 +90,15 @@ void Tuner(int argc, char* argv[]) {
     tuner.UseRandomSearch(1.0/args.fraction);
   }
 
+  // Set extra settings for specific defines. This mimics src/routine.cc.
+  auto defines = std::string{""};
+  if (isAMD && isGPU) {
+    defines += "#define USE_CL_MAD 1\n";
+    defines += "#define USE_STAGGERED_INDICES 1\n";
+  }
+
   // Loads the kernel sources and defines the kernel to tune
-  auto sources = C::GetSources();
+  auto sources = defines + C::GetSources();
   auto id = tuner.AddKernelFromString(sources, C::KernelName(), C::GlobalSize(args), C::LocalSize());
   tuner.SetReferenceFromString(sources, C::KernelName(), C::GlobalSizeRef(args), C::LocalSizeRef());
 
```