From 7c3431a72ae091703a7f36999f2b5a0cd8189220 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Fri, 1 Jun 2018 20:59:27 +0200 Subject: Fixes for Apple OpenCL CPU implementation which requires a LWGS of 1 when barriers are present --- src/database/apple_cpu_fallback.hpp | 6 +++--- src/routines/level3/xgemm.hpp | 6 +++--- src/routines/levelx/xinvert.cpp | 4 +++- 3 files changed, 9 insertions(+), 7 deletions(-) (limited to 'src') diff --git a/src/database/apple_cpu_fallback.hpp b/src/database/apple_cpu_fallback.hpp index 177bd322..55bcc220 100644 --- a/src/database/apple_cpu_fallback.hpp +++ b/src/database/apple_cpu_fallback.hpp @@ -41,7 +41,7 @@ const DatabaseEntry XgerApple = { "Xger", Precision::kAny, {"WGS1", "WGS2", "WPT"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } } }; const DatabaseEntry XtrsvApple = { - "Xtrsv", Precision::kAny, {"TRSV_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } } + "Xtrsv", Precision::kAny, {"TRSV_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } } }; const DatabaseEntry XgemmApple = { "Xgemm", Precision::kAny, {"GEMMK", "KREG", "KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1 } } } } } } } @@ -62,10 +62,10 @@ const DatabaseEntry PadtransposeApple = { "Padtranspose", Precision::kAny, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } } }; const DatabaseEntry InvertApple = { - "Invert", Precision::kAny, {"INTERNAL_BLOCK_SIZE"}, { { 
kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } } + "Invert", Precision::kAny, {"INTERNAL_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } } }; const DatabaseEntry TrsvRoutineApple = { - "TrsvRoutine", Precision::kAny, {"TRSV_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } } + "TrsvRoutine", Precision::kAny, {"TRSV_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } } }; // ================================================================================================= diff --git a/src/routines/level3/xgemm.hpp b/src/routines/level3/xgemm.hpp index ec84fbb7..ed8cc69d 100644 --- a/src/routines/level3/xgemm.hpp +++ b/src/routines/level3/xgemm.hpp @@ -25,9 +25,9 @@ class Xgemm: public Routine { public: // Defines the assumptions of the GEMM kernels - static const bool a_want_rotated_(const size_t gemm_kernel_id) { return gemm_kernel_id == 1; } - static const bool b_want_rotated_(const size_t gemm_kernel_id) { return true; } - static const bool c_want_rotated_(const size_t gemm_kernel_id) { return gemm_kernel_id == 1; } + static bool a_want_rotated_(const size_t gemm_kernel_id) { return gemm_kernel_id == 1; } + static bool b_want_rotated_(const size_t) { return true; } + static bool c_want_rotated_(const size_t gemm_kernel_id) { return gemm_kernel_id == 1; } // Computes the size of the temporary GEMM buffer based on user-arguments static size_t GetTempSize(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, diff --git a/src/routines/levelx/xinvert.cpp b/src/routines/levelx/xinvert.cpp index 99f196ec..eea8527a 100644 --- a/src/routines/levelx/xinvert.cpp +++ 
b/src/routines/levelx/xinvert.cpp @@ -56,7 +56,9 @@ void Xinvert::InvertMatrixDiagonalBlocks(const Layout layout, const Triangle // Helper variables const auto internal_block_size = static_cast<size_t>(db_["INTERNAL_BLOCK_SIZE"]); - assert(internal_block_size == 16); + if (internal_block_size != 16) { + throw RuntimeErrorCode(StatusCode::kNotImplemented); // e.g. Apple CPU OpenCL with a WGS of 1 + } // when barriers are present const auto num_blocks = CeilDiv(n, block_size); const auto num_internal_blocks = CeilDiv(n, internal_block_size); const auto unit_diagonal = (diag == Diagonal::kUnit) ? true : false; -- cgit v1.2.3