summary refs log tree commit diff
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2018-06-01 20:59:27 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2018-06-01 20:59:44 +0200
commit 7c3431a72ae091703a7f36999f2b5a0cd8189220 (patch)
tree 1890e22761db0660cad3960412f5c9851bcc6fd2
parent 5702bff5ad579466397f6537dc8925ebd64e3ba3 (diff)
Fixes for Apple OpenCL CPU implementation which requires a LWGS of 1 when barriers are present
-rw-r--r-- src/database/apple_cpu_fallback.hpp 6
-rw-r--r-- src/routines/level3/xgemm.hpp 6
-rw-r--r-- src/routines/levelx/xinvert.cpp 4
3 files changed, 9 insertions, 7 deletions
diff --git a/src/database/apple_cpu_fallback.hpp b/src/database/apple_cpu_fallback.hpp
index 177bd322..55bcc220 100644
--- a/src/database/apple_cpu_fallback.hpp
+++ b/src/database/apple_cpu_fallback.hpp
@@ -41,7 +41,7 @@ const DatabaseEntry XgerApple = {
"Xger", Precision::kAny, {"WGS1", "WGS2", "WPT"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } }
};
const DatabaseEntry XtrsvApple = {
- "Xtrsv", Precision::kAny, {"TRSV_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } }
+ "Xtrsv", Precision::kAny, {"TRSV_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } }
};
const DatabaseEntry XgemmApple = {
"Xgemm", Precision::kAny, {"GEMMK", "KREG", "KWG", "KWI", "MDIMA", "MDIMC", "MWG", "NDIMB", "NDIMC", "NWG", "SA", "SB", "STRM", "STRN", "VWM", "VWN"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1 } } } } } } }
@@ -62,10 +62,10 @@ const DatabaseEntry PadtransposeApple = {
"Padtranspose", Precision::kAny, {"PADTRA_PAD", "PADTRA_TILE", "PADTRA_WPT"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } }
};
const DatabaseEntry InvertApple = {
- "Invert", Precision::kAny, {"INTERNAL_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } }
+ "Invert", Precision::kAny, {"INTERNAL_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } }
};
const DatabaseEntry TrsvRoutineApple = {
- "TrsvRoutine", Precision::kAny, {"TRSV_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } }
+ "TrsvRoutine", Precision::kAny, {"TRSV_BLOCK_SIZE"}, { { kDeviceTypeAll, "default", { { "default", { { kDeviceNameDefault, Params{ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } } } } } } }
};
// =================================================================================================
diff --git a/src/routines/level3/xgemm.hpp b/src/routines/level3/xgemm.hpp
index ec84fbb7..ed8cc69d 100644
--- a/src/routines/level3/xgemm.hpp
+++ b/src/routines/level3/xgemm.hpp
@@ -25,9 +25,9 @@ class Xgemm: public Routine {
public:
// Defines the assumptions of the GEMM kernels
- static const bool a_want_rotated_(const size_t gemm_kernel_id) { return gemm_kernel_id == 1; }
- static const bool b_want_rotated_(const size_t gemm_kernel_id) { return true; }
- static const bool c_want_rotated_(const size_t gemm_kernel_id) { return gemm_kernel_id == 1; }
+ static bool a_want_rotated_(const size_t gemm_kernel_id) { return gemm_kernel_id == 1; }
+ static bool b_want_rotated_(const size_t) { return true; }
+ static bool c_want_rotated_(const size_t gemm_kernel_id) { return gemm_kernel_id == 1; }
// Computes the size of the temporary GEMM buffer based on user-arguments
static size_t GetTempSize(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
diff --git a/src/routines/levelx/xinvert.cpp b/src/routines/levelx/xinvert.cpp
index 99f196ec..eea8527a 100644
--- a/src/routines/levelx/xinvert.cpp
+++ b/src/routines/levelx/xinvert.cpp
@@ -56,7 +56,9 @@ void Xinvert<T>::InvertMatrixDiagonalBlocks(const Layout layout, const Triangle
// Helper variables
const auto internal_block_size = static_cast<size_t>(db_["INTERNAL_BLOCK_SIZE"]);
- assert(internal_block_size == 16);
+ if (internal_block_size != 16) {
+ throw RuntimeErrorCode(StatusCode::kNotImplemented); // e.g. Apple CPU OpenCL with a WGS of 1
+ } // when barriers are present
const auto num_blocks = CeilDiv(n, block_size);
const auto num_internal_blocks = CeilDiv(n, internal_block_size);
const auto unit_diagonal = (diag == Diagonal::kUnit) ? true : false;