summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG2
-rw-r--r--src/database/kernels/copy/copy_16.hpp5
-rw-r--r--src/database/kernels/copy/copy_32.hpp6
-rw-r--r--src/database/kernels/copy/copy_3232.hpp4
-rw-r--r--src/database/kernels/copy/copy_64.hpp7
-rw-r--r--src/database/kernels/copy/copy_6464.hpp7
-rw-r--r--src/database/kernels/gemm_routine/gemm_routine_16.hpp10
-rw-r--r--src/database/kernels/gemm_routine/gemm_routine_32.hpp4
-rw-r--r--src/database/kernels/pad/pad_16.hpp7
-rw-r--r--src/database/kernels/pad/pad_32.hpp6
-rw-r--r--src/database/kernels/pad/pad_3232.hpp7
-rw-r--r--src/database/kernels/pad/pad_64.hpp7
-rw-r--r--src/database/kernels/pad/pad_6464.hpp5
-rw-r--r--src/database/kernels/padtranspose/padtranspose_16.hpp5
-rw-r--r--src/database/kernels/padtranspose/padtranspose_32.hpp4
-rw-r--r--src/database/kernels/padtranspose/padtranspose_3232.hpp5
-rw-r--r--src/database/kernels/padtranspose/padtranspose_64.hpp5
-rw-r--r--src/database/kernels/padtranspose/padtranspose_6464.hpp3
-rw-r--r--src/database/kernels/transpose/transpose_16.hpp5
-rw-r--r--src/database/kernels/transpose/transpose_32.hpp2
-rw-r--r--src/database/kernels/transpose/transpose_3232.hpp5
-rw-r--r--src/database/kernels/transpose/transpose_64.hpp5
-rw-r--r--src/database/kernels/transpose/transpose_6464.hpp5
-rw-r--r--src/database/kernels/xaxpy/xaxpy_16.hpp7
-rw-r--r--src/database/kernels/xaxpy/xaxpy_32.hpp2
-rw-r--r--src/database/kernels/xaxpy/xaxpy_3232.hpp5
-rw-r--r--src/database/kernels/xaxpy/xaxpy_64.hpp5
-rw-r--r--src/database/kernels/xaxpy/xaxpy_6464.hpp5
-rw-r--r--src/database/kernels/xdot/xdot_16.hpp3
-rw-r--r--src/database/kernels/xdot/xdot_32.hpp5
-rw-r--r--src/database/kernels/xdot/xdot_3232.hpp4
-rw-r--r--src/database/kernels/xdot/xdot_64.hpp4
-rw-r--r--src/database/kernels/xdot/xdot_6464.hpp4
-rw-r--r--src/database/kernels/xgemm/xgemm_16.hpp7
-rw-r--r--src/database/kernels/xgemm/xgemm_32.hpp5
-rw-r--r--src/database/kernels/xgemm/xgemm_3232.hpp6
-rw-r--r--src/database/kernels/xgemm/xgemm_64.hpp4
-rw-r--r--src/database/kernels/xgemm/xgemm_6464.hpp4
-rw-r--r--src/database/kernels/xgemm_direct/xgemm_direct_16.hpp5
-rw-r--r--src/database/kernels/xgemm_direct/xgemm_direct_32.hpp5
-rw-r--r--src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp4
-rw-r--r--src/database/kernels/xgemm_direct/xgemm_direct_64.hpp6
-rw-r--r--src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp6
-rw-r--r--src/database/kernels/xgemv/xgemv_32.hpp5
-rw-r--r--src/database/kernels/xgemv/xgemv_3232.hpp4
-rw-r--r--src/database/kernels/xgemv/xgemv_64.hpp4
-rw-r--r--src/database/kernels/xgemv/xgemv_6464.hpp4
-rw-r--r--src/database/kernels/xgemv_fast/xgemv_fast_32.hpp5
-rw-r--r--src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp4
-rw-r--r--src/database/kernels/xgemv_fast/xgemv_fast_64.hpp4
-rw-r--r--src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp4
-rw-r--r--src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp3
-rw-r--r--src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp4
-rw-r--r--src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp4
-rw-r--r--src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp2
-rw-r--r--src/database/kernels/xger/xger_16.hpp6
-rw-r--r--src/database/kernels/xger/xger_32.hpp6
-rw-r--r--src/database/kernels/xger/xger_3232.hpp7
-rw-r--r--src/database/kernels/xger/xger_64.hpp7
-rw-r--r--src/database/kernels/xger/xger_6464.hpp5
-rw-r--r--src/utilities/compile.cpp3
61 files changed, 151 insertions, 147 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 0ff7856d..eef9c93f 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -3,7 +3,7 @@ Development (next version)
- Re-designed and integrated the auto-tuner, no more dependency on CLTune
- Made it possible to override the tuning parameters in the clients straight from JSON tuning files
- Added OpenCL pre-processor to unroll loops and perform array-to-register promotions for compilers
- which don't do this themselves (ARM, Qualcomm) - greatly improves performance on these platforms
+ which don't do this themselves (ARM Mali) - greatly improves performance on these platforms
- Added tuned parameters for various devices (see README)
Version 1.2.0
diff --git a/src/database/kernels/copy/copy_16.hpp b/src/database/kernels/copy/copy_16.hpp
index 818c4c8c..0249ee77 100644
--- a/src/database/kernels/copy/copy_16.hpp
+++ b/src/database/kernels/copy/copy_16.hpp
@@ -26,8 +26,9 @@ const DatabaseEntry CopyHalf = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 32, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 32, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T628 "}, Params{ 16, 8, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 8, 16, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 16, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/copy/copy_32.hpp b/src/database/kernels/copy/copy_32.hpp
index 7ab20459..eb5de286 100644
--- a/src/database/kernels/copy/copy_32.hpp
+++ b/src/database/kernels/copy/copy_32.hpp
@@ -59,9 +59,9 @@ const DatabaseEntry CopySingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 32, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 16, 16, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T628 "}, Params{ 8, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 16, 8, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/copy/copy_3232.hpp b/src/database/kernels/copy/copy_3232.hpp
index 2fc8156d..36082068 100644
--- a/src/database/kernels/copy/copy_3232.hpp
+++ b/src/database/kernels/copy/copy_3232.hpp
@@ -59,8 +59,8 @@ const DatabaseEntry CopyComplexSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 8, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/copy/copy_64.hpp b/src/database/kernels/copy/copy_64.hpp
index 882cab5a..9f193870 100644
--- a/src/database/kernels/copy/copy_64.hpp
+++ b/src/database/kernels/copy/copy_64.hpp
@@ -51,9 +51,8 @@ const DatabaseEntry CopyDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 16, 8, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
@@ -126,7 +125,7 @@ const DatabaseEntry CopyDouble = {
{ // Default
kDeviceTypeAll, "default", {
{ "default", {
- { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 32, 8, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/copy/copy_6464.hpp b/src/database/kernels/copy/copy_6464.hpp
index b774225b..9fe3af65 100644
--- a/src/database/kernels/copy/copy_6464.hpp
+++ b/src/database/kernels/copy/copy_6464.hpp
@@ -51,9 +51,8 @@ const DatabaseEntry CopyComplexDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
@@ -126,7 +125,7 @@ const DatabaseEntry CopyComplexDouble = {
{ // Default
kDeviceTypeAll, "default", {
{ "default", {
- { kDeviceNameDefault , Params{ 32, 8, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 8, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/gemm_routine/gemm_routine_16.hpp b/src/database/kernels/gemm_routine/gemm_routine_16.hpp
index 3d849420..0461f399 100644
--- a/src/database/kernels/gemm_routine/gemm_routine_16.hpp
+++ b/src/database/kernels/gemm_routine/gemm_routine_16.hpp
@@ -12,6 +12,14 @@ namespace database {
const DatabaseEntry GemmRoutineHalf = {
"GemmRoutine", Precision::kHalf, {"XGEMM_MIN_INDIRECT_SIZE"}, {
+ { // ARM GPUs
+ kDeviceTypeGPU, "ARM", {
+ { "default", {
+ { Name{"Mali-T628 "}, Params{ 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ } },
+ }
+ },
{ // Intel GPUs
kDeviceTypeGPU, "Intel", {
{ "default", {
@@ -23,7 +31,7 @@ const DatabaseEntry GemmRoutineHalf = {
{ // Default
kDeviceTypeAll, "default", {
{ "default", {
- { kDeviceNameDefault , Params{ 192, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/gemm_routine/gemm_routine_32.hpp b/src/database/kernels/gemm_routine/gemm_routine_32.hpp
index 37b52b20..5b336752 100644
--- a/src/database/kernels/gemm_routine/gemm_routine_32.hpp
+++ b/src/database/kernels/gemm_routine/gemm_routine_32.hpp
@@ -15,8 +15,8 @@ const DatabaseEntry GemmRoutineSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T628 "}, Params{ 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/pad/pad_16.hpp b/src/database/kernels/pad/pad_16.hpp
index 3c24fdc9..c481e2fa 100644
--- a/src/database/kernels/pad/pad_16.hpp
+++ b/src/database/kernels/pad/pad_16.hpp
@@ -26,8 +26,9 @@ const DatabaseEntry PadHalf = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 16, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 16, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T628 "}, Params{ 8, 32, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
@@ -51,7 +52,7 @@ const DatabaseEntry PadHalf = {
{ // Default
kDeviceTypeAll, "default", {
{ "default", {
- { kDeviceNameDefault , Params{ 8, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 8, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/pad/pad_32.hpp b/src/database/kernels/pad/pad_32.hpp
index 6a06d314..412d0a82 100644
--- a/src/database/kernels/pad/pad_32.hpp
+++ b/src/database/kernels/pad/pad_32.hpp
@@ -59,9 +59,9 @@ const DatabaseEntry PadSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 32, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 32, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T628 "}, Params{ 8, 16, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 8, 16, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 16, 16, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/pad/pad_3232.hpp b/src/database/kernels/pad/pad_3232.hpp
index c4b8eeec..f79c20e0 100644
--- a/src/database/kernels/pad/pad_3232.hpp
+++ b/src/database/kernels/pad/pad_3232.hpp
@@ -59,9 +59,8 @@ const DatabaseEntry PadComplexSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 32, 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 8, 32, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 32, 8, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
@@ -156,7 +155,7 @@ const DatabaseEntry PadComplexSingle = {
{ // Default
kDeviceTypeAll, "default", {
{ "default", {
- { kDeviceNameDefault , Params{ 32, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 16, 8, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/pad/pad_64.hpp b/src/database/kernels/pad/pad_64.hpp
index be3dc81b..369b01b4 100644
--- a/src/database/kernels/pad/pad_64.hpp
+++ b/src/database/kernels/pad/pad_64.hpp
@@ -51,9 +51,8 @@ const DatabaseEntry PadDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 16, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 16, 8, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 32, 8, 4, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
@@ -126,7 +125,7 @@ const DatabaseEntry PadDouble = {
{ // Default
kDeviceTypeAll, "default", {
{ "default", {
- { kDeviceNameDefault , Params{ 16, 8, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/pad/pad_6464.hpp b/src/database/kernels/pad/pad_6464.hpp
index 92e5a194..d88f2e7d 100644
--- a/src/database/kernels/pad/pad_6464.hpp
+++ b/src/database/kernels/pad/pad_6464.hpp
@@ -51,9 +51,8 @@ const DatabaseEntry PadComplexDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 16, 16, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 16, 8, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 16, 16, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/padtranspose/padtranspose_16.hpp b/src/database/kernels/padtranspose/padtranspose_16.hpp
index 9154d661..51afde55 100644
--- a/src/database/kernels/padtranspose/padtranspose_16.hpp
+++ b/src/database/kernels/padtranspose/padtranspose_16.hpp
@@ -26,8 +26,9 @@ const DatabaseEntry PadtransposeHalf = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 1, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 1, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T628 "}, Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/padtranspose/padtranspose_32.hpp b/src/database/kernels/padtranspose/padtranspose_32.hpp
index 97c89213..d44df73a 100644
--- a/src/database/kernels/padtranspose/padtranspose_32.hpp
+++ b/src/database/kernels/padtranspose/padtranspose_32.hpp
@@ -60,8 +60,8 @@ const DatabaseEntry PadtransposeSingle = {
kDeviceTypeGPU, "ARM", {
{ "default", {
{ Name{"Mali-T628 "}, Params{ 0, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 0, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/padtranspose/padtranspose_3232.hpp b/src/database/kernels/padtranspose/padtranspose_3232.hpp
index 6e8bce8b..581c70a4 100644
--- a/src/database/kernels/padtranspose/padtranspose_3232.hpp
+++ b/src/database/kernels/padtranspose/padtranspose_3232.hpp
@@ -59,9 +59,8 @@ const DatabaseEntry PadtransposeComplexSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 1, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 0, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/padtranspose/padtranspose_64.hpp b/src/database/kernels/padtranspose/padtranspose_64.hpp
index 219ca6a9..dbc6bc7f 100644
--- a/src/database/kernels/padtranspose/padtranspose_64.hpp
+++ b/src/database/kernels/padtranspose/padtranspose_64.hpp
@@ -51,9 +51,8 @@ const DatabaseEntry PadtransposeDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 0, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 1, 8, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/padtranspose/padtranspose_6464.hpp b/src/database/kernels/padtranspose/padtranspose_6464.hpp
index 58bd2d60..44df51b4 100644
--- a/src/database/kernels/padtranspose/padtranspose_6464.hpp
+++ b/src/database/kernels/padtranspose/padtranspose_6464.hpp
@@ -51,8 +51,7 @@ const DatabaseEntry PadtransposeComplexDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 0, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ kDeviceNameDefault , Params{ 1, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
diff --git a/src/database/kernels/transpose/transpose_16.hpp b/src/database/kernels/transpose/transpose_16.hpp
index 2b24e192..ded75554 100644
--- a/src/database/kernels/transpose/transpose_16.hpp
+++ b/src/database/kernels/transpose/transpose_16.hpp
@@ -26,8 +26,9 @@ const DatabaseEntry TransposeHalf = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 4, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 4, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T628 "}, Params{ 8, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 8, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 8, 0, 0, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/transpose/transpose_32.hpp b/src/database/kernels/transpose/transpose_32.hpp
index 4f2b16e6..f3300e19 100644
--- a/src/database/kernels/transpose/transpose_32.hpp
+++ b/src/database/kernels/transpose/transpose_32.hpp
@@ -59,7 +59,7 @@ const DatabaseEntry TransposeSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 8, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T628 "}, Params{ 4, 0, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Mali-T760 "}, Params{ 4, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ kDeviceNameDefault , Params{ 4, 1, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
diff --git a/src/database/kernels/transpose/transpose_3232.hpp b/src/database/kernels/transpose/transpose_3232.hpp
index 1127fab2..0faa6aab 100644
--- a/src/database/kernels/transpose/transpose_3232.hpp
+++ b/src/database/kernels/transpose/transpose_3232.hpp
@@ -59,9 +59,8 @@ const DatabaseEntry TransposeComplexSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 16, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 4, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 16, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 4, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 4, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/transpose/transpose_64.hpp b/src/database/kernels/transpose/transpose_64.hpp
index a7a808ed..de45cb21 100644
--- a/src/database/kernels/transpose/transpose_64.hpp
+++ b/src/database/kernels/transpose/transpose_64.hpp
@@ -51,9 +51,8 @@ const DatabaseEntry TransposeDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 16, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 16, 0, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/transpose/transpose_6464.hpp b/src/database/kernels/transpose/transpose_6464.hpp
index 75d0c85b..90441910 100644
--- a/src/database/kernels/transpose/transpose_6464.hpp
+++ b/src/database/kernels/transpose/transpose_6464.hpp
@@ -51,9 +51,8 @@ const DatabaseEntry TransposeComplexDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 16, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 16, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 8, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xaxpy/xaxpy_16.hpp b/src/database/kernels/xaxpy/xaxpy_16.hpp
index fa91ae9f..fd678f2e 100644
--- a/src/database/kernels/xaxpy/xaxpy_16.hpp
+++ b/src/database/kernels/xaxpy/xaxpy_16.hpp
@@ -26,8 +26,9 @@ const DatabaseEntry XaxpyHalf = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 8, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 8, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T628 "}, Params{ 8, 128, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 4, 256, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 4, 256, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
@@ -51,7 +52,7 @@ const DatabaseEntry XaxpyHalf = {
{ // Default
kDeviceTypeAll, "default", {
{ "default", {
- { kDeviceNameDefault , Params{ 8, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 8, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xaxpy/xaxpy_32.hpp b/src/database/kernels/xaxpy/xaxpy_32.hpp
index bda415bd..a556a259 100644
--- a/src/database/kernels/xaxpy/xaxpy_32.hpp
+++ b/src/database/kernels/xaxpy/xaxpy_32.hpp
@@ -60,7 +60,7 @@ const DatabaseEntry XaxpySingle = {
kDeviceTypeGPU, "ARM", {
{ "default", {
{ Name{"Mali-T628 "}, Params{ 4, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 4, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 2, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ kDeviceNameDefault , Params{ 4, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
diff --git a/src/database/kernels/xaxpy/xaxpy_3232.hpp b/src/database/kernels/xaxpy/xaxpy_3232.hpp
index fec34fef..a84b982b 100644
--- a/src/database/kernels/xaxpy/xaxpy_3232.hpp
+++ b/src/database/kernels/xaxpy/xaxpy_3232.hpp
@@ -59,9 +59,8 @@ const DatabaseEntry XaxpyComplexSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 1, 128, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 1, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 1, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xaxpy/xaxpy_64.hpp b/src/database/kernels/xaxpy/xaxpy_64.hpp
index 53afe2f1..82073a24 100644
--- a/src/database/kernels/xaxpy/xaxpy_64.hpp
+++ b/src/database/kernels/xaxpy/xaxpy_64.hpp
@@ -51,9 +51,8 @@ const DatabaseEntry XaxpyDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 2, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 2, 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 2, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 2, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xaxpy/xaxpy_6464.hpp b/src/database/kernels/xaxpy/xaxpy_6464.hpp
index 44f73b3d..908a33b1 100644
--- a/src/database/kernels/xaxpy/xaxpy_6464.hpp
+++ b/src/database/kernels/xaxpy/xaxpy_6464.hpp
@@ -51,9 +51,8 @@ const DatabaseEntry XaxpyComplexDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 1, 64, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 1, 128, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 1, 256, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xdot/xdot_16.hpp b/src/database/kernels/xdot/xdot_16.hpp
index 5422f88d..bdb7558d 100644
--- a/src/database/kernels/xdot/xdot_16.hpp
+++ b/src/database/kernels/xdot/xdot_16.hpp
@@ -26,8 +26,9 @@ const DatabaseEntry XdotHalf = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
+ { Name{"Mali-T628 "}, Params{ 64, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ Name{"Mali-T760 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 128, 128, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xdot/xdot_32.hpp b/src/database/kernels/xdot/xdot_32.hpp
index 014ece99..ffdcb737 100644
--- a/src/database/kernels/xdot/xdot_32.hpp
+++ b/src/database/kernels/xdot/xdot_32.hpp
@@ -55,8 +55,9 @@ const DatabaseEntry XdotSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T628 "}, Params{ 32, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 128, 64, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xdot/xdot_3232.hpp b/src/database/kernels/xdot/xdot_3232.hpp
index 6679bcdf..ea9182d0 100644
--- a/src/database/kernels/xdot/xdot_3232.hpp
+++ b/src/database/kernels/xdot/xdot_3232.hpp
@@ -55,8 +55,8 @@ const DatabaseEntry XdotComplexSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xdot/xdot_64.hpp b/src/database/kernels/xdot/xdot_64.hpp
index d4143ddb..5ce78307 100644
--- a/src/database/kernels/xdot/xdot_64.hpp
+++ b/src/database/kernels/xdot/xdot_64.hpp
@@ -47,8 +47,8 @@ const DatabaseEntry XdotDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 256, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 32, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xdot/xdot_6464.hpp b/src/database/kernels/xdot/xdot_6464.hpp
index 84b05560..0435bf27 100644
--- a/src/database/kernels/xdot/xdot_6464.hpp
+++ b/src/database/kernels/xdot/xdot_6464.hpp
@@ -47,8 +47,8 @@ const DatabaseEntry XdotComplexDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 128, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 64, 32, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xgemm/xgemm_16.hpp b/src/database/kernels/xgemm/xgemm_16.hpp
index e2c806d7..e375d317 100644
--- a/src/database/kernels/xgemm/xgemm_16.hpp
+++ b/src/database/kernels/xgemm/xgemm_16.hpp
@@ -26,8 +26,9 @@ const DatabaseEntry XgemmHalf = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 32, 2, 8, 16, 128, 8, 8, 32, 0, 1, 0, 1, 8, 4 } },
- { kDeviceNameDefault , Params{ 32, 2, 8, 16, 128, 8, 8, 32, 0, 1, 0, 1, 8, 4 } },
+ { Name{"Mali-T628 "}, Params{ 32, 2, 8, 16, 128, 8, 8, 32, 0, 1, 0, 1, 8, 4 } },
+ { Name{"Mali-T760 "}, Params{ 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } },
+ { kDeviceNameDefault , Params{ 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } },
} },
}
},
@@ -42,7 +43,7 @@ const DatabaseEntry XgemmHalf = {
{ // Default
kDeviceTypeAll, "default", {
{ "default", {
- { kDeviceNameDefault , Params{ 32, 2, 16, 16, 64, 16, 16, 64, 1, 1, 0, 0, 4, 4 } },
+ { kDeviceNameDefault , Params{ 32, 2, 16, 16, 64, 8, 8, 64, 1, 1, 0, 0, 4, 4 } },
} },
}
},
diff --git a/src/database/kernels/xgemm/xgemm_32.hpp b/src/database/kernels/xgemm/xgemm_32.hpp
index 28b0e11b..a99e3abb 100644
--- a/src/database/kernels/xgemm/xgemm_32.hpp
+++ b/src/database/kernels/xgemm/xgemm_32.hpp
@@ -59,8 +59,9 @@ const DatabaseEntry XgemmSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 32, 2, 8, 8, 64, 8, 16, 16, 0, 1, 0, 0, 8, 1 } },
- { kDeviceNameDefault , Params{ 32, 2, 8, 8, 64, 8, 16, 16, 0, 1, 0, 0, 8, 1 } },
+ { Name{"Mali-T628 "}, Params{ 32, 2, 8, 8, 32, 8, 8, 64, 1, 1, 0, 0, 4, 4 } },
+ { Name{"Mali-T760 "}, Params{ 32, 2, 16, 16, 64, 8, 8, 32, 1, 1, 0, 0, 4, 2 } },
+ { kDeviceNameDefault , Params{ 32, 2, 16, 16, 64, 8, 8, 32, 1, 1, 0, 0, 4, 2 } },
} },
}
},
diff --git a/src/database/kernels/xgemm/xgemm_3232.hpp b/src/database/kernels/xgemm/xgemm_3232.hpp
index 95dd4585..8bf2469d 100644
--- a/src/database/kernels/xgemm/xgemm_3232.hpp
+++ b/src/database/kernels/xgemm/xgemm_3232.hpp
@@ -59,8 +59,8 @@ const DatabaseEntry XgemmComplexSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 32, 2, 32, 32, 32, 8, 8, 32, 1, 1, 0, 0, 1, 4 } },
- { kDeviceNameDefault , Params{ 32, 2, 32, 32, 32, 8, 8, 32, 1, 1, 0, 0, 1, 4 } },
+ { Name{"Mali-T760 "}, Params{ 32, 2, 16, 16, 16, 8, 8, 32, 1, 1, 0, 0, 1, 2 } },
+ { kDeviceNameDefault , Params{ 32, 2, 16, 16, 16, 8, 8, 32, 1, 1, 0, 0, 1, 2 } },
} },
}
},
@@ -147,7 +147,7 @@ const DatabaseEntry XgemmComplexSingle = {
{ // Default
kDeviceTypeAll, "default", {
{ "default", {
- { kDeviceNameDefault , Params{ 32, 2, 8, 8, 32, 8, 8, 32, 0, 0, 0, 0, 4, 4 } },
+ { kDeviceNameDefault , Params{ 32, 2, 16, 16, 16, 8, 8, 16, 1, 1, 0, 0, 1, 2 } },
} },
}
},
diff --git a/src/database/kernels/xgemm/xgemm_64.hpp b/src/database/kernels/xgemm/xgemm_64.hpp
index 35b17702..310af101 100644
--- a/src/database/kernels/xgemm/xgemm_64.hpp
+++ b/src/database/kernels/xgemm/xgemm_64.hpp
@@ -51,8 +51,8 @@ const DatabaseEntry XgemmDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 16, 2, 8, 8, 32, 16, 16, 16, 1, 1, 1, 0, 4, 1 } },
- { kDeviceNameDefault , Params{ 16, 2, 8, 8, 32, 16, 16, 16, 1, 1, 1, 0, 4, 1 } },
+ { Name{"Mali-T760 "}, Params{ 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 4 } },
+ { kDeviceNameDefault , Params{ 32, 2, 8, 8, 32, 8, 8, 32, 1, 1, 0, 0, 4, 4 } },
} },
}
},
diff --git a/src/database/kernels/xgemm/xgemm_6464.hpp b/src/database/kernels/xgemm/xgemm_6464.hpp
index 9e392aec..9468245e 100644
--- a/src/database/kernels/xgemm/xgemm_6464.hpp
+++ b/src/database/kernels/xgemm/xgemm_6464.hpp
@@ -51,8 +51,8 @@ const DatabaseEntry XgemmComplexDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } },
- { kDeviceNameDefault , Params{ 32, 2, 16, 16, 16, 16, 16, 16, 1, 1, 0, 0, 1, 1 } },
+ { Name{"Mali-T760 "}, Params{ 32, 2, 16, 16, 16, 8, 8, 16, 1, 1, 0, 0, 1, 2 } },
+ { kDeviceNameDefault , Params{ 32, 2, 16, 16, 16, 8, 8, 16, 1, 1, 0, 0, 1, 2 } },
} },
}
},
diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp
index b83382b8..9bdc0537 100644
--- a/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp
+++ b/src/database/kernels/xgemm_direct/xgemm_direct_16.hpp
@@ -26,8 +26,9 @@ const DatabaseEntry XgemmDirectHalf = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 8, 0, 0, 0, 0 } },
+ { Name{"Mali-T628 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 2, 1, 32, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 4, 32, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 4, 32, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp
index 7f1cbd10..bfbbe400 100644
--- a/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp
+++ b/src/database/kernels/xgemm_direct/xgemm_direct_32.hpp
@@ -43,8 +43,9 @@ const DatabaseEntry XgemmDirectSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } },
+ { Name{"Mali-T628 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 2, 2, 32, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 2, 2, 32, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 2, 2, 32, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp
index 7c3c5a14..e91da6ba 100644
--- a/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp
+++ b/src/database/kernels/xgemm_direct/xgemm_direct_3232.hpp
@@ -39,8 +39,8 @@ const DatabaseEntry XgemmDirectComplexSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 16, 16, 16, 16, 16, 1, 0, 1, 1, 16, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 16, 16, 16, 16, 16, 1, 0, 1, 1, 16, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 8, 8, 8, 8, 16, 1, 0, 2, 1, 16, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 8, 8, 8, 8, 16, 1, 0, 2, 1, 16, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp
index 54e53583..7b814e9c 100644
--- a/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp
+++ b/src/database/kernels/xgemm_direct/xgemm_direct_64.hpp
@@ -35,8 +35,8 @@ const DatabaseEntry XgemmDirectDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 16, 16, 16, 16, 16, 1, 0, 1, 1, 16, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 16, 16, 16, 16, 16, 1, 0, 1, 1, 16, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0 } },
} },
}
},
@@ -85,7 +85,7 @@ const DatabaseEntry XgemmDirectDouble = {
{ // Default
kDeviceTypeAll, "default", {
{ "default", {
- { kDeviceNameDefault , Params{ 2, 16, 16, 8, 8, 1, 1, 1, 2, 32, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 2, 16, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp
index c6356469..814ebdb4 100644
--- a/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp
+++ b/src/database/kernels/xgemm_direct/xgemm_direct_6464.hpp
@@ -35,8 +35,8 @@ const DatabaseEntry XgemmDirectComplexDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 2, 16, 16, 16, 16, 1, 1, 1, 1, 16, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 2, 8, 8, 8, 8, 1, 1, 2, 1, 16, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 2, 1, 16, 0, 0, 0, 0 } },
} },
}
},
@@ -85,7 +85,7 @@ const DatabaseEntry XgemmDirectComplexDouble = {
{ // Default
kDeviceTypeAll, "default", {
{ "default", {
- { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 2, 16, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 2, 8, 8, 8, 8, 1, 1, 1, 1, 16, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xgemv/xgemv_32.hpp b/src/database/kernels/xgemv/xgemv_32.hpp
index 6252e2e1..546b6767 100644
--- a/src/database/kernels/xgemv/xgemv_32.hpp
+++ b/src/database/kernels/xgemv/xgemv_32.hpp
@@ -59,8 +59,9 @@ const DatabaseEntry XgemvSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T628 "}, Params{ 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xgemv/xgemv_3232.hpp b/src/database/kernels/xgemv/xgemv_3232.hpp
index c8c1c749..2e4621f5 100644
--- a/src/database/kernels/xgemv/xgemv_3232.hpp
+++ b/src/database/kernels/xgemv/xgemv_3232.hpp
@@ -59,8 +59,8 @@ const DatabaseEntry XgemvComplexSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xgemv/xgemv_64.hpp b/src/database/kernels/xgemv/xgemv_64.hpp
index a314e7ac..24d44149 100644
--- a/src/database/kernels/xgemv/xgemv_64.hpp
+++ b/src/database/kernels/xgemv/xgemv_64.hpp
@@ -51,8 +51,8 @@ const DatabaseEntry XgemvDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 64, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xgemv/xgemv_6464.hpp b/src/database/kernels/xgemv/xgemv_6464.hpp
index 835c7da1..265cdf1a 100644
--- a/src/database/kernels/xgemv/xgemv_6464.hpp
+++ b/src/database/kernels/xgemv/xgemv_6464.hpp
@@ -51,8 +51,8 @@ const DatabaseEntry XgemvComplexDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 32, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp
index c0a1388f..a4d36c22 100644
--- a/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp
+++ b/src/database/kernels/xgemv_fast/xgemv_fast_32.hpp
@@ -59,8 +59,9 @@ const DatabaseEntry XgemvFastSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 2, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 2, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T628 "}, Params{ 4, 32, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 4, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 4, 16, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp
index 0348d0f2..792d44ff 100644
--- a/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp
+++ b/src/database/kernels/xgemv_fast/xgemv_fast_3232.hpp
@@ -59,8 +59,8 @@ const DatabaseEntry XgemvFastComplexSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 2, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 2, 32, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 2, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 2, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp
index be274fee..69f5c1f5 100644
--- a/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp
+++ b/src/database/kernels/xgemv_fast/xgemv_fast_64.hpp
@@ -51,8 +51,8 @@ const DatabaseEntry XgemvFastDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 2, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 2, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 2, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 2, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp b/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp
index cb81869f..6a5ce6d2 100644
--- a/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp
+++ b/src/database/kernels/xgemv_fast/xgemv_fast_6464.hpp
@@ -51,8 +51,8 @@ const DatabaseEntry XgemvFastComplexDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 2, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 2, 64, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 1, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp
index 0b937e40..f7ce153a 100644
--- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp
+++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_32.hpp
@@ -43,7 +43,8 @@ const DatabaseEntry XgemvFastRotSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 8, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T628 "}, Params{ 8, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 2, 128, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
{ kDeviceNameDefault , Params{ 8, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp
index 68509d0c..59eee821 100644
--- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp
+++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_3232.hpp
@@ -43,8 +43,8 @@ const DatabaseEntry XgemvFastRotComplexSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 1, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 1, 32, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 4, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 4, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp
index af857e0e..b587c501 100644
--- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp
+++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_64.hpp
@@ -35,8 +35,8 @@ const DatabaseEntry XgemvFastRotDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 2, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 2, 32, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 4, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 4, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp
index de661153..9841eee1 100644
--- a/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp
+++ b/src/database/kernels/xgemv_fast_rot/xgemv_fast_rot_6464.hpp
@@ -75,7 +75,7 @@ const DatabaseEntry XgemvFastRotComplexDouble = {
{ // Default
kDeviceTypeAll, "default", {
{ "default", {
- { kDeviceNameDefault , Params{ 2, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 4, 16, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xger/xger_16.hpp b/src/database/kernels/xger/xger_16.hpp
index dee3be5c..26f54f1a 100644
--- a/src/database/kernels/xger/xger_16.hpp
+++ b/src/database/kernels/xger/xger_16.hpp
@@ -26,8 +26,8 @@ const DatabaseEntry XgerHalf = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T760 "}, Params{ 64, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 64, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 4, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 4, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
@@ -51,7 +51,7 @@ const DatabaseEntry XgerHalf = {
{ // Default
kDeviceTypeAll, "default", {
{ "default", {
- { kDeviceNameDefault , Params{ 64, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 4, 64, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xger/xger_32.hpp b/src/database/kernels/xger/xger_32.hpp
index 31d996e2..e38259ed 100644
--- a/src/database/kernels/xger/xger_32.hpp
+++ b/src/database/kernels/xger/xger_32.hpp
@@ -59,9 +59,9 @@ const DatabaseEntry XgerSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 64, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 4, 8, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 4, 4, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T628 "}, Params{ 256, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 32, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 32, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xger/xger_3232.hpp b/src/database/kernels/xger/xger_3232.hpp
index b7c83659..c26dff68 100644
--- a/src/database/kernels/xger/xger_3232.hpp
+++ b/src/database/kernels/xger/xger_3232.hpp
@@ -59,9 +59,8 @@ const DatabaseEntry XgerComplexSingle = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 128, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 16, 16, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 4, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 256, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 256, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
@@ -139,7 +138,7 @@ const DatabaseEntry XgerComplexSingle = {
{ // Default
kDeviceTypeAll, "default", {
{ "default", {
- { kDeviceNameDefault , Params{ 64, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xger/xger_64.hpp b/src/database/kernels/xger/xger_64.hpp
index d67be672..04c4ac56 100644
--- a/src/database/kernels/xger/xger_64.hpp
+++ b/src/database/kernels/xger/xger_64.hpp
@@ -51,9 +51,8 @@ const DatabaseEntry XgerDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 64, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 8, 1, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 64, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 64, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
@@ -110,7 +109,7 @@ const DatabaseEntry XgerDouble = {
{ // Default
kDeviceTypeAll, "default", {
{ "default", {
- { kDeviceNameDefault , Params{ 128, 1, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 128, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/database/kernels/xger/xger_6464.hpp b/src/database/kernels/xger/xger_6464.hpp
index 59759994..6b5b8644 100644
--- a/src/database/kernels/xger/xger_6464.hpp
+++ b/src/database/kernels/xger/xger_6464.hpp
@@ -51,9 +51,8 @@ const DatabaseEntry XgerComplexDouble = {
{ // ARM GPUs
kDeviceTypeGPU, "ARM", {
{ "default", {
- { Name{"Mali-T628 "}, Params{ 64, 2, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { Name{"Mali-T760 "}, Params{ 4, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
- { kDeviceNameDefault , Params{ 4, 8, 4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { Name{"Mali-T760 "}, Params{ 64, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
+ { kDeviceNameDefault , Params{ 64, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 } },
} },
}
},
diff --git a/src/utilities/compile.cpp b/src/utilities/compile.cpp
index 6243d196..a3b90126 100644
--- a/src/utilities/compile.cpp
+++ b/src/utilities/compile.cpp
@@ -79,8 +79,7 @@ Program CompileFromSource(const std::string &source_string, const Precision prec
// Runs a pre-processor to unroll loops and perform array-to-register promotion. Most OpenCL
// compilers do this, but some don't.
auto do_run_preprocessor = false;
- if (run_preprocessor == 0) { do_run_preprocessor = (device.IsARM() && device.IsGPU()) ||
- (device.IsQualcomm() && device.IsGPU()); }
+ if (run_preprocessor == 0) { do_run_preprocessor = (device.IsARM() && device.IsGPU()); }
if (run_preprocessor == 1) { do_run_preprocessor = true; }
auto kernel_string = header_string + source_string;
if (do_run_preprocessor) {