summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2019-05-08 22:01:56 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2019-05-08 22:01:56 +0200
commit 9cbffc9b7cbcfa6d6c8a048c45c1ad52d22effb3 (patch)
tree 26b0bbb1f887e49d2eb02658322bf0624e9e68a8
parent c5a82f6978b3c459d70a04f0bf3904b424c867b5 (diff)
Changed back to cl_intel_subgroups as suggested
-rw-r--r-- src/kernels/level3/xgemm_part1.opencl 2
-rw-r--r-- src/utilities/compile.cpp 1
-rw-r--r-- src/utilities/utilities.hpp 1
3 files changed, 1 insertions, 3 deletions
diff --git a/src/kernels/level3/xgemm_part1.opencl b/src/kernels/level3/xgemm_part1.opencl
index 306280bc..9d46ab7e 100644
--- a/src/kernels/level3/xgemm_part1.opencl
+++ b/src/kernels/level3/xgemm_part1.opencl
@@ -127,7 +127,7 @@ R"(
// Intel subgroups (https://www.khronos.org/registry/OpenCL/extensions/intel/cl_intel_subgroups.html)
#if USE_SUBGROUP_SHUFFLING == 1 && SUBGROUP_SHUFFLING_INTEL == 1
- #pragma OPENCL EXTENSION cl_intel_device_side_avc_motion_estimation: enable
+ #pragma OPENCL EXTENSION cl_intel_subgroups: enable
#define SUBGROUP_SIZE 8 // Assumes subgroup size is always 8 on Intel GPUs
#endif
diff --git a/src/utilities/compile.cpp b/src/utilities/compile.cpp
index 46cef1d7..00cb90cb 100644
--- a/src/utilities/compile.cpp
+++ b/src/utilities/compile.cpp
@@ -60,7 +60,6 @@ std::shared_ptr<Program> CompileFromSource(
// For Intel GPUs with subgroup support, use subgroup shuffling.
if (device.IsGPU() && device.HasExtension(kKhronosIntelSubgroups) &&
- device.HasExtension(kKhronosIntelAvcMotionEstimation) &&
(precision == Precision::kSingle || precision == Precision::kHalf)) {
header_string += "#define USE_SUBGROUP_SHUFFLING 1\n";
header_string += "#define SUBGROUP_SHUFFLING_INTEL 1\n";
diff --git a/src/utilities/utilities.hpp b/src/utilities/utilities.hpp
index 77221277..23486d35 100644
--- a/src/utilities/utilities.hpp
+++ b/src/utilities/utilities.hpp
@@ -48,7 +48,6 @@ using double2 = std::complex<double>;
const std::string kKhronosAttributesAMD = "cl_amd_device_attribute_query";
const std::string kKhronosAttributesNVIDIA = "cl_nv_device_attribute_query";
const std::string kKhronosIntelSubgroups = "cl_intel_subgroups";
-const std::string kKhronosIntelAvcMotionEstimation = "cl_intel_device_side_avc_motion_estimation";
// Catched an unknown error
constexpr auto kUnknownError = -999;