diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-03-09 21:40:22 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-03-09 21:40:22 +0100 |
commit | 6397e6174696b1a2ac6f4f0698e0f67e48891721 (patch) | |
tree | 0dc452bf166bb4e95373eb49eff9e9129f48d29e | |
parent | 49cc8b31ff65a6fff29146d98b16f717e0a84213 (diff) |
Added several more tuner API functions
-rw-r--r-- | include/clblast.h | 32 | ||||
-rw-r--r-- | samples/tuning_api.cpp | 4 | ||||
-rw-r--r-- | src/tuning/tuning_api.cpp | 126 |
3 files changed, 146 insertions, 16 deletions
diff --git a/include/clblast.h b/include/clblast.h index d6118e19..c5d2ddc2 100644 --- a/include/clblast.h +++ b/include/clblast.h @@ -706,8 +706,36 @@ StatusCode PUBLIC_API OverrideParameters(const cl_device_id device, const std::s // ================================================================================================= template <typename T> -StatusCode PUBLIC_API TuneCopyMatrixFast(cl_command_queue* queue, const size_t m, const size_t n, - const double fraction, std::unordered_map<std::string,size_t> &parameters); +StatusCode PUBLIC_API TuneXaxpy(cl_command_queue* queue, const size_t n, + const double fraction, std::unordered_map<std::string,size_t> &parameters); + +template <typename T> +StatusCode PUBLIC_API TuneXdot(cl_command_queue* queue, const size_t n, + const double fraction, std::unordered_map<std::string,size_t> &parameters); + +template <typename T> +StatusCode PUBLIC_API TuneXgemv(cl_command_queue* queue, const size_t m, const size_t n, + const double fraction, std::unordered_map<std::string,size_t> &parameters); + +template <typename T> +StatusCode PUBLIC_API TuneXger(cl_command_queue* queue, const size_t m, const size_t n, + const double fraction, std::unordered_map<std::string,size_t> &parameters); + +template <typename T> +StatusCode PUBLIC_API TuneCopy(cl_command_queue* queue, const size_t m, const size_t n, + const double fraction, std::unordered_map<std::string,size_t> &parameters); + +template <typename T> +StatusCode PUBLIC_API TunePad(cl_command_queue* queue, const size_t m, const size_t n, + const double fraction, std::unordered_map<std::string,size_t> &parameters); + +template <typename T> +StatusCode PUBLIC_API TuneTranspose(cl_command_queue* queue, const size_t m, const size_t n, + const double fraction, std::unordered_map<std::string,size_t> &parameters); + +template <typename T> +StatusCode PUBLIC_API TunePadtranspose(cl_command_queue* queue, const size_t m, const size_t n, + const double fraction, std::unordered_map<std::string,size_t> &parameters); // 
================================================================================================= diff --git a/samples/tuning_api.cpp b/samples/tuning_api.cpp index 3c9ba876..f92b6909 100644 --- a/samples/tuning_api.cpp +++ b/samples/tuning_api.cpp @@ -60,10 +60,10 @@ int main() { printf("Starting the tuning...\n"); std::unordered_map<std::string,size_t> parameters; auto queue_plain = queue(); - auto status = clblast::TuneCopyMatrixFast<float>(&queue_plain, m, n, fraction, parameters); + auto status = clblast::TuneCopy<float>(&queue_plain, m, n, fraction, parameters); // Tuning completed. See "clblast.h" for status codes (0 -> success). - printf("Completed TuneCopyMatrixFast with status %d (0 == OK), found parameters:\n", static_cast<int>(status)); + printf("Completed TuneCopy with status %d (0 == OK), found parameters:\n", static_cast<int>(status)); for (const auto parameter: parameters) { printf("> %s = %zu\n", parameter.first.c_str(), parameter.second); } diff --git a/src/tuning/tuning_api.cpp b/src/tuning/tuning_api.cpp index 94a9a367..61cb0389 100644 --- a/src/tuning/tuning_api.cpp +++ b/src/tuning/tuning_api.cpp @@ -26,23 +26,125 @@ namespace clblast { // ================================================================================================= template <typename T> -StatusCode TuneCopyMatrixFast(RawCommandQueue * queue, const size_t m, const size_t n, - const double fraction, std::unordered_map<std::string,size_t> &parameters) { - auto args = Arguments<T>(); - args.m = m; - args.n = n; - args.fraction = fraction; +StatusCode TuneXaxpy(RawCommandQueue * queue, const size_t n, + const double fraction, std::unordered_map<std::string,size_t> &parameters) { + auto args = Arguments<T>(); args.fraction = fraction; args.n = n; auto queue_cpp = Queue(*queue); return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>, TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); } +template StatusCode TuneXaxpy<half>(RawCommandQueue*, const 
size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXaxpy<float>(RawCommandQueue*, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXaxpy<double>(RawCommandQueue*, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXaxpy<float2>(RawCommandQueue*, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXaxpy<double2>(RawCommandQueue*, const size_t, const double, std::unordered_map<std::string,size_t>&); -// Compiles the above -template StatusCode TuneCopyMatrixFast<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); -template StatusCode TuneCopyMatrixFast<float>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); -template StatusCode TuneCopyMatrixFast<double>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); -template StatusCode TuneCopyMatrixFast<float2>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); -template StatusCode TuneCopyMatrixFast<double2>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template <typename T> +StatusCode TuneXdot(RawCommandQueue * queue, const size_t n, + const double fraction, std::unordered_map<std::string,size_t> &parameters) { + auto args = Arguments<T>(); args.fraction = fraction; args.n = n; + auto queue_cpp = Queue(*queue); + auto status = TunerAPI<T>(queue_cpp, args, 1, GetTunerDefaults, GetTunerSettings<T>, + TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); + if (status != StatusCode::kSuccess) { return status; } + return TunerAPI<T>(queue_cpp, args, 2, GetTunerDefaults, GetTunerSettings<T>, + TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); +} +template StatusCode 
TuneXdot<half>(RawCommandQueue*, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXdot<float>(RawCommandQueue*, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXdot<double>(RawCommandQueue*, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXdot<float2>(RawCommandQueue*, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXdot<double2>(RawCommandQueue*, const size_t, const double, std::unordered_map<std::string,size_t>&); + +template <typename T> +StatusCode TuneXgemv(RawCommandQueue * queue, const size_t m, const size_t n, + const double fraction, std::unordered_map<std::string,size_t> &parameters) { + auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; + auto queue_cpp = Queue(*queue); + auto status = TunerAPI<T>(queue_cpp, args, 1, GetTunerDefaults, GetTunerSettings<T>, + TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); + if (status != StatusCode::kSuccess) { return status; } + status = TunerAPI<T>(queue_cpp, args, 2, GetTunerDefaults, GetTunerSettings<T>, + TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); + if (status != StatusCode::kSuccess) { return status; } + return TunerAPI<T>(queue_cpp, args, 3, GetTunerDefaults, GetTunerSettings<T>, + TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); +} +template StatusCode TuneXgemv<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXgemv<float>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXgemv<double>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXgemv<float2>(RawCommandQueue*, const size_t, const size_t, const double, 
std::unordered_map<std::string,size_t>&); +template StatusCode TuneXgemv<double2>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); + +template <typename T> +StatusCode TuneXger(RawCommandQueue * queue, const size_t m, const size_t n, + const double fraction, std::unordered_map<std::string,size_t> &parameters) { + auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; + auto queue_cpp = Queue(*queue); + return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>, + TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); +} +template StatusCode TuneXger<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXger<float>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXger<double>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXger<float2>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXger<double2>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); + +template <typename T> +StatusCode TuneCopy(RawCommandQueue * queue, const size_t m, const size_t n, + const double fraction, std::unordered_map<std::string,size_t> &parameters) { + auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; + auto queue_cpp = Queue(*queue); + return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>, + TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); +} +template StatusCode TuneCopy<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneCopy<float>(RawCommandQueue*, const size_t, const size_t, 
const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneCopy<double>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneCopy<float2>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneCopy<double2>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); + +template <typename T> +StatusCode TunePad(RawCommandQueue * queue, const size_t m, const size_t n, + const double fraction, std::unordered_map<std::string,size_t> &parameters) { + auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; + auto queue_cpp = Queue(*queue); + return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>, + TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); +} +template StatusCode TunePad<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TunePad<float>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TunePad<double>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TunePad<float2>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TunePad<double2>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); + +template <typename T> +StatusCode TuneTranspose(RawCommandQueue * queue, const size_t m, const size_t n, + const double fraction, std::unordered_map<std::string,size_t> &parameters) { + auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; + auto queue_cpp = Queue(*queue); + return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>, + 
TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); +} +template StatusCode TuneTranspose<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneTranspose<float>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneTranspose<double>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneTranspose<float2>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneTranspose<double2>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); + +template <typename T> +StatusCode TunePadtranspose(RawCommandQueue * queue, const size_t m, const size_t n, + const double fraction, std::unordered_map<std::string,size_t> &parameters) { + auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; + auto queue_cpp = Queue(*queue); + return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>, + TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); +} +template StatusCode TunePadtranspose<half>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TunePadtranspose<float>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TunePadtranspose<double>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TunePadtranspose<float2>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TunePadtranspose<double2>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); // 
================================================================================================= |