diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-03-10 10:54:44 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-03-10 10:54:44 +0100 |
commit | 0bdc51e47c97ef8c810d89cc8b2307bea4cbe852 (patch) | |
tree | 93b34336fb55a9c5b36f46b6a5176fee83dcfdc6 | |
parent | 6397e6174696b1a2ac6f4f0698e0f67e48891721 (diff) |
Completed the API for all tuneable kernels
-rw-r--r-- | include/clblast.h | 12 | ||||
-rw-r--r-- | src/tuning/tuning_api.cpp | 42 |
2 files changed, 54 insertions, 0 deletions
diff --git a/include/clblast.h b/include/clblast.h index c5d2ddc2..34efc746 100644 --- a/include/clblast.h +++ b/include/clblast.h @@ -722,6 +722,14 @@ StatusCode PUBLIC_API TuneXger(cl_command_queue* queue, const size_t m, const si const double fraction, std::unordered_map<std::string,size_t> &parameters); template <typename T> +StatusCode PUBLIC_API TuneXgemm(cl_command_queue* queue, const size_t m, const size_t n, const size_t k, + const double fraction, std::unordered_map<std::string,size_t> &parameters); + +template <typename T> +StatusCode PUBLIC_API TuneXgemmDirect(cl_command_queue* queue, const size_t m, const size_t n, const size_t k, + const double fraction, std::unordered_map<std::string,size_t> &parameters); + +template <typename T> StatusCode PUBLIC_API TuneCopy(cl_command_queue* queue, const size_t m, const size_t n, const double fraction, std::unordered_map<std::string,size_t> &parameters); @@ -737,6 +745,10 @@ template <typename T> StatusCode PUBLIC_API TunePadtranspose(cl_command_queue* queue, const size_t m, const size_t n, const double fraction, std::unordered_map<std::string,size_t> &parameters); +template <typename T> +StatusCode PUBLIC_API TuneInvert(cl_command_queue* queue, const size_t m, const size_t n, const size_t k, + const double fraction, std::unordered_map<std::string,size_t> &parameters); + // ================================================================================================= } // namespace clblast diff --git a/src/tuning/tuning_api.cpp b/src/tuning/tuning_api.cpp index 61cb0389..4ffb46c2 100644 --- a/src/tuning/tuning_api.cpp +++ b/src/tuning/tuning_api.cpp @@ -91,6 +91,34 @@ template StatusCode TuneXger<float2>(RawCommandQueue*, const size_t, const size_ template StatusCode TuneXger<double2>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); template <typename T> +StatusCode TuneXgemm(RawCommandQueue * queue, const size_t m, const size_t n, const size_t k, + const double fraction, 
std::unordered_map<std::string,size_t> &parameters) { + auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; args.k = k; + auto queue_cpp = Queue(*queue); + return TunerAPI<T>(queue_cpp, args, 2, GetTunerDefaults, GetTunerSettings<T>, + TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); +} +template StatusCode TuneXgemm<half>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXgemm<float>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXgemm<double>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXgemm<float2>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXgemm<double2>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); + +template <typename T> +StatusCode TuneXgemmDirect(RawCommandQueue * queue, const size_t m, const size_t n, const size_t k, + const double fraction, std::unordered_map<std::string,size_t> &parameters) { + auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; args.k = k; + auto queue_cpp = Queue(*queue); + return TunerAPI<T>(queue_cpp, args, 2, GetTunerDefaults, GetTunerSettings<T>, + TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); +} +template StatusCode TuneXgemmDirect<half>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXgemmDirect<float>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXgemmDirect<double>(RawCommandQueue*, const size_t, const size_t, const 
size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXgemmDirect<float2>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneXgemmDirect<double2>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); + +template <typename T> StatusCode TuneCopy(RawCommandQueue * queue, const size_t m, const size_t n, const double fraction, std::unordered_map<std::string,size_t> &parameters) { auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; @@ -146,6 +174,20 @@ template StatusCode TunePadtranspose<double>(RawCommandQueue*, const size_t, con template StatusCode TunePadtranspose<float2>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); template StatusCode TunePadtranspose<double2>(RawCommandQueue*, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template <typename T> +StatusCode TuneInvert(RawCommandQueue * queue, const size_t m, const size_t n, const size_t k, + const double fraction, std::unordered_map<std::string,size_t> &parameters) { + auto args = Arguments<T>(); args.fraction = fraction; args.m = m; args.n = n; args.k = k; + auto queue_cpp = Queue(*queue); + return TunerAPI<T>(queue_cpp, args, 0, GetTunerDefaults, GetTunerSettings<T>, + TestValidArguments<T>, SetConstraints, SetArguments<T>, parameters); +} +template StatusCode TuneInvert<half>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneInvert<float>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneInvert<double>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode 
TuneInvert<float2>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); +template StatusCode TuneInvert<double2>(RawCommandQueue*, const size_t, const size_t, const size_t, const double, std::unordered_map<std::string,size_t>&); + // ================================================================================================= // The main tuner API, similar to the one in tuning.cpp, but without I/O |