summaryrefslogtreecommitdiff
path: root/src/tuning/tuning.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/tuning/tuning.cc')
-rw-r--r--src/tuning/tuning.cc249
1 files changed, 0 insertions, 249 deletions
diff --git a/src/tuning/tuning.cc b/src/tuning/tuning.cc
deleted file mode 100644
index 2dcb11d5..00000000
--- a/src/tuning/tuning.cc
+++ /dev/null
@@ -1,249 +0,0 @@
-
-// =================================================================================================
-// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
-// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
-// width of 100 characters per line.
-//
-// Author(s):
-// Cedric Nugteren <www.cedricnugteren.nl>
-//
-// This file implements the common auto-tuning code to interface with the CLTune library.
-//
-// =================================================================================================
-
-#include <string>
-#include <vector>
-
-#include "internal/utilities.h"
-#include "internal/tuning.h"
-
-namespace clblast {
-// =================================================================================================
-
-// Function to get command-line argument, set-up the input buffers, configure the tuner, and collect
-// the results. Used for vector-vector routines.
-template <typename T>
-void TunerXY(int argc, char* argv[], const Tuner2<T> &tune_function) {
-
- // Sets the parameters and platform/device for which to tune (command-line options)
- auto help = std::string{"* Options given/available:\n"};
- auto args = Arguments<T>{};
- args.platform_id = GetArgument(argc, argv, help, kArgPlatform, size_t{0});
- args.device_id = GetArgument(argc, argv, help, kArgDevice, size_t{0});
- args.precision = GetArgument(argc, argv, help, kArgPrecision, Precision::kSingle);
- args.n = GetArgument(argc, argv, help, kArgN, size_t{4096*1024});
- args.alpha = GetArgument(argc, argv, help, kArgAlpha, GetScalar<T>());
- fprintf(stdout, "%s\n", help.c_str());
-
- // Creates input buffers with random data
- auto x_vec = std::vector<T>(args.n);
- auto y_vec = std::vector<T>(args.n);
- PopulateVector(x_vec);
- PopulateVector(y_vec);
-
- // Initializes the tuner for the chosen device
- cltune::Tuner tuner(args.platform_id, args.device_id);
-
- // Use full-search to explore all parameter combinations.
- tuner.UseFullSearch();
-
- // Configures the tuning parameters (kernel specific)
- tune_function(args, x_vec, y_vec, tuner);
-
- // Starts the tuning process
- tuner.Tune();
-
- // Prints the results to screen
- auto time_ms = tuner.PrintToScreen();
- tuner.PrintFormatted();
-
- // Also prints the performance of the best-case in terms of GB/s
- const auto mega_bytes = (3*args.n*GetBytes(args.precision)) * 1.0e-6;
- if (time_ms != 0.0) {
- printf("[ -------> ] %.1lf ms or %.1lf GB/s\n", time_ms, mega_bytes/time_ms);
- }
-}
-
-// Compiles the above function
-template void TunerXY<float>(int, char**, const Tuner2<float>&);
-template void TunerXY<double>(int, char**, const Tuner2<double>&);
-template void TunerXY<float2>(int, char**, const Tuner2<float2>&);
-template void TunerXY<double2>(int, char**, const Tuner2<double2>&);
-
-// =================================================================================================
-
-// Function to get command-line argument, set-up the input buffers, configure the tuner, and collect
-// the results. Used for matrix-vector-vector routines.
-template <typename T>
-void TunerAXY(int argc, char* argv[], const size_t num_variations,
- const Tuner3V<T> &tune_function) {
-
- // Sets the parameters and platform/device for which to tune (command-line options)
- auto help = std::string{"* Options given/available:\n"};
- auto args = Arguments<T>{};
- args.platform_id = GetArgument(argc, argv, help, kArgPlatform, size_t{0});
- args.device_id = GetArgument(argc, argv, help, kArgDevice, size_t{0});
- args.precision = GetArgument(argc, argv, help, kArgPrecision, Precision::kSingle);
- args.m = GetArgument(argc, argv, help, kArgM, size_t{2048});
- args.n = GetArgument(argc, argv, help, kArgN, size_t{2048});
- args.alpha = GetArgument(argc, argv, help, kArgAlpha, GetScalar<T>());
- args.beta = GetArgument(argc, argv, help, kArgBeta, GetScalar<T>());
- fprintf(stdout, "%s\n", help.c_str());
-
- // Creates input buffers with random data
- auto a_mat = std::vector<T>(args.m * args.n);
- auto x_vec = std::vector<T>(args.n);
- auto y_vec = std::vector<T>(args.m);
- PopulateVector(a_mat);
- PopulateVector(x_vec);
- PopulateVector(y_vec);
-
- // Loop over the different variations of the kernel
- for (auto variation=size_t{1}; variation<=num_variations; ++variation) {
-
- // Initializes the tuner for the chosen device
- cltune::Tuner tuner(args.platform_id, args.device_id);
-
- // Use full-search to explore all parameter combinations.
- tuner.UseFullSearch();
-
- // Configures the tuning parameters (kernel specific)
- tune_function(args, variation, a_mat, x_vec, y_vec, tuner);
-
- // Starts the tuning process
- tuner.Tune();
-
- // Prints the results to screen
- auto time_ms = tuner.PrintToScreen();
- tuner.PrintFormatted();
-
- // Also prints the performance of the best-case in terms of GB/s and GFLOPS
- const auto mega_bytes = ((args.m*args.n + 2*args.m + args.n)*GetBytes(args.precision)) * 1.0e-6;
- const auto mega_flops = (2*args.m*args.n) * 1.0e-6;
- if (time_ms != 0.0) {
- printf("[ -------> ] %.1lf ms or %.1lf GB/s or %.1lf GFLOPS\n",
- time_ms, mega_bytes/time_ms, mega_flops/time_ms);
- }
- }
-}
-
-// Compiles the above function
-template void TunerAXY<float>(int, char**, const size_t, const Tuner3V<float>&);
-template void TunerAXY<double>(int, char**, const size_t, const Tuner3V<double>&);
-template void TunerAXY<float2>(int, char**, const size_t, const Tuner3V<float2>&);
-template void TunerAXY<double2>(int, char**, const size_t, const Tuner3V<double2>&);
-
-// =================================================================================================
-
-// Function to get command-line argument, set-up the input buffers, configure the tuner, and collect
-// the results. Used for matrix-matrix routines.
-template <typename T>
-void TunerAB(int argc, char* argv[], const Tuner2<T> &tune_function) {
-
- // Sets the parameters and platform/device for which to tune (command-line options)
- auto help = std::string{"* Options given/available:\n"};
- auto args = Arguments<T>{};
- args.platform_id = GetArgument(argc, argv, help, kArgPlatform, size_t{0});
- args.device_id = GetArgument(argc, argv, help, kArgDevice, size_t{0});
- args.precision = GetArgument(argc, argv, help, kArgPrecision, Precision::kSingle);
- args.m = GetArgument(argc, argv, help, kArgM, size_t{1024});
- args.n = GetArgument(argc, argv, help, kArgN, size_t{1024});
- args.fraction = GetArgument(argc, argv, help, kArgFraction, 2048.0);
- fprintf(stdout, "%s\n", help.c_str());
-
- // Creates input buffers with random data
- auto a_mat = std::vector<T>(args.m * args.n);
- auto b_mat = std::vector<T>(args.m * args.n);
- PopulateVector(a_mat);
- PopulateVector(b_mat);
-
- // Initializes the tuner for the chosen device
- cltune::Tuner tuner(args.platform_id, args.device_id);
-
- // Use full-search to explore all parameter combinations.
- tuner.UseFullSearch();
-
- // Configures the tuning parameters (kernel specific)
- tune_function(args, a_mat, b_mat, tuner);
-
- // Starts the tuning process
- tuner.Tune();
-
- // Prints the results to screen
- auto time_ms = tuner.PrintToScreen();
- tuner.PrintFormatted();
-
- // Also prints the performance of the best-case in terms of GB/s
- const auto mega_bytes = (2*args.m*args.n*GetBytes(args.precision)) * 1.0e-6;
- if (time_ms != 0.0) {
- printf("[ -------> ] %.1lf ms or %.1lf GB/s\n", time_ms, mega_bytes/time_ms);
- }
-}
-
-// Compiles the above function
-template void TunerAB<float>(int, char**, const Tuner2<float>&);
-template void TunerAB<double>(int, char**, const Tuner2<double>&);
-template void TunerAB<float2>(int, char**, const Tuner2<float2>&);
-template void TunerAB<double2>(int, char**, const Tuner2<double2>&);
-
-// =================================================================================================
-
-// Function to get command-line argument, set-up the input buffers, configure the tuner, and collect
-// the results. Used for matrix-matrix-matrix routines.
-template <typename T>
-void TunerABC(int argc, char* argv[], const Tuner3<T> &tune_function) {
-
- // Sets the parameters and platform/device for which to tune (command-line options)
- auto help = std::string{"* Options given/available:\n"};
- auto args = Arguments<T>{};
- args.platform_id = GetArgument(argc, argv, help, kArgPlatform, size_t{0});
- args.device_id = GetArgument(argc, argv, help, kArgDevice, size_t{0});
- args.precision = GetArgument(argc, argv, help, kArgPrecision, Precision::kSingle);
- args.m = GetArgument(argc, argv, help, kArgM, size_t{1024});
- args.n = GetArgument(argc, argv, help, kArgN, size_t{1024});
- args.k = GetArgument(argc, argv, help, kArgK, size_t{1024});
- args.alpha = GetArgument(argc, argv, help, kArgAlpha, GetScalar<T>());
- args.beta = GetArgument(argc, argv, help, kArgBeta, GetScalar<T>());
- args.fraction = GetArgument(argc, argv, help, kArgFraction, 2048.0);
- fprintf(stdout, "%s\n", help.c_str());
-
- // Creates input buffers with random data
- auto a_mat = std::vector<T>(args.m * args.k);
- auto b_mat = std::vector<T>(args.n * args.k);
- auto c_mat = std::vector<T>(args.m * args.n);
- PopulateVector(a_mat);
- PopulateVector(b_mat);
- PopulateVector(c_mat);
-
- // Initializes the tuner for the chosen device
- cltune::Tuner tuner(args.platform_id, args.device_id);
-
- // Use random-search to search only a part of the parameter values. The fraction of the search-
- // space to explore is set as a command-line argument.
- tuner.UseRandomSearch(1.0/args.fraction);
-
- // Configures the tuning parameters (kernel specific)
- tune_function(args, a_mat, b_mat, c_mat, tuner);
-
- // Starts the tuning process
- tuner.Tune();
-
- // Prints the results to screen
- auto time_ms = tuner.PrintToScreen();
- tuner.PrintFormatted();
-
- // Also prints the performance of the best-case in terms of GFLOPS
- const auto mega_flops = (2*args.m*args.n*args.k) * 1.0e-6;
- if (time_ms != 0.0) {
- printf("[ -------> ] %.1lf ms or %.1lf GFLOPS\n", time_ms, mega_flops/time_ms);
- }
-}
-
-// Compiles the above function
-template void TunerABC<float>(int, char**, const Tuner3<float>&);
-template void TunerABC<double>(int, char**, const Tuner3<double>&);
-template void TunerABC<float2>(int, char**, const Tuner3<float2>&);
-template void TunerABC<double2>(int, char**, const Tuner3<double2>&);
-
-// =================================================================================================
-} // namespace clblast