diff options
author | cnugteren <web@cedricnugteren.nl> | 2016-04-20 21:11:33 -0600 |
---|---|---|
committer | cnugteren <web@cedricnugteren.nl> | 2016-04-20 21:11:33 -0600 |
commit | 894983fc3c7c57ffc48c21523641694cde318eca (patch) | |
tree | e1c5e36923448fa6c89014216308bce631c24a6e | |
parent | 5a4f8217be97575daf4f0f97d8ae7f8cf7bbbcd0 (diff) |
Added prototype for ixAMAX routines
-rw-r--r-- | include/clblast.h | 7 | ||||
-rw-r--r-- | include/clblast_c.h | 18 | ||||
-rw-r--r-- | scripts/generator/generator.py | 7 | ||||
-rw-r--r-- | scripts/generator/routine.py | 2 | ||||
-rw-r--r-- | src/clblast.cc | 25 | ||||
-rw-r--r-- | src/clblast_c.cc | 42 | ||||
-rw-r--r-- | test/correctness/routines/level1/xamax.cc | 28 | ||||
-rw-r--r-- | test/performance/routines/level1/xamax.cc | 35 | ||||
-rw-r--r-- | test/wrapper_cblas.h | 26 | ||||
-rw-r--r-- | test/wrapper_clblas.h | 68 |
10 files changed, 256 insertions, 2 deletions
diff --git a/include/clblast.h b/include/clblast.h index cf55a256..57948581 100644 --- a/include/clblast.h +++ b/include/clblast.h @@ -188,6 +188,13 @@ StatusCode Asum(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event = nullptr); +// Index of absolute maximum value in a vector: iSAMAX/iDAMAX/iCAMAX/iZAMAX +template <typename T> +StatusCode Amax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event = nullptr); + // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= diff --git a/include/clblast_c.h b/include/clblast_c.h index 190e6a46..92f4afe5 100644 --- a/include/clblast_c.h +++ b/include/clblast_c.h @@ -296,6 +296,24 @@ StatusCode PUBLIC_API CLBlastDzasum(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, cl_command_queue* queue, cl_event* event); +// Index of absolute maximum value in a vector: iSAMAX/iDAMAX/iCAMAX/iZAMAX +StatusCode PUBLIC_API CLBlastiSamax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +StatusCode PUBLIC_API CLBlastiDamax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +StatusCode PUBLIC_API CLBlastiCamax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); +StatusCode PUBLIC_API CLBlastiZamax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const 
cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event); + // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index 9b1b2aa1..3a845686 100644 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -40,6 +40,10 @@ Z = DataType("Z", "Z", DBL2, [DBL2, DBL2, D2CL, D2CL], DBL2) # double-complex (6 # Special cases Sc = DataType("C", "Sc", FLT2, [FLT2, FLT2, FLT2, FLT2], FLT2) # As C, but with real output Dz = DataType("Z", "Dz", DBL2, [DBL2, DBL2, DBL2, DBL2], DBL2) # As Z, but with real output +iS = DataType("S", "iS", FLT, [FLT, FLT, FLT, FLT], FLT ) # As S, but with integer output +iD = DataType("D", "iD", DBL, [DBL, DBL, DBL, DBL], DBL ) # As D, but with integer output +iC = DataType("C", "iC", FLT2, [FLT2, FLT2, F2CL, F2CL], FLT2) # As C, but with integer output +iZ = DataType("Z", "iZ", DBL2, [DBL2, DBL2, D2CL, D2CL], DBL2) # As Z, but with integer output Css = DataType("C", "C", FLT, [FLT, FLT, FLT, FLT], FLT2) # As C, but with constants from S Zdd = DataType("Z", "Z", DBL, [DBL, DBL, DBL, DBL], DBL2) # As Z, but with constants from D Ccs = DataType("C", "C", FLT2+","+FLT, [FLT2, FLT, F2CL, FLT], FLT2) # As C, but with one constant from S @@ -68,6 +72,7 @@ routines = [ Routine(True, "1", "dotc", T, [C,Z], ["n"], [], ["x","y"], ["dot"], [], "n", "Dot product of two complex vectors, one conjugated"), Routine(True, "1", "nrm2", T, [S,D,Sc,Dz],["n"], [], ["x"], ["nrm2"], [], "2*n", "Euclidian norm of a vector"), Routine(True, "1", "asum", T, [S,D,Sc,Dz],["n"], [], ["x"], ["asum"], [], "n", "Absolute sum of values in a vector"), + Routine(False, "1", "amax", T, [iS,iD,iC,iZ],["n"], [], ["x"], ["imax"], [], "n", "Index of absolute maxium 
value in a vector"), ], [ # Level 2: matrix-vector Routine(True, "2a", "gemv", T, [S,D,C,Z], ["m","n"], ["layout","a_transpose"], ["a","x"], ["y"], ["alpha","beta"], "", "General matrix-vector multiplication"), @@ -369,7 +374,7 @@ for level in [1,2,3]: body += "using double2 = clblast::double2;\n\n" body += "// Main function (not within the clblast namespace)\n" body += "int main(int argc, char *argv[]) {\n" - default = PrecisionToFullName(routine.flavours[0].name) + default = PrecisionToFullName(routine.flavours[0].precision_name) body += " switch(clblast::GetPrecision(argc, argv, clblast::Precision::k"+default+")) {\n" for precision in ["H","S","D","C","Z"]: body += " case clblast::Precision::k"+PrecisionToFullName(precision)+":" diff --git a/scripts/generator/routine.py b/scripts/generator/routine.py index 7ddd7a12..9806d960 100644 --- a/scripts/generator/routine.py +++ b/scripts/generator/routine.py @@ -72,7 +72,7 @@ class Routine(): # List of scalar buffers def ScalarBuffersFirst(self): - return ["dot","nrm2","asum"] + return ["dot","nrm2","asum","imax"] def ScalarBuffersSecond(self): return ["sa","sb","sc","ss","sd1","sd2","sx1","sy1","sparam"] diff --git a/src/clblast.cc b/src/clblast.cc index 7210ad1d..bee63b53 100644 --- a/src/clblast.cc +++ b/src/clblast.cc @@ -428,6 +428,31 @@ template StatusCode PUBLIC_API Asum<double2>(const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +// Index of absolute maximum value in a vector: iSAMAX/iDAMAX/iCAMAX/iZAMAX +template <typename T> +StatusCode Amax(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*) { + return StatusCode::kNotImplemented; +} +template StatusCode PUBLIC_API Amax<float>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Amax<double>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, 
+ cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Amax<float2>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Amax<double2>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); + // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= diff --git a/src/clblast_c.cc b/src/clblast_c.cc index c36edbca..23c96feb 100644 --- a/src/clblast_c.cc +++ b/src/clblast_c.cc @@ -475,6 +475,48 @@ StatusCode CLBlastDzasum(const size_t n, return static_cast<StatusCode>(status); } +// AMAX +StatusCode CLBlastiSamax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Amax<float>(n, + imax_buffer, imax_offset, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} +StatusCode CLBlastiDamax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Amax<double>(n, + imax_buffer, imax_offset, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} +StatusCode CLBlastiCamax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Amax<float2>(n, + imax_buffer, imax_offset, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} +StatusCode CLBlastiZamax(const size_t n, + cl_mem imax_buffer, const 
size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Amax<double2>(n, + imax_buffer, imax_offset, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} + // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= diff --git a/test/correctness/routines/level1/xamax.cc b/test/correctness/routines/level1/xamax.cc new file mode 100644 index 00000000..ade09e7a --- /dev/null +++ b/test/correctness/routines/level1/xamax.cc @@ -0,0 +1,28 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. 
+// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// ================================================================================================= + +#include "correctness/testblas.h" +#include "routines/level1/xamax.h" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + clblast::RunTests<clblast::TestXamax<float>, float, float>(argc, argv, false, "iSAMAX"); + clblast::RunTests<clblast::TestXamax<double>, double, double>(argc, argv, true, "iDAMAX"); + clblast::RunTests<clblast::TestXamax<float2>, float2, float2>(argc, argv, true, "iCAMAX"); + clblast::RunTests<clblast::TestXamax<double2>, double2, double2>(argc, argv, true, "iZAMAX"); + return 0; +} + +// ================================================================================================= diff --git a/test/performance/routines/level1/xamax.cc b/test/performance/routines/level1/xamax.cc new file mode 100644 index 00000000..85caa483 --- /dev/null +++ b/test/performance/routines/level1/xamax.cc @@ -0,0 +1,35 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. 
+// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// ================================================================================================= + +#include "performance/client.h" +#include "routines/level1/xamax.h" + +// Shortcuts to the clblast namespace +using float2 = clblast::float2; +using double2 = clblast::double2; + +// Main function (not within the clblast namespace) +int main(int argc, char *argv[]) { + switch(clblast::GetPrecision(argc, argv, clblast::Precision::kSingle)) { + case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode"); + case clblast::Precision::kSingle: + clblast::RunClient<clblast::TestXamax<float>, float, float>(argc, argv); break; + case clblast::Precision::kDouble: + clblast::RunClient<clblast::TestXamax<double>, double, double>(argc, argv); break; + case clblast::Precision::kComplexSingle: + clblast::RunClient<clblast::TestXamax<float2>, float2, float2>(argc, argv); break; + case clblast::Precision::kComplexDouble: + clblast::RunClient<clblast::TestXamax<double2>, double2, double2>(argc, argv); break; + } + return 0; +} + +// ================================================================================================= diff --git a/test/wrapper_cblas.h b/test/wrapper_cblas.h index af0eec9b..994b48b1 100644 --- a/test/wrapper_cblas.h +++ b/test/wrapper_cblas.h @@ -371,6 +371,32 @@ void cblasXasum(const size_t n, reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); } +// Forwards the Netlib BLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX +void cblasXamax(const size_t n, + std::vector<float>& imax_buffer, const size_t imax_offset, + const std::vector<float>& x_buffer, const size_t x_offset, const size_t x_inc) { + imax_buffer[imax_offset] = cblas_isamax(n, + &x_buffer[x_offset], static_cast<int>(x_inc)); +} +void cblasXamax(const size_t n, + std::vector<double>& imax_buffer, const size_t imax_offset, + const std::vector<double>& x_buffer, const size_t 
x_offset, const size_t x_inc) { + imax_buffer[imax_offset] = cblas_idamax(n, + &x_buffer[x_offset], static_cast<int>(x_inc)); +} +void cblasXamax(const size_t n, + std::vector<float2>& imax_buffer, const size_t imax_offset, + const std::vector<float2>& x_buffer, const size_t x_offset, const size_t x_inc) { + imax_buffer[imax_offset] = cblas_icamax(n, + reinterpret_cast<const float*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); +} +void cblasXamax(const size_t n, + std::vector<double2>& imax_buffer, const size_t imax_offset, + const std::vector<double2>& x_buffer, const size_t x_offset, const size_t x_inc) { + imax_buffer[imax_offset] = cblas_izamax(n, + reinterpret_cast<const double*>(&x_buffer[x_offset]), static_cast<int>(x_inc)); +} + // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= diff --git a/test/wrapper_clblas.h b/test/wrapper_clblas.h index 09b3310b..955dc3ad 100644 --- a/test/wrapper_clblas.h +++ b/test/wrapper_clblas.h @@ -626,6 +626,74 @@ clblasStatus clblasXasum<double2>(const size_t n, num_queues, queues, num_wait_events, wait_events, events); } +// Forwards the clBLAS calls for iSAMAX/iDAMAX/iCAMAX/iZAMAX +template <typename T> +clblasStatus clblasXamax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events); +template <> +clblasStatus clblasXamax<float>(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto queue = Queue(queues[0]); + auto context = 
queue.GetContext(); + auto scratch_buffer = Buffer<float>(context, n); + return clblasiSamax(n, + imax_buffer, imax_offset, + x_buffer, x_offset, static_cast<int>(x_inc), + scratch_buffer(), + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXamax<double>(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto queue = Queue(queues[0]); + auto context = queue.GetContext(); + auto scratch_buffer = Buffer<double>(context, n); + return clblasiDamax(n, + imax_buffer, imax_offset, + x_buffer, x_offset, static_cast<int>(x_inc), + scratch_buffer(), + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXamax<float2>(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto queue = Queue(queues[0]); + auto context = queue.GetContext(); + auto scratch_buffer = Buffer<float2>(context, n); + return clblasiCamax(n, + imax_buffer, imax_offset, + x_buffer, x_offset, static_cast<int>(x_inc), + scratch_buffer(), + num_queues, queues, num_wait_events, wait_events, events); +} +template <> +clblasStatus clblasXamax<double2>(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_uint num_queues, cl_command_queue *queues, + cl_uint num_wait_events, const cl_event *wait_events, cl_event *events) { + auto queue = Queue(queues[0]); + auto context = queue.GetContext(); + auto scratch_buffer = Buffer<double2>(context, n); + return clblasiZamax(n, + imax_buffer, imax_offset, + x_buffer, x_offset, static_cast<int>(x_inc), 
+ scratch_buffer(), + num_queues, queues, num_wait_events, wait_events, events); +} + // ================================================================================================= // BLAS level-2 (matrix-vector) routines // ================================================================================================= |