diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2017-10-28 17:32:37 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2017-10-28 17:32:37 +0200 |
commit | 12b08ae49154379f7471a40809ace6418857b387 (patch) | |
tree | ef958197db0bb8a67c9a5840f828b3f6c72bd8fc /test/routines/levelx | |
parent | 2949e156f5bfdd724987e67477da3e3608e4aaf9 (diff) | |
parent | fa6e5e67f585b77d34c3031c176de9a0f7904aa9 (diff) |
Merge branch 'master' into android_support
Diffstat (limited to 'test/routines/levelx')
-rw-r--r-- | test/routines/levelx/xaxpybatched.hpp | 25 | ||||
-rw-r--r-- | test/routines/levelx/xgemmbatched.hpp | 31 | ||||
-rw-r--r-- | test/routines/levelx/xim2col.hpp | 34 | ||||
-rw-r--r-- | test/routines/levelx/xinvert.hpp | 33 | ||||
-rw-r--r-- | test/routines/levelx/xomatcopy.hpp | 25 |
5 files changed, 103 insertions, 45 deletions
diff --git a/test/routines/levelx/xaxpybatched.hpp b/test/routines/levelx/xaxpybatched.hpp index 4a8fc564..e9715f4e 100644 --- a/test/routines/levelx/xaxpybatched.hpp +++ b/test/routines/levelx/xaxpybatched.hpp @@ -83,14 +83,23 @@ class TestXaxpyBatched { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = AxpyBatched(args.n, args.alphas.data(), - buffers.x_vec(), args.x_offsets.data(), args.x_inc, - buffers.y_vec(), args.y_offsets.data(), args.y_inc, - args.batch_count, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = AxpyBatched(args.n, args.alphas.data(), + buffers.x_vec(), args.x_offsets.data(), args.x_inc, + buffers.y_vec(), args.y_offsets.data(), args.y_inc, + args.batch_count, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = AxpyBatched(args.n, args.alphas.data(), + buffers.x_vec(), args.x_offsets.data(), args.x_inc, + buffers.y_vec(), args.y_offsets.data(), args.y_inc, + args.batch_count, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/levelx/xgemmbatched.hpp b/test/routines/levelx/xgemmbatched.hpp index 704d0578..2a8bd9d4 100644 --- a/test/routines/levelx/xgemmbatched.hpp +++ b/test/routines/levelx/xgemmbatched.hpp @@ -108,8 +108,6 @@ class TestXgemmBatched { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; // Relaxed requirement on ld_a and ld_b within the library, this is here to match clBLAS auto a_rotated = (args.layout == Layout::kColMajor && args.a_transpose != Transpose::kNo) || (args.layout == Layout::kRowMajor && args.a_transpose == Transpose::kNo); @@ -119,14 +117,27 @@ class TestXgemmBatched { auto b_one = (!b_rotated) ? args.k : args.n; if (args.a_ld < a_one) { return StatusCode::kInvalidLeadDimA; } if (args.b_ld < b_one) { return StatusCode::kInvalidLeadDimB; } - auto status = GemmBatched(args.layout, args.a_transpose, args.b_transpose, - args.m, args.n, args.k, args.alphas.data(), - buffers.a_mat(), args.a_offsets.data(), args.a_ld, - buffers.b_mat(), args.b_offsets.data(), args.b_ld, args.betas.data(), - buffers.c_mat(), args.c_offsets.data(), args.c_ld, - args.batch_count, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = GemmBatched(args.layout, args.a_transpose, args.b_transpose, + args.m, args.n, args.k, args.alphas.data(), + buffers.a_mat(), args.a_offsets.data(), args.a_ld, + buffers.b_mat(), args.b_offsets.data(), args.b_ld, args.betas.data(), + buffers.c_mat(), args.c_offsets.data(), args.c_ld, + args.batch_count, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = GemmBatched(args.layout, args.a_transpose, args.b_transpose, + args.m, args.n, args.k, args.alphas.data(), + buffers.a_mat(), args.a_offsets.data(), args.a_ld, + buffers.b_mat(), args.b_offsets.data(), args.b_ld, args.betas.data(), + buffers.c_mat(), args.c_offsets.data(), args.c_ld, + args.batch_count, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/levelx/xim2col.hpp b/test/routines/levelx/xim2col.hpp index 4124190f..ebffe85e 100644 --- a/test/routines/levelx/xim2col.hpp +++ b/test/routines/levelx/xim2col.hpp @@ -84,17 +84,29 @@ public: // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Im2col<T>(args.channels, args.height, args.width, - args.kernel_h, args.kernel_w, - args.pad_h, args.pad_w, - args.stride_h, args.stride_w, - args.dilation_h, args.dilation_w, - buffers.a_mat(), args.a_offset, - buffers.b_mat(), args.b_offset, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Im2col<T>(args.channels, args.height, args.width, + args.kernel_h, args.kernel_w, + args.pad_h, args.pad_w, + args.stride_h, args.stride_w, + args.dilation_h, args.dilation_w, + buffers.a_mat(), args.a_offset, + buffers.b_mat(), args.b_offset, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Im2col<T>(args.channels, args.height, args.width, + args.kernel_h, args.kernel_w, + args.pad_h, args.pad_w, + args.stride_h, args.stride_w, + args.dilation_h, args.dilation_w, + buffers.a_mat(), args.a_offset, + buffers.b_mat(), args.b_offset, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } diff --git a/test/routines/levelx/xinvert.hpp b/test/routines/levelx/xinvert.hpp index cc02a88b..b8503029 100644 --- a/test/routines/levelx/xinvert.hpp +++ b/test/routines/levelx/xinvert.hpp @@ -17,6 +17,7 @@ #define CLBLAST_TEST_ROUTINES_XINVERT_H_ #include "test/routines/common.hpp" +#include "src/routines/levelx/xinvert.hpp" namespace clblast { // ================================================================================================= @@ -40,6 +41,13 @@ StatusCode RunReference(const Arguments<T> &args, BuffersHost<T> &buffers_host) return StatusCode::kUnknownError; } + // Start at zero + for (size_t i =0; i < args.m; ++i) { + for (size_t j = 0; j < args.n; ++j) { + buffers_host.b_mat[j * args.m + i] = T{0.0}; + } + } + // Loops over the amount of diagonal blocks of size args.m by args.m each for (auto block_id = size_t{0}; block_id < num_blocks; ++block_id) { const auto a_offset = block_id * (block_size + a_ld * block_size) + args.a_offset; @@ -164,14 +172,23 @@ class TestXinvert { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { try { - auto event = cl_event{}; - auto inverter = Xinvert<T>(queue, &event); - inverter.InvertMatrixDiagonalBlocks(args.layout, args.triangle, args.diagonal, - args.n, args.m, - buffers.a_mat, args.a_offset, args.a_ld, - buffers.b_mat); - clWaitForEvents(1, &event); - clReleaseEvent(event); + #ifdef OPENCL_API + auto event = cl_event{}; + auto inverter = Xinvert<T>(queue, &event); + inverter.InvertMatrixDiagonalBlocks(args.layout, args.triangle, args.diagonal, + args.n, args.m, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.b_mat); + clWaitForEvents(1, &event); + clReleaseEvent(event); + #elif CUDA_API + auto inverter = Xinvert<T>(queue, nullptr); + inverter.InvertMatrixDiagonalBlocks(args.layout, args.triangle, args.diagonal, + args.n, args.m, + buffers.a_mat, args.a_offset, args.a_ld, + buffers.b_mat); + cuStreamSynchronize(queue()); + #endif } catch (...) { return DispatchException(); } return StatusCode::kSuccess; } diff --git a/test/routines/levelx/xomatcopy.hpp b/test/routines/levelx/xomatcopy.hpp index 2736cf75..70bda452 100644 --- a/test/routines/levelx/xomatcopy.hpp +++ b/test/routines/levelx/xomatcopy.hpp @@ -126,14 +126,23 @@ class TestXomatcopy { // Describes how to run the CLBlast routine static StatusCode RunRoutine(const Arguments<T> &args, Buffers<T> &buffers, Queue &queue) { - auto queue_plain = queue(); - auto event = cl_event{}; - auto status = Omatcopy<T>(args.layout, args.a_transpose, - args.m, args.n, args.alpha, - buffers.a_mat(), args.a_offset, args.a_ld, - buffers.b_mat(), args.b_offset, args.b_ld, - &queue_plain, &event); - if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #ifdef OPENCL_API + auto queue_plain = queue(); + auto event = cl_event{}; + auto status = Omatcopy<T>(args.layout, args.a_transpose, + args.m, args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, + &queue_plain, &event); + if (status == StatusCode::kSuccess) { clWaitForEvents(1, &event); clReleaseEvent(event); } + #elif CUDA_API + auto status = Omatcopy<T>(args.layout, args.a_transpose, + args.m, args.n, args.alpha, + buffers.a_mat(), args.a_offset, args.a_ld, + buffers.b_mat(), args.b_offset, args.b_ld, + queue.GetContext()(), queue.GetDevice()()); + cuStreamSynchronize(queue()); + #endif return status; } |