diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-03-03 10:43:12 +0100 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-03-03 10:43:12 +0100 |
commit | bff64917bd5be9005c68e439517fbde06623e616 (patch) | |
tree | 2faa9bc3c5a4e54ecf24b242268c9e0949320322 | |
parent | e3384be0d0338b11e7d817d160fcfd62e57f1e27 (diff) |
Fixed some small issues regarding PR#253
-rw-r--r-- | doc/api.md | 44 | ||||
-rw-r--r-- | include/clblast_c.h | 130 | ||||
-rwxr-xr-x | scripts/generator/generator.py | 2 | ||||
-rw-r--r-- | src/clblast_c.cpp | 82 |
4 files changed, 107 insertions, 151 deletions
@@ -2236,50 +2236,6 @@ CLBlastStatusCode CLBlastHgemm(const CLBlastLayout layout, const CLBlastTranspos cl_command_queue* queue, cl_event* event) ``` -C API with temporary buffer (user to allocate & pass `temp_buffer` with size provided by xGemmTempBufferSize() ): -``` -CLBlastStatusCode CLBlastSgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const float alpha, - const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, - const float beta, - cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event, cl_mem temp_buffer) -CLBlastStatusCode CLBlastDgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const double alpha, - const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, - const double beta, - cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event, cl_mem temp_buffer) -CLBlastStatusCode CLBlastCgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const cl_float2 alpha, - const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, - const cl_float2 beta, - cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event, cl_mem temp_buffer) -CLBlastStatusCode CLBlastZgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const cl_double2 alpha, - const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, - const cl_double2 beta, - cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event, cl_mem temp_buffer) -CLBlastStatusCode CLBlastHgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const cl_half alpha, - const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, - const cl_half beta, - cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event, cl_mem temp_buffer) -``` - Arguments to GEMM: * `const Layout layout`: Data-layout of the matrices, either `Layout::kRowMajor` (101) for row-major layout or `Layout::kColMajor` (102) for column-major data-layout. diff --git a/include/clblast_c.h b/include/clblast_c.h index 051871ce..23a3afcc 100644 --- a/include/clblast_c.h +++ b/include/clblast_c.h @@ -1539,87 +1539,87 @@ CLBlastStatusCode PUBLIC_API CLBlastHgemmStridedBatched(const CLBlastLayout layo // ================================================================================================= // General matrix-matrix multiplication with temporary buffer from user (optional, for advanced users): SGEMM/DGEMM/CGEMM/ZGEMM/HGEMM CLBlastStatusCode PUBLIC_API CLBlastSgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const float alpha, - const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, - const float beta, - cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event, cl_mem temp_buffer); + const size_t m, const size_t n, const size_t k, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event, cl_mem temp_buffer); CLBlastStatusCode PUBLIC_API CLBlastDgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const double alpha, - const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, - const double beta, - cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event, cl_mem temp_buffer); + const size_t m, const size_t n, const size_t k, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event, cl_mem temp_buffer); CLBlastStatusCode PUBLIC_API CLBlastCgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const cl_float2 alpha, - const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, - const cl_float2 beta, - cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event, cl_mem temp_buffer); + const size_t m, const size_t n, const size_t k, + const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_float2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event, cl_mem temp_buffer); CLBlastStatusCode PUBLIC_API CLBlastZgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const cl_double2 alpha, - const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, - const cl_double2 beta, - cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event, cl_mem temp_buffer); + const size_t m, const size_t n, const size_t k, + const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_double2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event, cl_mem temp_buffer); CLBlastStatusCode PUBLIC_API CLBlastHgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const cl_half alpha, - const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, - const cl_half beta, - cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event, cl_mem temp_buffer); + const size_t m, const size_t n, const size_t k, + const cl_half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_half beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event, cl_mem temp_buffer); // ================================================================================================= // Retrieves the required size of the temporary buffer for the GEMM kernel: SGEMM/DGEMM/CGEMM/ZGEMM/HGEMM (optional) CLBlastStatusCode PUBLIC_API CLBlastSGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const size_t a_offset, const size_t a_ld, - const size_t b_offset, const size_t b_ld, - const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, - size_t* temp_buffer_size); + const size_t m, const size_t n, const size_t k, + const size_t a_offset, const size_t a_ld, + const size_t b_offset, const size_t b_ld, + const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, + size_t* temp_buffer_size); CLBlastStatusCode PUBLIC_API CLBlastDGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const size_t a_offset, const size_t a_ld, - const size_t b_offset, const size_t b_ld, - const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, - size_t* temp_buffer_size); + const size_t m, const size_t n, const size_t k, + const size_t a_offset, const size_t a_ld, + const size_t b_offset, const size_t b_ld, + const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, + size_t* temp_buffer_size); CLBlastStatusCode PUBLIC_API CLBlastCGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const size_t a_offset, const size_t a_ld, - const size_t b_offset, const size_t b_ld, - const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, - size_t* temp_buffer_size); + const size_t m, const size_t n, const size_t k, + const size_t a_offset, const size_t a_ld, + const size_t b_offset, const size_t b_ld, + const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, + size_t* temp_buffer_size); CLBlastStatusCode PUBLIC_API CLBlastZGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const size_t a_offset, const size_t a_ld, - const size_t b_offset, const size_t b_ld, - const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, - size_t* temp_buffer_size); + const size_t m, const size_t n, const size_t k, + const size_t a_offset, const size_t a_ld, + const size_t b_offset, const size_t b_ld, + const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, + size_t* temp_buffer_size); CLBlastStatusCode PUBLIC_API CLBlastHGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const size_t a_offset, const size_t a_ld, - const size_t b_offset, const size_t b_ld, - const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, - size_t* temp_buffer_size); + const size_t m, const size_t n, const size_t k, + const size_t a_offset, const size_t a_ld, + const size_t b_offset, const size_t b_ld, + const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, + size_t* temp_buffer_size); // ================================================================================================= diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py index fddfd86e..5ead9b73 100755 --- a/scripts/generator/generator.py +++ b/scripts/generator/generator.py @@ -52,7 +52,7 @@ FILES = [ HEADER_LINES = [123, 21, 127, 24, 29, 41, 29, 65, 32, 95, 21, 288] FOOTER_LINES = [41, 56, 112, 275, 6, 6, 6, 9, 2, 41, 55, 1] HEADER_LINES_DOC = 0 -FOOTER_LINES_DOC = 123 +FOOTER_LINES_DOC = 158 # Different possibilities for requirements ald_m = "The value of `a_ld` must be at least `m`." diff --git a/src/clblast_c.cpp b/src/clblast_c.cpp index b91ad308..06a5fc67 100644 --- a/src/clblast_c.cpp +++ b/src/clblast_c.cpp @@ -4074,14 +4074,14 @@ CLBlastStatusCode CLBlastHgemmStridedBatched(const CLBlastLayout layout, const C // GEMM with temporary buffer (optional, for advanced users) CLBlastStatusCode CLBlastSgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const float alpha, - const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, - const float beta, - cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event, - cl_mem temp_buffer) { + const size_t m, const size_t n, const size_t k, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event, + cl_mem temp_buffer) { try { return static_cast<CLBlastStatusCode>( clblast::Gemm(static_cast<clblast::Layout>(layout), @@ -4098,14 +4098,14 @@ CLBlastStatusCode CLBlastSgemmWithTempBuffer(const CLBlastLayout layout, const C } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); } } CLBlastStatusCode CLBlastDgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const double alpha, - const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, - const double beta, - cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event, - cl_mem temp_buffer) { + const size_t m, const size_t n, const size_t k, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event, + cl_mem temp_buffer) { try { return static_cast<CLBlastStatusCode>( clblast::Gemm(static_cast<clblast::Layout>(layout), @@ -4122,14 +4122,14 @@ CLBlastStatusCode CLBlastDgemmWithTempBuffer(const CLBlastLayout layout, const C } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); } } CLBlastStatusCode CLBlastCgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const cl_float2 alpha, - const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, - const cl_float2 beta, - cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event, - cl_mem temp_buffer) { + const size_t m, const size_t n, const size_t k, + const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_float2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event, + cl_mem temp_buffer) { try { return static_cast<CLBlastStatusCode>( clblast::Gemm(static_cast<clblast::Layout>(layout), @@ -4146,14 +4146,14 @@ CLBlastStatusCode CLBlastCgemmWithTempBuffer(const CLBlastLayout layout, const C } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); } } CLBlastStatusCode CLBlastZgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const cl_double2 alpha, - const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, - const cl_double2 beta, - cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event, - cl_mem temp_buffer) { + const size_t m, const size_t n, const size_t k, + const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_double2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event, + cl_mem temp_buffer) { try { return static_cast<CLBlastStatusCode>( clblast::Gemm(static_cast<clblast::Layout>(layout), @@ -4170,14 +4170,14 @@ CLBlastStatusCode CLBlastZgemmWithTempBuffer(const CLBlastLayout layout, const C } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); } } CLBlastStatusCode CLBlastHgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose, - const size_t m, const size_t n, const size_t k, - const cl_half alpha, - const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, - const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, - const cl_half beta, - cl_mem c_buffer, const size_t c_offset, const size_t c_ld, - cl_command_queue* queue, cl_event* event, - cl_mem temp_buffer) { + const size_t m, const size_t n, const size_t k, + const cl_half alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_half beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event, + cl_mem temp_buffer) { try { return static_cast<CLBlastStatusCode>( clblast::Gemm(static_cast<clblast::Layout>(layout), @@ -4343,4 +4343,4 @@ CLBlastStatusCode PUBLIC_API CLBlastOverrideParameters(const cl_device_id device } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); } } -// =================================================================================================
\ No newline at end of file +// ================================================================================================= |