summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2018-03-03 10:43:12 +0100
committer Cedric Nugteren <web@cedricnugteren.nl> 2018-03-03 10:43:12 +0100
commit bff64917bd5be9005c68e439517fbde06623e616 (patch)
tree 2faa9bc3c5a4e54ecf24b242268c9e0949320322
parent e3384be0d0338b11e7d817d160fcfd62e57f1e27 (diff)
Fixed some small issues regarding PR#253
-rw-r--r-- doc/api.md 44
-rw-r--r-- include/clblast_c.h 130
-rwxr-xr-x scripts/generator/generator.py 2
-rw-r--r-- src/clblast_c.cpp 82
4 files changed, 107 insertions, 151 deletions
diff --git a/doc/api.md b/doc/api.md
index 3e26688c..0fbdeaa0 100644
--- a/doc/api.md
+++ b/doc/api.md
@@ -2236,50 +2236,6 @@ CLBlastStatusCode CLBlastHgemm(const CLBlastLayout layout, const CLBlastTranspos
cl_command_queue* queue, cl_event* event)
```
-C API with temporary buffer (user to allocate & pass `temp_buffer` with size provided by xGemmTempBufferSize() ):
-```
-CLBlastStatusCode CLBlastSgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
- const float beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue, cl_event* event, cl_mem temp_buffer)
-CLBlastStatusCode CLBlastDgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
- const double beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue, cl_event* event, cl_mem temp_buffer)
-CLBlastStatusCode CLBlastCgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const cl_float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
- const cl_float2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue, cl_event* event, cl_mem temp_buffer)
-CLBlastStatusCode CLBlastZgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const cl_double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
- const cl_double2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue, cl_event* event, cl_mem temp_buffer)
-CLBlastStatusCode CLBlastHgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const cl_half alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
- const cl_half beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue, cl_event* event, cl_mem temp_buffer)
-```
-
Arguments to GEMM:
* `const Layout layout`: Data-layout of the matrices, either `Layout::kRowMajor` (101) for row-major layout or `Layout::kColMajor` (102) for column-major data-layout.
diff --git a/include/clblast_c.h b/include/clblast_c.h
index 051871ce..23a3afcc 100644
--- a/include/clblast_c.h
+++ b/include/clblast_c.h
@@ -1539,87 +1539,87 @@ CLBlastStatusCode PUBLIC_API CLBlastHgemmStridedBatched(const CLBlastLayout layo
// =================================================================================================
// General matrix-matrix multiplication with temporary buffer from user (optional, for advanced users): SGEMM/DGEMM/CGEMM/ZGEMM/HGEMM
CLBlastStatusCode PUBLIC_API CLBlastSgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
- const float beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
+ const size_t m, const size_t n, const size_t k,
+ const float alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const float beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
CLBlastStatusCode PUBLIC_API CLBlastDgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
- const double beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
+ const size_t m, const size_t n, const size_t k,
+ const double alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const double beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
CLBlastStatusCode PUBLIC_API CLBlastCgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const cl_float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
- const cl_float2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
+ const size_t m, const size_t n, const size_t k,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_float2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
CLBlastStatusCode PUBLIC_API CLBlastZgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const cl_double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
- const cl_double2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
+ const size_t m, const size_t n, const size_t k,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_double2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
CLBlastStatusCode PUBLIC_API CLBlastHgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const cl_half alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
- const cl_half beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
+ const size_t m, const size_t n, const size_t k,
+ const cl_half alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_half beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event, cl_mem temp_buffer);
// =================================================================================================
// Retrieves the required size of the temporary buffer for the GEMM kernel: SGEMM/DGEMM/CGEMM/ZGEMM/HGEMM (optional)
CLBlastStatusCode PUBLIC_API CLBlastSGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const size_t a_offset, const size_t a_ld,
- const size_t b_offset, const size_t b_ld,
- const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue,
- size_t* temp_buffer_size);
+ const size_t m, const size_t n, const size_t k,
+ const size_t a_offset, const size_t a_ld,
+ const size_t b_offset, const size_t b_ld,
+ const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue,
+ size_t* temp_buffer_size);
CLBlastStatusCode PUBLIC_API CLBlastDGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const size_t a_offset, const size_t a_ld,
- const size_t b_offset, const size_t b_ld,
- const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue,
- size_t* temp_buffer_size);
+ const size_t m, const size_t n, const size_t k,
+ const size_t a_offset, const size_t a_ld,
+ const size_t b_offset, const size_t b_ld,
+ const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue,
+ size_t* temp_buffer_size);
CLBlastStatusCode PUBLIC_API CLBlastCGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const size_t a_offset, const size_t a_ld,
- const size_t b_offset, const size_t b_ld,
- const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue,
- size_t* temp_buffer_size);
+ const size_t m, const size_t n, const size_t k,
+ const size_t a_offset, const size_t a_ld,
+ const size_t b_offset, const size_t b_ld,
+ const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue,
+ size_t* temp_buffer_size);
CLBlastStatusCode PUBLIC_API CLBlastZGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const size_t a_offset, const size_t a_ld,
- const size_t b_offset, const size_t b_ld,
- const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue,
- size_t* temp_buffer_size);
+ const size_t m, const size_t n, const size_t k,
+ const size_t a_offset, const size_t a_ld,
+ const size_t b_offset, const size_t b_ld,
+ const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue,
+ size_t* temp_buffer_size);
CLBlastStatusCode PUBLIC_API CLBlastHGemmTempBufferSize(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const size_t a_offset, const size_t a_ld,
- const size_t b_offset, const size_t b_ld,
- const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue,
- size_t* temp_buffer_size);
+ const size_t m, const size_t n, const size_t k,
+ const size_t a_offset, const size_t a_ld,
+ const size_t b_offset, const size_t b_ld,
+ const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue,
+ size_t* temp_buffer_size);
// =================================================================================================
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index fddfd86e..5ead9b73 100755
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -52,7 +52,7 @@ FILES = [
HEADER_LINES = [123, 21, 127, 24, 29, 41, 29, 65, 32, 95, 21, 288]
FOOTER_LINES = [41, 56, 112, 275, 6, 6, 6, 9, 2, 41, 55, 1]
HEADER_LINES_DOC = 0
-FOOTER_LINES_DOC = 123
+FOOTER_LINES_DOC = 158
# Different possibilities for requirements
ald_m = "The value of `a_ld` must be at least `m`."
diff --git a/src/clblast_c.cpp b/src/clblast_c.cpp
index b91ad308..06a5fc67 100644
--- a/src/clblast_c.cpp
+++ b/src/clblast_c.cpp
@@ -4074,14 +4074,14 @@ CLBlastStatusCode CLBlastHgemmStridedBatched(const CLBlastLayout layout, const C
// GEMM with temporary buffer (optional, for advanced users)
CLBlastStatusCode CLBlastSgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const float alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
- const float beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue, cl_event* event,
- cl_mem temp_buffer) {
+ const size_t m, const size_t n, const size_t k,
+ const float alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const float beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event,
+ cl_mem temp_buffer) {
try {
return static_cast<CLBlastStatusCode>(
clblast::Gemm(static_cast<clblast::Layout>(layout),
@@ -4098,14 +4098,14 @@ CLBlastStatusCode CLBlastSgemmWithTempBuffer(const CLBlastLayout layout, const C
} catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); }
}
CLBlastStatusCode CLBlastDgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const double alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
- const double beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue, cl_event* event,
- cl_mem temp_buffer) {
+ const size_t m, const size_t n, const size_t k,
+ const double alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const double beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event,
+ cl_mem temp_buffer) {
try {
return static_cast<CLBlastStatusCode>(
clblast::Gemm(static_cast<clblast::Layout>(layout),
@@ -4122,14 +4122,14 @@ CLBlastStatusCode CLBlastDgemmWithTempBuffer(const CLBlastLayout layout, const C
} catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); }
}
CLBlastStatusCode CLBlastCgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const cl_float2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
- const cl_float2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue, cl_event* event,
- cl_mem temp_buffer) {
+ const size_t m, const size_t n, const size_t k,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_float2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event,
+ cl_mem temp_buffer) {
try {
return static_cast<CLBlastStatusCode>(
clblast::Gemm(static_cast<clblast::Layout>(layout),
@@ -4146,14 +4146,14 @@ CLBlastStatusCode CLBlastCgemmWithTempBuffer(const CLBlastLayout layout, const C
} catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); }
}
CLBlastStatusCode CLBlastZgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const cl_double2 alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
- const cl_double2 beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue, cl_event* event,
- cl_mem temp_buffer) {
+ const size_t m, const size_t n, const size_t k,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_double2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event,
+ cl_mem temp_buffer) {
try {
return static_cast<CLBlastStatusCode>(
clblast::Gemm(static_cast<clblast::Layout>(layout),
@@ -4170,14 +4170,14 @@ CLBlastStatusCode CLBlastZgemmWithTempBuffer(const CLBlastLayout layout, const C
} catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); }
}
CLBlastStatusCode CLBlastHgemmWithTempBuffer(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const CLBlastTranspose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const cl_half alpha,
- const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
- const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
- const cl_half beta,
- cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
- cl_command_queue* queue, cl_event* event,
- cl_mem temp_buffer) {
+ const size_t m, const size_t n, const size_t k,
+ const cl_half alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_half beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event,
+ cl_mem temp_buffer) {
try {
return static_cast<CLBlastStatusCode>(
clblast::Gemm(static_cast<clblast::Layout>(layout),
@@ -4343,4 +4343,4 @@ CLBlastStatusCode PUBLIC_API CLBlastOverrideParameters(const cl_device_id device
} catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); }
}
-// =================================================================================================
\ No newline at end of file
+// =================================================================================================