summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CHANGELOG1
-rw-r--r--CMakeLists.txt19
-rw-r--r--README.md2
-rw-r--r--include/clblast_c.h397
-rw-r--r--samples/sgemm.c108
-rw-r--r--src/clblast_c.cc758
6 files changed, 1278 insertions, 7 deletions
diff --git a/CHANGELOG b/CHANGELOG
index 4a87ecb5..07810499 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,7 @@
Development version (next release)
- Now using the Claduc C++11 interface to OpenCL
- Removed clBLAS sources, it should now be installed separately for testing
+- Added plain C API for increased compatibility (clblast_c.h)
- Added level-2 routines:
* CHEMV/ZHEMV
* SSYMV/DSYMV
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 8ca225b2..8b598bf8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,7 +11,7 @@
# CMake project details
cmake_minimum_required(VERSION 2.8.10)
-project("clblast" CXX)
+project("clblast" C CXX)
set(clblast_VERSION_MAJOR 0)
set(clblast_VERSION_MINOR 3)
set(clblast_VERSION_PATCH 0)
@@ -103,7 +103,8 @@ include_directories(${clblast_SOURCE_DIR}/include ${OPENCL_INCLUDE_DIRS})
# Sets the supported routines and the used kernels. New routines and kernels should be added here.
set(KERNELS copy pad transpose padtranspose xaxpy xgemv xgemm)
-set(SAMPLE_PROGRAMS sgemm)
+set(SAMPLE_PROGRAMS_CPP sgemm)
+set(SAMPLE_PROGRAMS_C sgemm)
set(LEVEL1_ROUTINES xaxpy)
set(LEVEL2_ROUTINES xgemv xhemv xsymv)
set(LEVEL3_ROUTINES xgemm xsymm xhemm xsyrk xherk xsyr2k xher2k xtrmm)
@@ -112,7 +113,7 @@ set(ROUTINES ${LEVEL1_ROUTINES} ${LEVEL2_ROUTINES} ${LEVEL3_ROUTINES})
# ==================================================================================================
# Gathers all source-files
-set(SOURCES src/clblast.cc src/database.cc src/routine.cc src/utilities.cc)
+set(SOURCES src/clblast.cc src/database.cc src/routine.cc src/utilities.cc src/clblast_c.cc)
foreach(ROUTINE ${LEVEL1_ROUTINES})
set(SOURCES ${SOURCES} src/routines/level1/${ROUTINE}.cc)
endforeach()
@@ -130,19 +131,27 @@ target_link_libraries(clblast ${OPENCL_LIBRARIES})
# Installs the library
install(TARGETS clblast DESTINATION lib)
install(FILES include/clblast.h DESTINATION include)
+install(FILES include/clblast_c.h DESTINATION include)
# ==================================================================================================
# This section contains all the code related to the examples
if(SAMPLES)
- # Adds sample programs
- foreach(SAMPLE ${SAMPLE_PROGRAMS})
+ # Adds sample programs (C++)
+ foreach(SAMPLE ${SAMPLE_PROGRAMS_CPP})
add_executable(sample_${SAMPLE} samples/${SAMPLE}.cc)
target_link_libraries(sample_${SAMPLE} clblast ${OPENCL_LIBRARIES})
install(TARGETS sample_${SAMPLE} DESTINATION bin)
endforeach()
+ # Adds sample programs (C)
+ foreach(SAMPLE ${SAMPLE_PROGRAMS_C})
+ add_executable(sample_${SAMPLE}_c samples/${SAMPLE}.c)
+ target_link_libraries(sample_${SAMPLE}_c clblast ${OPENCL_LIBRARIES})
+ install(TARGETS sample_${SAMPLE}_c DESTINATION bin)
+ endforeach()
+
endif()
# ==================================================================================================
diff --git a/README.md b/README.md
index 965da95e..386e7bf9 100644
--- a/README.md
+++ b/README.md
@@ -208,8 +208,6 @@ To-do list before release of version 1.0
- Increase the functionality:
* Support all routines supported by clBLAS
* Allow the user control over events and synchronization
- * Add an interface with OpenCL C++ data-types
- * Add an old-style C compatible interface
* Add half-precision routines (e.g. HGEMM)
- Improve host performance:
* Allow initialization to pre-compile kernels and store to disk
diff --git a/include/clblast_c.h b/include/clblast_c.h
new file mode 100644
index 00000000..c25e5880
--- /dev/null
+++ b/include/clblast_c.h
@@ -0,0 +1,397 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file contains the plain C interface to the CLBlast BLAS routines, the counter-part of the
+// normal 'clblast.h' C++ header.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_CLBLAST_C_H_
+#define CLBLAST_CLBLAST_C_H_
+
+// Includes the normal OpenCL C header
+#if defined(__APPLE__) || defined(__MACOSX)
+ #include <OpenCL/opencl.h>
+#else
+ #include <CL/opencl.h>
+#endif
+
+// =================================================================================================
+
+// Status codes. These codes can be returned by functions declared in this header file. The error
+// codes match either the standard OpenCL error codes or the clBLAS error codes.
+typedef enum StatusCode_ {
+
+ // Status codes in common with the OpenCL standard
+ kSuccess = 0, // CL_SUCCESS
+ kTempBufferAllocFailure = -4, // CL_MEM_OBJECT_ALLOCATION_FAILURE
+ kBuildProgramFailure = -11, // CL_BUILD_PROGRAM_FAILURE: OpenCL compilation error
+ kInvalidBinary = -42, // CL_INVALID_BINARY
+ kInvalidKernel = -48, // CL_INVALID_KERNEL
+ kInvalidLocalNumDimensions = -53, // CL_INVALID_WORK_DIMENSION: Too many thread dimensions
+ kInvalidLocalThreadsTotal = -54, // CL_INVALID_WORK_GROUP_SIZE: Too many threads in total
+ kInvalidLocalThreadsDim = -55, // CL_INVALID_WORK_ITEM_SIZE: ... or for a specific dimension
+ kInvalidTempBufferSize = -61, // CL_INVALID_BUFFER_SIZE
+
+ // Status codes in common with the clBLAS library
+ kNotImplemented = -1024, // Routine or functionality not implemented yet
+ kInvalidMatrixA = -1022, // Matrix A is not a valid OpenCL buffer
+ kInvalidMatrixB = -1021, // Matrix B is not a valid OpenCL buffer
+ kInvalidMatrixC = -1020, // Matrix C is not a valid OpenCL buffer
+ kInvalidVectorX = -1019, // Vector X is not a valid OpenCL buffer
+ kInvalidVectorY = -1018, // Vector Y is not a valid OpenCL buffer
+ kInvalidDimension = -1017, // Dimensions M, N, and K have to be larger than zero
+ kInvalidLeadDimA = -1016, // LD of A is smaller than the matrix's first dimension
+ kInvalidLeadDimB = -1015, // LD of B is smaller than the matrix's first dimension
+ kInvalidLeadDimC = -1014, // LD of C is smaller than the matrix's first dimension
+ kInvalidIncrementX = -1013, // Increment of vector X cannot be zero
+ kInvalidIncrementY = -1012, // Increment of vector Y cannot be zero
+ kInsufficientMemoryA = -1011, // Matrix A's OpenCL buffer is too small
+ kInsufficientMemoryB = -1010, // Matrix B's OpenCL buffer is too small
+ kInsufficientMemoryC = -1009, // Matrix C's OpenCL buffer is too small
+ kInsufficientMemoryX = -1008, // Vector X's OpenCL buffer is too small
+ kInsufficientMemoryY = -1007, // Vector Y's OpenCL buffer is too small
+
+ // Custom additional status codes for CLBlast
+ kKernelLaunchError = -2048, // Problem occurred when enqueuing the kernel
+ kKernelRunError = -2047, // Problem occurred while running the kernel
+ kInvalidLocalMemUsage = -2046, // Not enough local memory available on this device
+ kNoHalfPrecision = -2045, // Half precision (16-bits) not supported by the device
+ kNoDoublePrecision = -2044, // Double precision (64-bits) not supported by the device
+} StatusCode;
+
+// Matrix layout and transpose types
+typedef enum Layout_ { kRowMajor, kColMajor } Layout;
+typedef enum Transpose_ { kNo, kYes, kConjugate } Transpose;
+typedef enum Side_ { kLeft, kRight } Side;
+typedef enum Triangle_ { kUpper, kLower } Triangle;
+typedef enum Diagonal_ { kUnit, kNonUnit } Diagonal;
+
+// Precision scoped enum (values in bits)
+typedef enum Precision_ { kHalf = 16, kSingle = 32, kDouble = 64,
+ kComplexSingle = 3232, kComplexDouble = 6464 } Precision;
+
+// =================================================================================================
+// BLAS level-1 (vector-vector) routines
+// =================================================================================================
+
+// Vector-times-constant plus vector: SAXPY/DAXPY/CAXPY/ZAXPY
+StatusCode CLBlastSaxpy(const size_t n,
+ const float alpha,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastDaxpy(const size_t n,
+ const double alpha,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastCaxpy(const size_t n,
+ const cl_float2 alpha,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastZaxpy(const size_t n,
+ const cl_double2 alpha,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event);
+
+// =================================================================================================
+// BLAS level-2 (matrix-vector) routines
+// =================================================================================================
+
+// Generalized matrix-vector multiplication: SGEMV/DGEMV/CGEMV/ZGEMV
+StatusCode CLBlastSgemv(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n,
+ const float alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const float beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastDgemv(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n,
+ const double alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const double beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastCgemv(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_float2 beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastZgemv(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_double2 beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event);
+
+// Hermitian matrix-vector multiplication: CHEMV/ZHEMV
+StatusCode CLBlastChemv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_float2 beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastZhemv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_double2 beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event);
+
+// Symmetric matrix-vector multiplication: SSYMV/DSYMV
+StatusCode CLBlastSsymv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const float alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const float beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastDsymv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const double alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const double beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event);
+
+// =================================================================================================
+// BLAS level-3 (matrix-matrix) routines
+// =================================================================================================
+
+// Generalized matrix-matrix multiplication: SGEMM/DGEMM/CGEMM/ZGEMM
+StatusCode CLBlastSgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const float alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const float beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastDgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const double alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const double beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastCgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_float2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastZgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_double2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+
+// Symmetric matrix-matrix multiplication: SSYMM/DSYMM/CSYMM/ZSYMM
+StatusCode CLBlastSsymm(const Layout layout, const Side side, const Triangle triangle,
+ const size_t m, const size_t n,
+ const float alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const float beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastDsymm(const Layout layout, const Side side, const Triangle triangle,
+ const size_t m, const size_t n,
+ const double alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const double beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastCsymm(const Layout layout, const Side side, const Triangle triangle,
+ const size_t m, const size_t n,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_float2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastZsymm(const Layout layout, const Side side, const Triangle triangle,
+ const size_t m, const size_t n,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_double2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+
+// Hermitian matrix-matrix multiplication: CHEMM/ZHEMM
+StatusCode CLBlastChemm(const Layout layout, const Side side, const Triangle triangle,
+ const size_t m, const size_t n,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_float2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastZhemm(const Layout layout, const Side side, const Triangle triangle,
+ const size_t m, const size_t n,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_double2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+
+// Rank-K update of a symmetric matrix: SSYRK/DSYRK/CSYRK/ZSYRK
+StatusCode CLBlastSsyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
+ const size_t n, const size_t k,
+ const float alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const float beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastDsyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
+ const size_t n, const size_t k,
+ const double alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const double beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastCsyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
+ const size_t n, const size_t k,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_float2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastZsyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
+ const size_t n, const size_t k,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_double2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+
+// Rank-K update of a hermitian matrix: CHERK/ZHERK
+StatusCode CLBlastCherk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
+ const size_t n, const size_t k,
+ const float alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const float beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastZherk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
+ const size_t n, const size_t k,
+ const double alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const double beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+
+// Rank-2K update of a symmetric matrix: SSYR2K/DSYR2K/CSYR2K/ZSYR2K
+StatusCode CLBlastSsyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
+ const size_t n, const size_t k,
+ const float alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const float beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastDsyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
+ const size_t n, const size_t k,
+ const double alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const double beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastCsyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
+ const size_t n, const size_t k,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_float2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastZsyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
+ const size_t n, const size_t k,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_double2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+
+// Rank-2K update of a hermitian matrix: CHER2K/ZHER2K
+StatusCode CLBlastCher2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
+ const size_t n, const size_t k,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const float beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastZher2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
+ const size_t n, const size_t k,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const double beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event);
+
+// Triangular matrix-matrix multiplication: STRMM/DTRMM/CTRMM/ZTRMM
+StatusCode CLBlastStrmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
+ const size_t m, const size_t n,
+ const float alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastDtrmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
+ const size_t m, const size_t n,
+ const double alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastCtrmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
+ const size_t m, const size_t n,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ cl_command_queue* queue, cl_event* event);
+StatusCode CLBlastZtrmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
+ const size_t m, const size_t n,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ cl_command_queue* queue, cl_event* event);
+
+// =================================================================================================
+
+// CLBLAST_CLBLAST_C_H_
+#endif
diff --git a/samples/sgemm.c b/samples/sgemm.c
new file mode 100644
index 00000000..f43fb147
--- /dev/null
+++ b/samples/sgemm.c
@@ -0,0 +1,108 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file demonstrates the use of the SGEMM routine. It is pure C99 and demonstrates the use of
+// the C API to the CLBlast library.
+//
+// Note that this example is meant for illustration purposes only. CLBlast provides other programs
+// for performance benchmarking ('client_xxxxx') and for correctness testing ('test_xxxxx').
+//
+// =================================================================================================
+
+#include <stdio.h>
+#include <string.h>
+
+// Includes the CLBlast library (C interface)
+#include <clblast_c.h>
+
+// =================================================================================================
+
+// Example use of the single-precision routine SGEMM
+int main(void) {
+
+ // OpenCL platform/device settings
+ const size_t platform_id = 0;
+ const size_t device_id = 0;
+
+ // Example SGEMM arguments
+ const size_t m = 128;
+ const size_t n = 64;
+ const size_t k = 512;
+ const float alpha = 0.7f;
+ const float beta = 1.0f;
+ const size_t a_ld = k;
+ const size_t b_ld = n;
+ const size_t c_ld = n;
+
+ // Initializes the OpenCL platform
+ cl_uint num_platforms;
+ clGetPlatformIDs(0, NULL, &num_platforms);
+ cl_platform_id* platforms = (cl_platform_id*)malloc(num_platforms*sizeof(cl_platform_id));
+ clGetPlatformIDs(num_platforms, platforms, NULL);
+ cl_platform_id platform = platforms[platform_id];
+
+ // Initializes the OpenCL device (note: example for GPU devices only)
+ cl_uint num_devices;
+ clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 0, NULL, &num_devices);
+ cl_device_id* devices = (cl_device_id*)malloc(num_devices*sizeof(cl_device_id));
+ clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, num_devices, devices, NULL);
+ cl_device_id device = devices[device_id];
+
+ // Creates the OpenCL context, queue, and an event
+ cl_context context = clCreateContext(NULL, 1, &device, NULL, NULL, NULL);
+ cl_command_queue queue = clCreateCommandQueue(context, device, 0, NULL);
+ cl_event event = NULL;
+
+ // Populate host matrices with some example data
+ float* host_a = (float*)malloc(sizeof(float)*m*k);
+ float* host_b = (float*)malloc(sizeof(float)*n*k);
+ float* host_c = (float*)malloc(sizeof(float)*m*n);
+ for (size_t i=0; i<m*k; ++i) { host_a[i] = 12.193f; }
+ for (size_t i=0; i<n*k; ++i) { host_b[i] = -8.199f; }
+ for (size_t i=0; i<m*n; ++i) { host_c[i] = 0.0f; }
+
+ // Copy the matrices to the device
+ cl_mem device_a = clCreateBuffer(context, CL_MEM_READ_WRITE, m*k*sizeof(float), NULL, NULL);
+ cl_mem device_b = clCreateBuffer(context, CL_MEM_READ_WRITE, n*k*sizeof(float), NULL, NULL);
+ cl_mem device_c = clCreateBuffer(context, CL_MEM_READ_WRITE, m*n*sizeof(float), NULL, NULL);
+ clEnqueueWriteBuffer(queue, device_a, CL_TRUE, 0, m*k*sizeof(float), host_a, 0, NULL, NULL);
+ clEnqueueWriteBuffer(queue, device_b, CL_TRUE, 0, n*k*sizeof(float), host_b, 0, NULL, NULL);
+ clEnqueueWriteBuffer(queue, device_c, CL_TRUE, 0, m*n*sizeof(float), host_c, 0, NULL, NULL);
+
+ // Call the SGEMM routine.
+ StatusCode status = CLBlastSgemm(kRowMajor, kNo, kNo,
+ m, n, k,
+ alpha,
+ device_a, 0, a_ld,
+ device_b, 0, b_ld,
+ beta,
+ device_c, 0, c_ld,
+ &queue, &event);
+
+ // Wait for completion
+ clWaitForEvents(1, &event);
+
+ // Example completed. See "clblast_c.h" for status codes (0 -> success).
+ printf("Completed with status %d\n", status);
+
+ // Clean-up
+ free(platforms);
+ free(devices);
+ free(host_a);
+ free(host_b);
+ free(host_c);
+ clReleaseMemObject(device_a);
+ clReleaseMemObject(device_b);
+ clReleaseMemObject(device_c);
+ clReleaseCommandQueue(queue);
+ clReleaseContext(context);
+ return 0;
+}
+
+// =================================================================================================
diff --git a/src/clblast_c.cc b/src/clblast_c.cc
new file mode 100644
index 00000000..3b437aff
--- /dev/null
+++ b/src/clblast_c.cc
@@ -0,0 +1,758 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements all the plain C BLAS API calls. This forwards the calls to the C++ API.
+//
+// =================================================================================================
+
+#include <string>
+
+extern "C" {
+ #include "clblast_c.h"
+}
+#include "clblast.h"
+#include "internal/utilities.h"
+
+// =================================================================================================
+// BLAS level-1 (vector-vector) routines
+// =================================================================================================
+
+// AXPY
+StatusCode CLBlastSaxpy(const size_t n,
+ const float alpha,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Axpy(n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastDaxpy(const size_t n,
+ const double alpha,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Axpy(n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastCaxpy(const size_t n,
+ const cl_float2 alpha,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Axpy(n,
+ clblast::float2{alpha.s[0], alpha.s[1]},
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastZaxpy(const size_t n,
+ const cl_double2 alpha,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Axpy(n,
+ clblast::double2{alpha.s[0], alpha.s[1]},
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+
+// =================================================================================================
+// BLAS level-2 (matrix-vector) routines
+// =================================================================================================
+
+// GEMV
+StatusCode CLBlastSgemv(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n,
+ const float alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const float beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Gemv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastDgemv(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n,
+ const double alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const double beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Gemv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastCgemv(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_float2 beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Gemv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n,
+ clblast::float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ clblast::float2{beta.s[0], beta.s[1]},
+ y_buffer, y_offset, y_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastZgemv(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_double2 beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Gemv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n,
+ clblast::double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ clblast::double2{beta.s[0], beta.s[1]},
+ y_buffer, y_offset, y_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+
+// HEMV
+StatusCode CLBlastChemv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_float2 beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Hemv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ clblast::float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ clblast::float2{beta.s[0], beta.s[1]},
+ y_buffer, y_offset, y_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastZhemv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_double2 beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Hemv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ clblast::double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ clblast::double2{beta.s[0], beta.s[1]},
+ y_buffer, y_offset, y_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+
+// SYMV
+StatusCode CLBlastSsymv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const float alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const float beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Symv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastDsymv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const double alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const double beta,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Symv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+
+// =================================================================================================
+// BLAS level-3 (matrix-matrix) routines
+// =================================================================================================
+
+// GEMM
+StatusCode CLBlastSgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const float alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const float beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Gemm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Transpose>(b_transpose),
+ m, n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastDgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const double alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const double beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Gemm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Transpose>(b_transpose),
+ m, n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastCgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_float2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Gemm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Transpose>(b_transpose),
+ m, n, k,
+ clblast::float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ clblast::float2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastZgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_double2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Gemm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Transpose>(b_transpose),
+ m, n, k,
+ clblast::double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ clblast::double2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+
+// SYMM
+StatusCode CLBlastSsymm(const Layout layout, const Side side, const Triangle triangle,
+ const size_t m, const size_t n,
+ const float alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const float beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Symm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastDsymm(const Layout layout, const Side side, const Triangle triangle,
+ const size_t m, const size_t n,
+ const double alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const double beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Symm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastCsymm(const Layout layout, const Side side, const Triangle triangle,
+ const size_t m, const size_t n,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_float2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Symm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ m, n,
+ clblast::float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ clblast::float2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastZsymm(const Layout layout, const Side side, const Triangle triangle,
+ const size_t m, const size_t n,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_double2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Symm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ m, n,
+ clblast::double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ clblast::double2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+
+// HEMM
+StatusCode CLBlastChemm(const Layout layout, const Side side, const Triangle triangle,
+ const size_t m, const size_t n,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_float2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Hemm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ m, n,
+ clblast::float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ clblast::float2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastZhemm(const Layout layout, const Side side, const Triangle triangle,
+ const size_t m, const size_t n,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_double2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Hemm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ m, n,
+ clblast::double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ clblast::double2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+
+// SYRK
+StatusCode CLBlastSsyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
+ const size_t n, const size_t k,
+ const float alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const float beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Syrk(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastDsyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
+ const size_t n, const size_t k,
+ const double alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const double beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Syrk(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastCsyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
+ const size_t n, const size_t k,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_float2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Syrk(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ n, k,
+ clblast::float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ clblast::float2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastZsyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
+ const size_t n, const size_t k,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_double2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Syrk(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ n, k,
+ clblast::double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ clblast::double2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+
+// HERK
+StatusCode CLBlastCherk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
+ const size_t n, const size_t k,
+ const float alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const float beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Herk(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastZherk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
+ const size_t n, const size_t k,
+ const double alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const double beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Herk(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+
+// SYR2K
+StatusCode CLBlastSsyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
+ const size_t n, const size_t k,
+ const float alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const float beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Syr2k(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(ab_transpose),
+ n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastDsyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
+ const size_t n, const size_t k,
+ const double alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const double beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Syr2k(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(ab_transpose),
+ n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastCsyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
+ const size_t n, const size_t k,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_float2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Syr2k(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(ab_transpose),
+ n, k,
+ clblast::float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ clblast::float2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastZsyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
+ const size_t n, const size_t k,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const cl_double2 beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Syr2k(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(ab_transpose),
+ n, k,
+ clblast::double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ clblast::double2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+
+// HER2K
+StatusCode CLBlastCher2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
+ const size_t n, const size_t k,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const float beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Her2k(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(ab_transpose),
+ n, k,
+ clblast::float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastZher2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
+ const size_t n, const size_t k,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ const cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ const double beta,
+ cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Her2k(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(ab_transpose),
+ n, k,
+ clblast::double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+
+// TRMM
+StatusCode CLBlastStrmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
+ const size_t m, const size_t n,
+ const float alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Trmm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastDtrmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
+ const size_t m, const size_t n,
+ const double alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Trmm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastCtrmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
+ const size_t m, const size_t n,
+ const cl_float2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Trmm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ m, n,
+ clblast::float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+StatusCode CLBlastZtrmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
+ const size_t m, const size_t n,
+ const cl_double2 alpha,
+ const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
+ cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Trmm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ m, n,
+ clblast::double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
+
+// =================================================================================================