diff options
-rw-r--r-- | CMakeLists.txt | 5 | ||||
-rw-r--r-- | include/clblast_c.h | 126 | ||||
-rw-r--r-- | src/clblast_c.cc | 96 |
3 files changed, 225 insertions, 2 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ca225b2..9cf89569 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -11,7 +11,7 @@ # CMake project details cmake_minimum_required(VERSION 2.8.10) -project("clblast" CXX) +project("clblast" C CXX) set(clblast_VERSION_MAJOR 0) set(clblast_VERSION_MINOR 3) set(clblast_VERSION_PATCH 0) @@ -112,7 +112,7 @@ set(ROUTINES ${LEVEL1_ROUTINES} ${LEVEL2_ROUTINES} ${LEVEL3_ROUTINES}) # ================================================================================================== # Gathers all source-files -set(SOURCES src/clblast.cc src/database.cc src/routine.cc src/utilities.cc) +set(SOURCES src/clblast.cc src/database.cc src/routine.cc src/utilities.cc src/clblast_c.cc) foreach(ROUTINE ${LEVEL1_ROUTINES}) set(SOURCES ${SOURCES} src/routines/level1/${ROUTINE}.cc) endforeach() @@ -130,6 +130,7 @@ target_link_libraries(clblast ${OPENCL_LIBRARIES}) # Installs the library install(TARGETS clblast DESTINATION lib) install(FILES include/clblast.h DESTINATION include) +install(FILES include/clblast_c.h DESTINATION include) # ================================================================================================== diff --git a/include/clblast_c.h b/include/clblast_c.h new file mode 100644 index 00000000..85241f6c --- /dev/null +++ b/include/clblast_c.h @@ -0,0 +1,126 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file contains the plain C interface to the CLBlast BLAS routines, the counter-part of the +// normal 'clblast.h' C++ header. +// +// ================================================================================================= + +#ifndef CLBLAST_CLBLAST_C_H_ +#define CLBLAST_CLBLAST_C_H_ + +// Includes the normal OpenCL C header +#if defined(__APPLE__) || defined(__MACOSX) + #include <OpenCL/opencl.h> +#else + #include <CL/opencl.h> +#endif + +// ================================================================================================= + +// Status codes. These codes can be returned by functions declared in this header file. The error +// codes match either the standard OpenCL error codes or the clBLAS error codes. +typedef enum StatusCode_ { + + // Status codes in common with the OpenCL standard + kSuccess = 0, // CL_SUCCESS + kTempBufferAllocFailure = -4, // CL_MEM_OBJECT_ALLOCATION_FAILURE + kBuildProgramFailure = -11, // CL_BUILD_PROGRAM_FAILURE: OpenCL compilation error + kInvalidBinary = -42, // CL_INVALID_BINARY + kInvalidKernel = -48, // CL_INVALID_KERNEL + kInvalidLocalNumDimensions = -53, // CL_INVALID_WORK_DIMENSION: Too many thread dimensions + kInvalidLocalThreadsTotal = -54, // CL_INVALID_WORK_GROUP_SIZE: Too many threads in total + kInvalidLocalThreadsDim = -55, // CL_INVALID_WORK_ITEM_SIZE: ... or for a specific dimension + kInvalidTempBufferSize = -61, // CL_INVALID_BUFFER_SIZE + + // Status codes in common with the clBLAS library + kNotImplemented = -1024, // Routine or functionality not implemented yet + kInvalidMatrixA = -1022, // Matrix A is not a valid OpenCL buffer + kInvalidMatrixB = -1021, // Matrix B is not a valid OpenCL buffer + kInvalidMatrixC = -1020, // Matrix C is not a valid OpenCL buffer + kInvalidVectorX = -1019, // Vector X is not a valid OpenCL buffer + kInvalidVectorY = -1018, // Vector Y is not a valid OpenCL buffer + kInvalidDimension = -1017, // Dimensions M, N, and K have to be larger than zero + kInvalidLeadDimA = -1016, // LD of A is smaller than the matrix's first dimension + kInvalidLeadDimB = -1015, // LD of B is smaller than the matrix's first dimension + kInvalidLeadDimC = -1014, // LD of C is smaller than the matrix's first dimension + kInvalidIncrementX = -1013, // Increment of vector X cannot be zero + kInvalidIncrementY = -1012, // Increment of vector Y cannot be zero + kInsufficientMemoryA = -1011, // Matrix A's OpenCL buffer is too small + kInsufficientMemoryB = -1010, // Matrix B's OpenCL buffer is too small + kInsufficientMemoryC = -1009, // Matrix C's OpenCL buffer is too small + kInsufficientMemoryX = -1008, // Vector X's OpenCL buffer is too small + kInsufficientMemoryY = -1007, // Vector Y's OpenCL buffer is too small + + // Custom additional status codes for CLBlast + kKernelLaunchError = -2048, // Problem occurred when enqueuing the kernel + kKernelRunError = -2047, // Problem occurred while running the kernel + kInvalidLocalMemUsage = -2046, // Not enough local memory available on this device + kNoHalfPrecision = -2045, // Half precision (16-bits) not supported by the device + kNoDoublePrecision = -2044, // Double precision (64-bits) not supported by the device +} StatusCode; + +// Matrix layout and transpose types +typedef enum Layout_ { kRowMajor, kColMajor } Layout; +typedef enum Transpose_ { kNo, kYes, kConjugate } Transpose; +typedef enum Side_ { kLeft, kRight } Side; +typedef enum Triangle_ { kUpper, kLower } Triangle; +typedef enum Diagonal_ { kUnit, kNonUnit } Diagonal; + +// Precision scoped enum (values in bits) +typedef enum Precision_ { kHalf = 16, kSingle = 32, kDouble = 64, + kComplexSingle = 3232, kComplexDouble = 6464 } Precision; + +// ================================================================================================= +// BLAS level-1 (vector-vector) routines + +// ================================================================================================= +// BLAS level-2 (matrix-vector) routines + +// ================================================================================================= +// BLAS level-3 (matrix-matrix) routines + +// Generalized matrix-matrix multiplication: SGEMM/DGEMM/CGEMM/ZGEMM +StatusCode CLBlastSgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, + const size_t m, const size_t n, const size_t k, + const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const float beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +StatusCode CLBlastDgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, + const size_t m, const size_t n, const size_t k, + const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const double beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +StatusCode CLBlastCgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, + const size_t m, const size_t n, const size_t k, + const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_float2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); +StatusCode CLBlastZgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, + const size_t m, const size_t n, const size_t k, + const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, + const cl_double2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event); + +// ================================================================================================= + +// CLBLAST_CLBLAST_C_H_ +#endif diff --git a/src/clblast_c.cc b/src/clblast_c.cc new file mode 100644 index 00000000..74a74971 --- /dev/null +++ b/src/clblast_c.cc @@ -0,0 +1,96 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements all the plain C BLAS API calls. This forwards the calls to the C++ API. +// +// ================================================================================================= + +#include <string> + +extern "C" { + #include "clblast_c.h" +} +#include "clblast.h" +#include "internal/utilities.h" + +// ================================================================================================= +// BLAS level-1 (vector-vector) routines +// ================================================================================================= + +// ================================================================================================= +// BLAS level-2 (matrix-vector) routines +// ================================================================================================= + +// ================================================================================================= +// BLAS level-3 (matrix-matrix) routines +// ================================================================================================= + +// GEMM +StatusCode CLBlastSgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, + const size_t m, const size_t n, const size_t k, const float alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const float beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Gemm(static_cast<clblast::Layout>(layout), + static_cast<clblast::Transpose>(a_transpose), + static_cast<clblast::Transpose>(b_transpose), + m, n, k, alpha, + a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, + c_buffer, c_offset, c_ld, queue, event); + return static_cast<StatusCode>(status); +} +StatusCode CLBlastDgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, + const size_t m, const size_t n, const size_t k, const double alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const double beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Gemm(static_cast<clblast::Layout>(layout), + static_cast<clblast::Transpose>(a_transpose), + static_cast<clblast::Transpose>(b_transpose), + m, n, k, alpha, + a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, beta, + c_buffer, c_offset, c_ld, queue, event); + return static_cast<StatusCode>(status); +} +StatusCode CLBlastCgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, + const size_t m, const size_t n, const size_t k, const cl_float2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_float2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event) { + auto cl_alpha = clblast::float2{alpha.s[0], alpha.s[1]}; + auto cl_beta = clblast::float2{beta.s[0], beta.s[1]}; + auto status = clblast::Gemm(static_cast<clblast::Layout>(layout), + static_cast<clblast::Transpose>(a_transpose), + static_cast<clblast::Transpose>(b_transpose), + m, n, k, cl_alpha, + a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_beta, + c_buffer, c_offset, c_ld, queue, event); + return static_cast<StatusCode>(status); +} +StatusCode CLBlastZgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose, + const size_t m, const size_t n, const size_t k, const cl_double2 alpha, + const cl_mem a_buffer, const size_t a_offset, const size_t a_ld, + const cl_mem b_buffer, const size_t b_offset, const size_t b_ld, const cl_double2 beta, + cl_mem c_buffer, const size_t c_offset, const size_t c_ld, + cl_command_queue* queue, cl_event* event) { + auto cl_alpha = clblast::double2{alpha.s[0], alpha.s[1]}; + auto cl_beta = clblast::double2{beta.s[0], beta.s[1]}; + auto status = clblast::Gemm(static_cast<clblast::Layout>(layout), + static_cast<clblast::Transpose>(a_transpose), + static_cast<clblast::Transpose>(b_transpose), + m, n, k, cl_alpha, + a_buffer, a_offset, a_ld, b_buffer, b_offset, b_ld, cl_beta, + c_buffer, c_offset, c_ld, queue, event); + return static_cast<StatusCode>(status); +} + +// ================================================================================================= |