summaryrefslogtreecommitdiff
path: root/external/clBLAS/src/client/clGemm.h
diff options
context:
space:
mode:
Diffstat (limited to 'external/clBLAS/src/client/clGemm.h')
-rw-r--r--external/clBLAS/src/client/clGemm.h627
1 files changed, 0 insertions, 627 deletions
diff --git a/external/clBLAS/src/client/clGemm.h b/external/clBLAS/src/client/clGemm.h
deleted file mode 100644
index 8f201b7c..00000000
--- a/external/clBLAS/src/client/clGemm.h
+++ /dev/null
@@ -1,627 +0,0 @@
-/* ************************************************************************
- * Copyright 2013 Advanced Micro Devices, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * ************************************************************************/
-
-#include <iostream>
-#include <clBLAS.h>
-#include <string>
-#include <map>
-
-cl_int gemm_err;
-
-std::string prettyPrintClStatus( const cl_int& status )
-{
- switch( status )
- {
- case CL_INVALID_GLOBAL_WORK_SIZE:
- return "CL_INVALID_GLOBAL_WORK_SIZE";
- case CL_INVALID_MIP_LEVEL:
- return "CL_INVALID_MIP_LEVEL";
- case CL_INVALID_BUFFER_SIZE:
- return "CL_INVALID_BUFFER_SIZE";
- case CL_INVALID_GL_OBJECT:
- return "CL_INVALID_GL_OBJECT";
- case CL_INVALID_OPERATION:
- return "CL_INVALID_OPERATION";
- case CL_INVALID_EVENT:
- return "CL_INVALID_EVENT";
- case CL_INVALID_EVENT_WAIT_LIST:
- return "CL_INVALID_EVENT_WAIT_LIST";
- case CL_INVALID_GLOBAL_OFFSET:
- return "CL_INVALID_GLOBAL_OFFSET";
- case CL_INVALID_WORK_ITEM_SIZE:
- return "CL_INVALID_WORK_ITEM_SIZE";
- case CL_INVALID_WORK_GROUP_SIZE:
- return "CL_INVALID_WORK_GROUP_SIZE";
- case CL_INVALID_WORK_DIMENSION:
- return "CL_INVALID_WORK_DIMENSION";
- case CL_INVALID_KERNEL_ARGS:
- return "CL_INVALID_KERNEL_ARGS";
- case CL_INVALID_ARG_SIZE:
- return "CL_INVALID_ARG_SIZE";
- case CL_INVALID_ARG_VALUE:
- return "CL_INVALID_ARG_VALUE";
- case CL_INVALID_ARG_INDEX:
- return "CL_INVALID_ARG_INDEX";
- case CL_INVALID_KERNEL:
- return "CL_INVALID_KERNEL";
- case CL_INVALID_KERNEL_DEFINITION:
- return "CL_INVALID_KERNEL_DEFINITION";
- case CL_INVALID_KERNEL_NAME:
- return "CL_INVALID_KERNEL_NAME";
- case CL_INVALID_PROGRAM_EXECUTABLE:
- return "CL_INVALID_PROGRAM_EXECUTABLE";
- case CL_INVALID_PROGRAM:
- return "CL_INVALID_PROGRAM";
- case CL_INVALID_BUILD_OPTIONS:
- return "CL_INVALID_BUILD_OPTIONS";
- case CL_INVALID_BINARY:
- return "CL_INVALID_BINARY";
- case CL_INVALID_SAMPLER:
- return "CL_INVALID_SAMPLER";
- case CL_INVALID_IMAGE_SIZE:
- return "CL_INVALID_IMAGE_SIZE";
- case CL_INVALID_IMAGE_FORMAT_DESCRIPTOR:
- return "CL_INVALID_IMAGE_FORMAT_DESCRIPTOR";
- case CL_INVALID_MEM_OBJECT:
- return "CL_INVALID_MEM_OBJECT";
- case CL_INVALID_HOST_PTR:
- return "CL_INVALID_HOST_PTR";
- case CL_INVALID_COMMAND_QUEUE:
- return "CL_INVALID_COMMAND_QUEUE";
- case CL_INVALID_QUEUE_PROPERTIES:
- return "CL_INVALID_QUEUE_PROPERTIES";
- case CL_INVALID_CONTEXT:
- return "CL_INVALID_CONTEXT";
- case CL_INVALID_DEVICE:
- return "CL_INVALID_DEVICE";
- case CL_INVALID_PLATFORM:
- return "CL_INVALID_PLATFORM";
- case CL_INVALID_DEVICE_TYPE:
- return "CL_INVALID_DEVICE_TYPE";
- case CL_INVALID_VALUE:
- return "CL_INVALID_VALUE";
- case CL_MAP_FAILURE:
- return "CL_MAP_FAILURE";
- case CL_BUILD_PROGRAM_FAILURE:
- return "CL_BUILD_PROGRAM_FAILURE";
- case CL_IMAGE_FORMAT_NOT_SUPPORTED:
- return "CL_IMAGE_FORMAT_NOT_SUPPORTED";
- case CL_IMAGE_FORMAT_MISMATCH:
- return "CL_IMAGE_FORMAT_MISMATCH";
- case CL_MEM_COPY_OVERLAP:
- return "CL_MEM_COPY_OVERLAP";
- case CL_PROFILING_INFO_NOT_AVAILABLE:
- return "CL_PROFILING_INFO_NOT_AVAILABLE";
- case CL_OUT_OF_HOST_MEMORY:
- return "CL_OUT_OF_HOST_MEMORY";
- case CL_OUT_OF_RESOURCES:
- return "CL_OUT_OF_RESOURCES";
- case CL_MEM_OBJECT_ALLOCATION_FAILURE:
- return "CL_MEM_OBJECT_ALLOCATION_FAILURE";
- case CL_COMPILER_NOT_AVAILABLE:
- return "CL_COMPILER_NOT_AVAILABLE";
- case CL_DEVICE_NOT_AVAILABLE:
- return "CL_DEVICE_NOT_AVAILABLE";
- case CL_DEVICE_NOT_FOUND:
- return "CL_DEVICE_NOT_FOUND";
- case CL_SUCCESS:
- return "CL_SUCCESS";
- default:
- return "Error code not defined";
- break;
- }
-}
-
-// This is used to either wrap an OpenCL function call, or to explicitly check a variable for an OpenCL error condition.
-// If an error occurs, we throw.
-// Note: std::runtime_error does not take unicode strings as input, so only strings supported
-inline cl_int OpenCL_V_Throw( cl_int res, const std::string& msg, size_t lineno )
-{
- switch( res )
- {
- case CL_SUCCESS: /**< No error */
- break;
- default:
- {
- std::stringstream tmp;
- tmp << "OPENCL_V_THROWERROR< ";
- tmp << prettyPrintClStatus(res) ;
- tmp << " > (";
- tmp << lineno;
- tmp << "): ";
- tmp << msg;
- std::string errorm(tmp.str());
- std::cout << errorm<< std::endl;
- throw std::runtime_error( errorm );
- }
- }
-
- return res;
-}
-#define OPENCL_V_THROW(_status,_message) OpenCL_V_Throw(_status, _message, __LINE__)
-
-enum complexity_t { not_complex = 1, yes_complex = 2 };
-
-//can be cl_float, cl_double
-//TODO should be cl_float2 and cl_double2 instead of using float/double * complexity?
-template< class T >
-class buffers
-{
-public:
- size_t M, N, K;
- size_t lda, ldb, ldc;
- complexity_t complexity;
- T* A;
- T* B;
- T* C;
- cl_mem bufA, bufB, bufC;
- cl_command_queue queue;
- std::map<std::string, T*> buffer_map;
- std::map<std::string, size_t> rows_map;
- std::map<std::string, size_t> ldx_map;
-
- buffers( cl_context ctx, cl_command_queue _queue,
- size_t _M, size_t _N, size_t _K,
- size_t _lda, size_t _ldb, size_t _ldc,
- complexity_t _complexity )
- : M(_M)
- , N(_N)
- , K(_K)
- , lda(_lda)
- , ldb(_ldb)
- , ldc(_ldc)
- , complexity(_complexity)
- , A(new T[M*lda*sizeof(T)*complexity])
- , B(new T[K*ldb*sizeof(T)*complexity])
- , C(new T[M*ldc*sizeof(T)*complexity])
- , queue(_queue)
- {
- // request and initialize openCL memory
- bufA = clCreateBuffer(ctx, CL_MEM_READ_ONLY, M * lda * sizeof(*A) * complexity,
- NULL, &gemm_err);
- OPENCL_V_THROW( gemm_err, "creating buffer A" );
- bufB = clCreateBuffer(ctx, CL_MEM_READ_ONLY, K * ldb * sizeof(*B) * complexity,
- NULL, &gemm_err);
- OPENCL_V_THROW( gemm_err, "creating buffer B" );
- bufC = clCreateBuffer(ctx, CL_MEM_READ_WRITE, M * ldc * sizeof(*C) * complexity,
- NULL, &gemm_err);
- OPENCL_V_THROW( gemm_err, "creating buffer C" );
-
- buffer_map.insert(std::pair<std::string,T*>("A",A));
- buffer_map.insert(std::pair<std::string,T*>("B",B));
- buffer_map.insert(std::pair<std::string,T*>("C",C));
- rows_map.insert(std::pair<std::string,size_t>("A",M));
- rows_map.insert(std::pair<std::string,size_t>("B",K));
- rows_map.insert(std::pair<std::string,size_t>("C",M));
- ldx_map.insert(std::pair<std::string,size_t>("A",lda));
- ldx_map.insert(std::pair<std::string,size_t>("B",ldb));
- ldx_map.insert(std::pair<std::string,size_t>("C",ldc));
-
- initialize_data();
- }
-
- ~buffers()
- {
- OPENCL_V_THROW( clReleaseMemObject(bufC), "releasing buffer A");
- OPENCL_V_THROW( clReleaseMemObject(bufB), "releasing buffer B");
- OPENCL_V_THROW( clReleaseMemObject(bufA), "releasing buffer C");
- delete[] A;
- delete[] B;
- delete[] C;
- }
-
- void initialize_data()
- {
- initializeLocalMatrix("A");
- initializeLocalMatrix("B");
- initializeLocalMatrix("C");
-
- gemm_err = clEnqueueWriteBuffer(queue, bufA, CL_TRUE, 0,
- M * K * sizeof(*A) * complexity, A, 0, NULL, NULL);
- OPENCL_V_THROW( gemm_err, "writing to buffer A" );
- gemm_err = clEnqueueWriteBuffer(queue, bufB, CL_TRUE, 0,
- K * N * sizeof(*B) * complexity, A, 0, NULL, NULL);
- OPENCL_V_THROW( gemm_err, "writing to buffer B" );
- gemm_err = clEnqueueWriteBuffer(queue, bufC, CL_TRUE, 0,
- M * N * sizeof(*C) * complexity, C, 0, NULL, NULL);
- OPENCL_V_THROW( gemm_err, "writing to buffer C" );
- }
-
- void read_back_result()
- {
- OPENCL_V_THROW( clEnqueueReadBuffer(queue, bufC, CL_TRUE, 0, M * N * sizeof(*C) * complexity, C, 0, NULL, NULL),
- "reading from buffer C" );
- }
-
- void initializeLocalMatrix(std::string matrix)
- {
- for (size_t i = 0; i < rows_map[matrix]*complexity; i++) {
- for (size_t j = 0; j < ldx_map[matrix]; j++) {
- buffer_map[matrix][i * ldx_map[matrix] + j] = (i+1)*10 + (j+1);
- }
- }
- }
-
- void printLocalMatrix(std::string matrix)
- {
- for (size_t i = 0; i < rows_map[matrix]*complexity; i++) {
- for (size_t j = 0; j < ldx_map[matrix]; j++) {
- std::cout << (int)buffer_map[matrix][i * ldx_map[matrix] + j] << " ";
- }
- std::cout << std::endl;
- }
- std::cout << std::endl;
- }
-};
-
-class clGemm
-{
-public:
- size_t M;
- size_t N;
- size_t K;
- size_t lda;
- size_t ldb;
- size_t ldc;
- clblasOrder order;
- clblasTranspose transA;
- clblasTranspose transB;
- cl_context_properties props[3];
- cl_platform_id platform;
- cl_device_id device;
- cl_context ctx;
- cl_device_type deviceType;
- cl_command_queue queue;
- cl_event event;
- cl_uint commandQueueFlags;
- bool useimages;
- cl_ulong imgA;
- cl_ulong imgB;
- StatisticalTimer& timer;
- StatisticalTimer::sTimerID gemm_timer_id;
-
- clGemm( size_t _M, size_t _N, size_t _K,
- size_t _lda, size_t _ldb, size_t _ldc,
- bool _useimages,
- clblasOrder _order,
- clblasTranspose _transA, clblasTranspose _transB,
- cl_device_type _deviceType, cl_uint _commandQueueFlags,
- StatisticalTimer& _timer )
- : M(_M)
- , N(_N)
- , K(_K)
- , lda(_lda)
- , ldb(_ldb)
- , ldc(_ldc)
- , order(_order)
- , transA(_transA)
- , transB(_transB)
- , deviceType(_deviceType)
- , event(NULL)
- , commandQueueFlags(_commandQueueFlags)
- , useimages(_useimages)
- , imgA(0)
- , imgB(0)
- , timer(_timer)
- {
- props[0] = CL_CONTEXT_PLATFORM;
- props[1] = 0;
- props[2] = 0;
- OPENCL_V_THROW( clGetPlatformIDs(1, &platform, NULL), "getting platform IDs" );
- OPENCL_V_THROW( clGetDeviceIDs(platform, deviceType, 1, &device, NULL), "getting device IDs" );
- props[1] = (cl_context_properties)platform;
- ctx = clCreateContext(props, 1, &device, NULL, NULL, &gemm_err);
- OPENCL_V_THROW( gemm_err, "creating context" );
- queue = clCreateCommandQueue(ctx, device, commandQueueFlags, &gemm_err);
- OPENCL_V_THROW( gemm_err, "creating command queue" );
-
- gemm_err = clblasSetup();
- if (gemm_err != CL_SUCCESS) {
- std::cout << "clblasSetup() failed with " << gemm_err << std::endl;
- clReleaseCommandQueue(queue);
- clReleaseContext(ctx);
- exit(1);
- }
-
- if (useimages) {
- imgA = clblasAddScratchImage(ctx, 16, 64, NULL);
- imgB = clblasAddScratchImage(ctx, 16, 64, NULL);
- }
-
- gemm_timer_id = timer.getUniqueID( "clGemm", 0 );
- }
-
- ~clGemm()
- {
- if (useimages) {
- clblasRemoveScratchImage(imgA);
- clblasRemoveScratchImage(imgB);
- }
-
- clblasTeardown();
- OPENCL_V_THROW( clReleaseCommandQueue(queue), "releasing command queue" );
- OPENCL_V_THROW( clReleaseContext(ctx), "releasing context" );
- }
-
- void wait_and_check()
- {
- cl_int wait_status = clWaitForEvents(1, &event);
-
- if( wait_status != CL_SUCCESS )
- {
- if( wait_status == CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST )
- {
- clGetEventInfo( event, CL_EVENT_COMMAND_EXECUTION_STATUS, sizeof(cl_int), &gemm_err, NULL );
- std::cout << "blas function execution status error: " << gemm_err << std::endl;
- exit(1);
- }
- else
- {
- std::cout << "blas function wait status error: " << wait_status << std::endl;
- exit(1);
- }
- }
- }
-
- double time_in_ns()
- {
- StatisticalTimer& timer = StatisticalTimer::getInstance( );
- return timer.getAverageTime( gemm_timer_id ) * 1e9;
- }
-
- virtual void call_gemm() = 0;
- virtual void clear_buffers() = 0;
- virtual double gflops() = 0;
- virtual std::string gflops_formula() = 0;
-};
-
-class clSgemm : public clGemm
-{
-public:
- cl_float alpha;
- cl_float beta;
- buffers<cl_float> mybuffers;
-
- clSgemm( size_t _M, size_t _N, size_t _K,
- size_t _lda, size_t _ldb, size_t _ldc,
- bool _useimages,
- clblasOrder _order,
- clblasTranspose _transA, clblasTranspose _transB,
- cl_float _alpha, cl_float _beta,
- cl_device_type _deviceType, cl_uint _commandQueueFlags,
- StatisticalTimer& _timer)
- : clGemm( _M, _N, _K,
- _lda, _ldb, _ldc,
- _useimages, _order, _transA, _transB,
- _deviceType, _commandQueueFlags, _timer )
- , alpha(_alpha)
- , beta(_beta)
- , mybuffers( ctx, queue, M, N, K, lda, ldb, ldc, not_complex )
- {}
-
- void call_gemm()
- {
- timer.Start(gemm_timer_id);
- OPENCL_V_THROW( clblasSgemm(order, transA, transB,
- M, N, K,
- alpha,
- mybuffers.bufA, lda,
- mybuffers.bufB, ldb,
- beta,
- mybuffers.bufC, ldc,
- 1, &queue, 0, NULL, &event),
- "clblasSgemm" );
- wait_and_check();
- timer.Stop(gemm_timer_id);
- //mybuffers.read_back_result();
- //mybuffers.printLocalMatrix("C");
- }
-
- void clear_buffers()
- {
- mybuffers.initialize_data();
- }
-
- double gflops()
- {
- return (2*M*N*K)/time_in_ns();
- }
-
- std::string gflops_formula()
- {
- return "(2*M*N*K)/time_in_ns";
- }
-};
-
-class clDgemm : public clGemm
-{
-public:
- cl_double alpha;
- cl_double beta;
- buffers<cl_double> mybuffers;
-
- clDgemm( size_t _M, size_t _N, size_t _K,
- size_t _lda, size_t _ldb, size_t _ldc,
- bool _useimages,
- clblasOrder _order,
- clblasTranspose _transA, clblasTranspose _transB,
- cl_double _alpha, cl_double _beta,
- cl_device_type _deviceType, cl_uint _commandQueueFlags,
- StatisticalTimer& _timer)
- : clGemm( _M, _N, _K,
- _lda, _ldb, _ldc,
- _useimages, _order, _transA, _transB,
- _deviceType, _commandQueueFlags, _timer )
- , alpha(_alpha)
- , beta(_beta)
- , mybuffers( ctx, queue, M, N, K, lda, ldb, ldc, not_complex )
- {}
-
- void call_gemm()
- {
- timer.Start(gemm_timer_id);
- OPENCL_V_THROW( clblasDgemm(order, transA, transB,
- M, N, K,
- alpha,
- mybuffers.bufA, lda,
- mybuffers.bufB, ldb,
- beta,
- mybuffers.bufC, ldc,
- 1, &queue, 0, NULL, &event),
- "clblasDgemm" );
- wait_and_check();
- timer.Stop(gemm_timer_id);
- //mybuffers.read_back_result();
- //mybuffers.printLocalMatrix("C");
- }
-
- void clear_buffers()
- {
- mybuffers.initialize_data();
- }
-
- double gflops()
- {
- return (2*M*N*K)/time_in_ns();
- }
-
- std::string gflops_formula()
- {
- return "(2*M*N*K)/time_in_ns";
- }
-};
-
-class clCgemm : public clGemm
-{
-public:
- cl_float2 alpha;
- cl_float2 beta;
- buffers<cl_float> mybuffers;
-
- clCgemm( size_t _M, size_t _N, size_t _K,
- size_t _lda, size_t _ldb, size_t _ldc,
- bool _useimages,
- clblasOrder _order,
- clblasTranspose _transA, clblasTranspose _transB,
- cl_float _alpha, cl_float _beta,
- cl_device_type _deviceType, cl_uint _commandQueueFlags,
- StatisticalTimer& _timer)
- : clGemm( _M, _N, _K,
- _lda, _ldb, _ldc,
- _useimages, _order, _transA, _transB,
- _deviceType, _commandQueueFlags, _timer )
- , mybuffers( ctx, queue, M, N, K, lda, ldb, ldc, yes_complex )
- {
- alpha.s[0] = _alpha;
- alpha.s[1] = _alpha;
- beta.s[0] = _beta;
- beta.s[1] = _beta;
- }
-
- void call_gemm()
- {
- timer.Start(gemm_timer_id);
- OPENCL_V_THROW( clblasCgemm(order, transA, transB,
- M, N, K,
- alpha,
- mybuffers.bufA, lda,
- mybuffers.bufB, ldb,
- beta,
- mybuffers.bufC, ldc,
- 1, &queue, 0, NULL, &event),
- "clblasCgemm" );
- wait_and_check();
- timer.Stop(gemm_timer_id);
- //mybuffers.read_back_result();
- //mybuffers.printLocalMatrix("C");
- }
-
- void clear_buffers()
- {
- mybuffers.initialize_data();
- }
-
- double gflops()
- {
- return (8*M*N*K)/time_in_ns();
- }
-
- std::string gflops_formula()
- {
- return "(8*M*N*K)/time_in_ns";
- }
-};
-
-class clZgemm : public clGemm
-{
-public:
- cl_double2 alpha;
- cl_double2 beta;
- buffers<cl_double> mybuffers;
-
- clZgemm( size_t _M, size_t _N, size_t _K,
- size_t _lda, size_t _ldb, size_t _ldc,
- bool _useimages,
- clblasOrder _order,
- clblasTranspose _transA, clblasTranspose _transB,
- cl_double _alpha, cl_double _beta,
- cl_device_type _deviceType, cl_uint _commandQueueFlags,
- StatisticalTimer& _timer)
- : clGemm( _M, _N, _K,
- _lda, _ldb, _ldc,
- _useimages, _order, _transA, _transB,
- _deviceType, _commandQueueFlags, _timer )
- , mybuffers( ctx, queue, M, N, K, lda, ldb, ldc, yes_complex )
- {
- alpha.s[0] = _alpha;
- alpha.s[1] = _alpha;
- beta.s[0] = _beta;
- beta.s[1] = _beta;
- }
-
- void call_gemm()
- {
- timer.Start(gemm_timer_id);
- OPENCL_V_THROW( clblasZgemm(order, transA, transB,
- M, N, K,
- alpha,
- mybuffers.bufA, lda,
- mybuffers.bufB, ldb,
- beta,
- mybuffers.bufC, ldc,
- 1, &queue, 0, NULL, &event),
- "clblasZgemm" );
- wait_and_check();
- timer.Stop(gemm_timer_id);
- //mybuffers.read_back_result();
- //mybuffers.printLocalMatrix("C");
- }
-
- void clear_buffers()
- {
- mybuffers.initialize_data();
- }
-
- double gflops()
- {
- return (8*M*N*K)/time_in_ns();
- }
-
- std::string gflops_formula()
- {
- return "(8*M*N*K)/time_in_ns";
- }
-};