From ef5008f5e46c4fe6d3728beff1d3277d02aae099 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Wed, 31 Jan 2018 20:41:02 +0100 Subject: Created the API and stubs for the HAD (hadamard-product) routines --- include/clblast.h | 10 ++++++++++ include/clblast_c.h | 37 +++++++++++++++++++++++++++++++++++++ include/clblast_cuda.h | 10 ++++++++++ include/clblast_netlib_c.h | 26 ++++++++++++++++++++++++++ 4 files changed, 83 insertions(+) (limited to 'include') diff --git a/include/clblast.h b/include/clblast.h index c4ff5290..9d3b9ea0 100644 --- a/include/clblast.h +++ b/include/clblast.h @@ -610,6 +610,16 @@ StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle, c // Extra non-BLAS routines (level-X) // ================================================================================================= +// Element-wise vector product (Hadamard): SHAD/DHAD/CHAD/ZHAD/HHAD +template <typename T> +StatusCode Had(const size_t n, + const T alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const T beta, + cl_mem z_buffer, const size_t z_offset, const size_t z_inc, + cl_command_queue* queue, cl_event* event = nullptr); + // Scaling and out-place transpose/copy (non-BLAS function): SOMATCOPY/DOMATCOPY/COMATCOPY/ZOMATCOPY/HOMATCOPY template <typename T> StatusCode Omatcopy(const Layout layout, const Transpose a_transpose, diff --git a/include/clblast_c.h b/include/clblast_c.h index f1fc5371..a00aca45 100644 --- a/include/clblast_c.h +++ b/include/clblast_c.h @@ -1318,6 +1318,43 @@ CLBlastStatusCode PUBLIC_API CLBlastZtrsm(const CLBlastLayout layout, const CLBl // Extra non-BLAS routines (level-X) // ================================================================================================= +// Element-wise vector product (Hadamard): SHAD/DHAD/CHAD/ZHAD/HHAD +CLBlastStatusCode PUBLIC_API CLBlastShad(const size_t n, + const float alpha, + const cl_mem x_buffer, const size_t x_offset, 
const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const float beta, + cl_mem z_buffer, const size_t z_offset, const size_t z_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastDhad(const size_t n, + const double alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const double beta, + cl_mem z_buffer, const size_t z_offset, const size_t z_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastChad(const size_t n, + const cl_float2 alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const cl_float2 beta, + cl_mem z_buffer, const size_t z_offset, const size_t z_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastZhad(const size_t n, + const cl_double2 alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const cl_double2 beta, + cl_mem z_buffer, const size_t z_offset, const size_t z_inc, + cl_command_queue* queue, cl_event* event); +CLBlastStatusCode PUBLIC_API CLBlastHhad(const size_t n, + const cl_half alpha, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + const cl_half beta, + cl_mem z_buffer, const size_t z_offset, const size_t z_inc, + cl_command_queue* queue, cl_event* event); + // Scaling and out-place transpose/copy (non-BLAS function): SOMATCOPY/DOMATCOPY/COMATCOPY/ZOMATCOPY/HOMATCOPY CLBlastStatusCode PUBLIC_API CLBlastSomatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const size_t m, const size_t n, diff --git a/include/clblast_cuda.h b/include/clblast_cuda.h index ed348efe..d82ee331 100644 --- a/include/clblast_cuda.h +++ 
b/include/clblast_cuda.h @@ -582,6 +582,16 @@ StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle, c // Extra non-BLAS routines (level-X) // ================================================================================================= +// Element-wise vector product (Hadamard): SHAD/DHAD/CHAD/ZHAD/HHAD +template <typename T> +StatusCode Had(const size_t n, + const T alpha, + const CUdeviceptr x_buffer, const size_t x_offset, const size_t x_inc, + const CUdeviceptr y_buffer, const size_t y_offset, const size_t y_inc, + const T beta, + CUdeviceptr z_buffer, const size_t z_offset, const size_t z_inc, + const CUcontext context, const CUdevice device); + // Scaling and out-place transpose/copy (non-BLAS function): SOMATCOPY/DOMATCOPY/COMATCOPY/ZOMATCOPY/HOMATCOPY template <typename T> StatusCode Omatcopy(const Layout layout, const Transpose a_transpose, diff --git a/include/clblast_netlib_c.h b/include/clblast_netlib_c.h index 8637ac3e..b64b82eb 100644 --- a/include/clblast_netlib_c.h +++ b/include/clblast_netlib_c.h @@ -898,6 +898,32 @@ void PUBLIC_API cblas_ztrsm(const CLBlastLayout layout, const CLBlastSide side, // Extra non-BLAS routines (level-X) // ================================================================================================= +// Element-wise vector product (Hadamard): SHAD/DHAD/CHAD/ZHAD/HHAD +void PUBLIC_API cblas_shad(const int n, + const float alpha, + const float* x, const int x_inc, + const float* y, const int y_inc, + const float beta, + float* z, const int z_inc); +void PUBLIC_API cblas_dhad(const int n, + const double alpha, + const double* x, const int x_inc, + const double* y, const int y_inc, + const double beta, + double* z, const int z_inc); +void PUBLIC_API cblas_chad(const int n, + const void* alpha, + const void* x, const int x_inc, + const void* y, const int y_inc, + const void* beta, + void* z, const int z_inc); +void PUBLIC_API cblas_zhad(const int n, + const void* alpha, + const void* x, const int x_inc, + const void* 
y, const int y_inc, + const void* beta, + void* z, const int z_inc); + // Scaling and out-place transpose/copy (non-BLAS function): SOMATCOPY/DOMATCOPY/COMATCOPY/ZOMATCOPY/HOMATCOPY void PUBLIC_API cblas_somatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose, const int m, const int n, -- cgit v1.2.3