summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2018-01-31 20:41:02 +0100
committerCedric Nugteren <web@cedricnugteren.nl>2018-01-31 20:41:02 +0100
commitef5008f5e46c4fe6d3728beff1d3277d02aae099 (patch)
tree3b01fe2150bd394dbf3a8b411d30de63145243f6 /include
parent37c5e8f58c8c6a1f8888938baa67691f8ecddaf4 (diff)
Created the API and stubs for the HAD (Hadamard-product) routines
Diffstat (limited to 'include')
-rw-r--r--include/clblast.h10
-rw-r--r--include/clblast_c.h37
-rw-r--r--include/clblast_cuda.h10
-rw-r--r--include/clblast_netlib_c.h26
4 files changed, 83 insertions, 0 deletions
diff --git a/include/clblast.h b/include/clblast.h
index c4ff5290..9d3b9ea0 100644
--- a/include/clblast.h
+++ b/include/clblast.h
@@ -610,6 +610,16 @@ StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle, c
// Extra non-BLAS routines (level-X)
// =================================================================================================
+// Element-wise vector product (Hadamard): SHAD/DHAD/CHAD/ZHAD/HHAD. OpenCL C++ entry point, templated on the precision T. NOTE(review): presumably computes z = alpha * x * y + beta * z per element -- confirm against the routine implementation.
+template <typename T>
+StatusCode Had(const size_t n, // single size argument shared by x, y and z (presumably the element count -- TODO confirm)
+ const T alpha, // first scalar, passed by value in precision T
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, // x operand: OpenCL buffer handle plus its offset and increment arguments
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, // y operand: same triple as x
+ const T beta, // second scalar, passed by value in precision T
+ cl_mem z_buffer, const size_t z_offset, const size_t z_inc, // z: the only buffer handle not declared const (presumably the result vector -- TODO confirm)
+ cl_command_queue* queue, cl_event* event = nullptr); // queue is passed by pointer; event may be omitted (defaults to nullptr)
+
// Scaling and out-place transpose/copy (non-BLAS function): SOMATCOPY/DOMATCOPY/COMATCOPY/ZOMATCOPY/HOMATCOPY
template <typename T>
StatusCode Omatcopy(const Layout layout, const Transpose a_transpose,
diff --git a/include/clblast_c.h b/include/clblast_c.h
index f1fc5371..a00aca45 100644
--- a/include/clblast_c.h
+++ b/include/clblast_c.h
@@ -1318,6 +1318,43 @@ CLBlastStatusCode PUBLIC_API CLBlastZtrsm(const CLBlastLayout layout, const CLBl
// Extra non-BLAS routines (level-X)
// =================================================================================================
+// Element-wise vector product (Hadamard): SHAD/DHAD/CHAD/ZHAD/HHAD. One C entry point is declared per precision; the five declarations differ only in the type of alpha and beta.
+CLBlastStatusCode PUBLIC_API CLBlastShad(const size_t n, // SHAD: float scalars; n is the size argument shared by x, y and z (presumably the element count -- TODO confirm)
+ const float alpha, // first scalar, by value
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, // x operand: OpenCL buffer handle, offset, increment
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, // y operand: OpenCL buffer handle, offset, increment
+ const float beta, // second scalar, by value
+ cl_mem z_buffer, const size_t z_offset, const size_t z_inc, // z: the only buffer handle not declared const (presumably the result -- TODO confirm)
+ cl_command_queue* queue, cl_event* event); // no default arguments: both queue and event are always passed explicitly
+CLBlastStatusCode PUBLIC_API CLBlastDhad(const size_t n, // DHAD: double scalars; n is the size argument shared by x, y and z (presumably the element count -- TODO confirm)
+ const double alpha, // first scalar, by value
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, // x operand: OpenCL buffer handle, offset, increment
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, // y operand: OpenCL buffer handle, offset, increment
+ const double beta, // second scalar, by value
+ cl_mem z_buffer, const size_t z_offset, const size_t z_inc, // z: the only buffer handle not declared const (presumably the result -- TODO confirm)
+ cl_command_queue* queue, cl_event* event); // no default arguments: both queue and event are always passed explicitly
+CLBlastStatusCode PUBLIC_API CLBlastChad(const size_t n, // CHAD: cl_float2 scalars (presumably a real/imaginary float pair -- TODO confirm); n is the size argument shared by x, y and z
+ const cl_float2 alpha, // first scalar, by value
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, // x operand: OpenCL buffer handle, offset, increment
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, // y operand: OpenCL buffer handle, offset, increment
+ const cl_float2 beta, // second scalar, by value
+ cl_mem z_buffer, const size_t z_offset, const size_t z_inc, // z: the only buffer handle not declared const (presumably the result -- TODO confirm)
+ cl_command_queue* queue, cl_event* event); // no default arguments: both queue and event are always passed explicitly
+CLBlastStatusCode PUBLIC_API CLBlastZhad(const size_t n, // ZHAD: cl_double2 scalars (presumably a real/imaginary double pair -- TODO confirm); n is the size argument shared by x, y and z
+ const cl_double2 alpha, // first scalar, by value
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, // x operand: OpenCL buffer handle, offset, increment
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, // y operand: OpenCL buffer handle, offset, increment
+ const cl_double2 beta, // second scalar, by value
+ cl_mem z_buffer, const size_t z_offset, const size_t z_inc, // z: the only buffer handle not declared const (presumably the result -- TODO confirm)
+ cl_command_queue* queue, cl_event* event); // no default arguments: both queue and event are always passed explicitly
+CLBlastStatusCode PUBLIC_API CLBlastHhad(const size_t n, // HHAD: cl_half scalars; n is the size argument shared by x, y and z (presumably the element count -- TODO confirm)
+ const cl_half alpha, // first scalar, by value
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, // x operand: OpenCL buffer handle, offset, increment
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, // y operand: OpenCL buffer handle, offset, increment
+ const cl_half beta, // second scalar, by value
+ cl_mem z_buffer, const size_t z_offset, const size_t z_inc, // z: the only buffer handle not declared const (presumably the result -- TODO confirm)
+ cl_command_queue* queue, cl_event* event); // no default arguments: both queue and event are always passed explicitly
+
// Scaling and out-place transpose/copy (non-BLAS function): SOMATCOPY/DOMATCOPY/COMATCOPY/ZOMATCOPY/HOMATCOPY
CLBlastStatusCode PUBLIC_API CLBlastSomatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose,
const size_t m, const size_t n,
diff --git a/include/clblast_cuda.h b/include/clblast_cuda.h
index ed348efe..d82ee331 100644
--- a/include/clblast_cuda.h
+++ b/include/clblast_cuda.h
@@ -582,6 +582,16 @@ StatusCode Trsm(const Layout layout, const Side side, const Triangle triangle, c
// Extra non-BLAS routines (level-X)
// =================================================================================================
+// Element-wise vector product (Hadamard): SHAD/DHAD/CHAD/ZHAD/HHAD. CUDA C++ entry point, templated on the precision T. NOTE(review): presumably computes z = alpha * x * y + beta * z per element -- confirm against the routine implementation.
+template <typename T>
+StatusCode Had(const size_t n, // single size argument shared by x, y and z (presumably the element count -- TODO confirm)
+ const T alpha, // first scalar, passed by value in precision T
+ const CUdeviceptr x_buffer, const size_t x_offset, const size_t x_inc, // x operand: CUDA device pointer plus its offset and increment arguments
+ const CUdeviceptr y_buffer, const size_t y_offset, const size_t y_inc, // y operand: same triple as x
+ const T beta, // second scalar, passed by value in precision T
+ CUdeviceptr z_buffer, const size_t z_offset, const size_t z_inc, // z: the only device pointer not declared const (presumably the result vector -- TODO confirm)
+ const CUcontext context, const CUdevice device); // takes a CUDA context and device; this declaration has no queue or event parameter
+
// Scaling and out-place transpose/copy (non-BLAS function): SOMATCOPY/DOMATCOPY/COMATCOPY/ZOMATCOPY/HOMATCOPY
template <typename T>
StatusCode Omatcopy(const Layout layout, const Transpose a_transpose,
diff --git a/include/clblast_netlib_c.h b/include/clblast_netlib_c.h
index 8637ac3e..b64b82eb 100644
--- a/include/clblast_netlib_c.h
+++ b/include/clblast_netlib_c.h
@@ -898,6 +898,32 @@ void PUBLIC_API cblas_ztrsm(const CLBlastLayout layout, const CLBlastSide side,
// Extra non-BLAS routines (level-X)
// =================================================================================================
+// Element-wise vector product (Hadamard): SHAD/DHAD/CHAD/ZHAD/HHAD. Netlib-style interface: plain pointers and int sizes, void return. Only the S/D/C/Z variants are declared in this hunk (no cblas_hhad).
+void PUBLIC_API cblas_shad(const int n, // SHAD: float data; n is the size argument shared by x, y and z (presumably the element count -- TODO confirm)
+ const float alpha, // first scalar, by value
+ const float* x, const int x_inc, // x operand: pointer to const float plus increment; this interface has no offset parameter
+ const float* y, const int y_inc, // y operand: pointer to const float plus increment
+ const float beta, // second scalar, by value
+ float* z, const int z_inc); // z: the only vector passed through a pointer to non-const data (presumably the result -- TODO confirm)
+void PUBLIC_API cblas_dhad(const int n, // DHAD: double data; n is the size argument shared by x, y and z (presumably the element count -- TODO confirm)
+ const double alpha, // first scalar, by value
+ const double* x, const int x_inc, // x operand: pointer to const double plus increment; this interface has no offset parameter
+ const double* y, const int y_inc, // y operand: pointer to const double plus increment
+ const double beta, // second scalar, by value
+ double* z, const int z_inc); // z: the only vector passed through a pointer to non-const data (presumably the result -- TODO confirm)
+void PUBLIC_API cblas_chad(const int n, // CHAD: all data is passed as untyped pointers (presumably to single-precision complex values -- TODO confirm); n is the size argument shared by x, y and z
+ const void* alpha, // first scalar, passed by pointer rather than by value
+ const void* x, const int x_inc, // x operand: pointer to const data plus increment; this interface has no offset parameter
+ const void* y, const int y_inc, // y operand: pointer to const data plus increment
+ const void* beta, // second scalar, passed by pointer rather than by value
+ void* z, const int z_inc); // z: the only pointer to non-const data (presumably the result -- TODO confirm)
+void PUBLIC_API cblas_zhad(const int n, // ZHAD: all data is passed as untyped pointers (presumably to double-precision complex values -- TODO confirm); n is the size argument shared by x, y and z
+ const void* alpha, // first scalar, passed by pointer rather than by value
+ const void* x, const int x_inc, // x operand: pointer to const data plus increment; this interface has no offset parameter
+ const void* y, const int y_inc, // y operand: pointer to const data plus increment
+ const void* beta, // second scalar, passed by pointer rather than by value
+ void* z, const int z_inc); // z: the only pointer to non-const data (presumably the result -- TODO confirm)
+
// Scaling and out-place transpose/copy (non-BLAS function): SOMATCOPY/DOMATCOPY/COMATCOPY/ZOMATCOPY/HOMATCOPY
void PUBLIC_API cblas_somatcopy(const CLBlastLayout layout, const CLBlastTranspose a_transpose,
const int m, const int n,