summaryrefslogtreecommitdiff
path: root/src/clblast_cuda.cpp
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2018-01-31 20:41:02 +0100
committer Cedric Nugteren <web@cedricnugteren.nl> 2018-01-31 20:41:02 +0100
commit ef5008f5e46c4fe6d3728beff1d3277d02aae099 (patch)
tree 3b01fe2150bd394dbf3a8b411d30de63145243f6 /src/clblast_cuda.cpp
parent 37c5e8f58c8c6a1f8888938baa67691f8ecddaf4 (diff)
Created the API and stubs for the HAD (Hadamard-product) routines
Diffstat (limited to 'src/clblast_cuda.cpp')
-rw-r--r-- src/clblast_cuda.cpp 59
1 files changed, 59 insertions, 0 deletions
diff --git a/src/clblast_cuda.cpp b/src/clblast_cuda.cpp
index 0aa57087..0ba57056 100644
--- a/src/clblast_cuda.cpp
+++ b/src/clblast_cuda.cpp
@@ -2201,6 +2201,65 @@ template StatusCode PUBLIC_API Trsm<double2>(const Layout, const Side, const Tri
// Extra non-BLAS routines (level-X)
// =================================================================================================
+// Element-wise vector product (Hadamard): SHAD/DHAD/CHAD/ZHAD/HHAD. Wraps the raw CUDA handles and forwards every argument to Xhad<T>::DoHad.
+template <typename T>
+StatusCode Had(const size_t n,  // forwarded unchanged to DoHad; presumably the number of elements (TODO confirm)
+ const T alpha,  // scalar of the routine's element type, forwarded to DoHad
+ const CUdeviceptr x_buffer, const size_t x_offset, const size_t x_inc,  // first vector: raw device pointer plus its offset and inc arguments
+ const CUdeviceptr y_buffer, const size_t y_offset, const size_t y_inc,  // second vector, same three-argument layout
+ const T beta,  // second scalar, forwarded to DoHad
+ CUdeviceptr z_buffer, const size_t z_offset, const size_t z_inc,  // third vector; presumably the result, as it is the only non-const pointer (TODO confirm)
+ const CUcontext context, const CUdevice device) {  // raw CUDA handles supplied by the caller; wrapped below
+ try {  // everything inside may throw; the catch-all at the end turns that into the return value
+ const auto context_cpp = Context(context);  // wrap the raw CUcontext in the library's Context type
+ const auto device_cpp = Device(device);  // wrap the raw CUdevice in the library's Device type
+ auto queue_cpp = Queue(context_cpp, device_cpp);  // Queue object built from the two wrappers; the routine runs on it
+ auto routine = Xhad<T>(queue_cpp, nullptr);  // NOTE(review): the nullptr is presumably the optional event argument — confirm against Xhad's constructor
+ routine.DoHad(n,  // return value of DoHad (if any) is not inspected; failures are expected to arrive as exceptions
+ alpha,
+ Buffer<T>(x_buffer), x_offset, x_inc,  // each raw CUdeviceptr is wrapped in a typed Buffer<T> for the call
+ Buffer<T>(y_buffer), y_offset, y_inc,
+ beta,
+ Buffer<T>(z_buffer), z_offset, z_inc);
+ return StatusCode::kSuccess;  // reached only when nothing above threw
+ } catch (...) { return DispatchException(); }  // catch-all: the result of DispatchException() becomes the returned status
+}
+template StatusCode PUBLIC_API Had<float>(const size_t,  // explicit instantiation for T = float (presumably the SHAD variant named in the header comment)
+ const float,
+ const CUdeviceptr, const size_t, const size_t,
+ const CUdeviceptr, const size_t, const size_t,
+ const float,
+ CUdeviceptr, const size_t, const size_t,
+ const CUcontext, const CUdevice);
+template StatusCode PUBLIC_API Had<double>(const size_t,  // explicit instantiation for T = double (presumably DHAD)
+ const double,
+ const CUdeviceptr, const size_t, const size_t,
+ const CUdeviceptr, const size_t, const size_t,
+ const double,
+ CUdeviceptr, const size_t, const size_t,
+ const CUcontext, const CUdevice);
+template StatusCode PUBLIC_API Had<float2>(const size_t,  // explicit instantiation for T = float2 (presumably CHAD)
+ const float2,
+ const CUdeviceptr, const size_t, const size_t,
+ const CUdeviceptr, const size_t, const size_t,
+ const float2,
+ CUdeviceptr, const size_t, const size_t,
+ const CUcontext, const CUdevice);
+template StatusCode PUBLIC_API Had<double2>(const size_t,  // explicit instantiation for T = double2 (presumably ZHAD)
+ const double2,
+ const CUdeviceptr, const size_t, const size_t,
+ const CUdeviceptr, const size_t, const size_t,
+ const double2,
+ CUdeviceptr, const size_t, const size_t,
+ const CUcontext, const CUdevice);
+template StatusCode PUBLIC_API Had<half>(const size_t,  // explicit instantiation for T = half (presumably HHAD)
+ const half,
+ const CUdeviceptr, const size_t, const size_t,
+ const CUdeviceptr, const size_t, const size_t,
+ const half,
+ CUdeviceptr, const size_t, const size_t,
+ const CUcontext, const CUdevice);
+
// Scaling and out-place transpose/copy (non-BLAS function): SOMATCOPY/DOMATCOPY/COMATCOPY/ZOMATCOPY/HOMATCOPY
template <typename T>
StatusCode Omatcopy(const Layout layout, const Transpose a_transpose,