Implemented the XHAD Hadamard product routine

author: Cedric Nugteren <web@cedricnugteren.nl> 2018-02-02 21:18:37 +0100
committer: Cedric Nugteren <web@cedricnugteren.nl> 2018-02-02 21:18:37 +0100
commit: 69ed46c8da69ee18338eca5102ead43410cc01b5 (patch)
tree: 575c4eef2a3210117a574f25f81662c503ec207d /test
parent: ae66782eabc574a507b8cfe2b83f2df23b1a36c1 (diff)
1 file changed, 33 insertions, 15 deletions
diff --git a/test/routines/levelx/xhad.hpp b/test/routines/levelx/xhad.hpp
index fc47a7d6..3e40de87 100644
--- a/test/routines/levelx/xhad.hpp
+++ b/test/routines/levelx/xhad.hpp
@@ -21,12 +21,43 @@
 namespace clblast {
 // =================================================================================================
 
+template <typename T>  // Host-side reference: c[i] = alpha * x[i] * y[i] + beta * c[i] for i in [0, n)
+StatusCode RunReference(const Arguments<T> &args, BuffersHost<T> &buffers_host) {
+  for (auto index = size_t{0}; index < args.n; ++index) {
+    const auto x = buffers_host.x_vec[index * args.x_inc + args.x_offset];  // strided read of x with offset
+    const auto y = buffers_host.y_vec[index * args.y_inc + args.y_offset];  // strided read of y with offset
+    const auto z = buffers_host.c_mat[index]; // NOTE(review): the '* args.z_inc + args.z_offset' indexing is disabled; c_mat is accessed densely from element 0
+    buffers_host.c_mat[index] = args.alpha * x * y + args.beta * z;  // result overwrites c_mat in place
+  }
+  return StatusCode::kSuccess;  // unconditional: no bounds or argument checks are performed here
+}
+
+// Half-precision specialization: converts the inputs to float, calls the above reference implementation, converts c back
+template <>
+StatusCode RunReference<half>(const Arguments<half> &args, BuffersHost<half> &buffers_host) {
+  auto x_buffer2 = HalfToFloatBuffer(buffers_host.x_vec);  // presumably float copies of the half data — helper not visible here
+  auto y_buffer2 = HalfToFloatBuffer(buffers_host.y_vec);
+  auto c_buffer2 = HalfToFloatBuffer(buffers_host.c_mat);
+  auto dummy = std::vector<float>(0);  // empty placeholder passed for every remaining buffer slot
+  auto buffers2 = BuffersHost<float>{x_buffer2, y_buffer2, dummy, dummy, c_buffer2, dummy, dummy};  // NOTE(review): positional init; assumes x_vec, y_vec and c_mat are members 1, 2 and 5 — confirm against BuffersHost
+  auto args2 = Arguments<float>();  // only the fields copied below are set; all others keep their defaults
+  args2.x_size = args.x_size; args2.y_size = args.y_size; args2.c_size = args.c_size;
+  args2.x_inc = args.x_inc; args2.y_inc = args.y_inc; args2.n = args.n;
+  args2.x_offset = args.x_offset; args2.y_offset = args.y_offset;
+  args2.alpha = HalfToFloat(args.alpha); args2.beta = HalfToFloat(args.beta);  // scalars are converted as well
+  auto status = RunReference(args2, buffers2);  // T is deduced as float, so this runs the generic template
+  FloatToHalfBuffer(buffers_host.c_mat, buffers2.c_mat);  // presumably (destination, source): stores the float result back as half — confirm
+  return status;
+}
+
+// =================================================================================================
+
 // See comment at top of file for a description of the class
 template <typename T>
 class TestXhad {
 public:
 
-  // The BLAS level: 4 for the extra routines
+  // The BLAS level: 4 for the extra routines (note: tested with matrix-size values for 'n')
   static size_t BLASLevel() { return 4; }
 
   // The list of arguments relevant for this routine
@@ -34,7 +65,7 @@ public:
     return {kArgN,
             kArgXInc, kArgYInc,
             kArgXOffset, kArgYOffset,
-            kArgAlpha};
+            kArgAlpha, kArgBeta};
   }
   static std::vector<std::string> BuffersIn() { return {kBufVecX, kBufVecY, kBufMatC}; }
   static std::vector<std::string> BuffersOut() { return {kBufMatC}; }
@@ -135,19 +166,6 @@ public:
 };
 
 // =================================================================================================
-
-template <typename T>
-StatusCode RunReference(const Arguments<T> &args, BuffersHost<T> &buffers_host) {
-  for (auto index = size_t{0}; index < args.n; ++index) {
-    const auto x = buffers_host.x_vec[index * args.x_inc + args.x_offset];
-    const auto y = buffers_host.y_vec[index * args.y_inc + args.y_offset];
-    const auto z = buffers_host.c_mat[index]; // * args.z_inc + args.z_offset];
-    buffers_host.c_mat[index] = x * y * args.alpha + z * args.beta;
-  }
-  return StatusCode::kSuccess;
-}
-
-// =================================================================================================
 } // namespace clblast
 
 // CLBLAST_TEST_ROUTINES_XHAD_H_
author	Cedric Nugteren <web@cedricnugteren.nl>	2018-02-02 21:18:37 +0100
committer	Cedric Nugteren <web@cedricnugteren.nl>	2018-02-02 21:18:37 +0100
commit	69ed46c8da69ee18338eca5102ead43410cc01b5 (patch)
tree	575c4eef2a3210117a574f25f81662c503ec207d /test
parent	ae66782eabc574a507b8cfe2b83f2df23b1a36c1 (diff)