summary | refs | log | tree | commit | diff
path: root/src/routines/level1
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2016-05-13 20:49:34 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2016-05-13 20:49:34 +0200
commit 120c31a30f933eea12d4dfffd4951fa22102ef5f (patch)
tree 853aa6fae0522c9e92fce266c5fddb12a19dafd3 /src/routines/level1
parent f2ba75890c522b4fe1762bfeac3e08667cf9588a (diff)
Initial experimental version of the half-precision HAXPY routine
Diffstat (limited to 'src/routines/level1')
-rw-r--r-- src/routines/level1/xaxpy.cc 6
1 files changed, 4 insertions, 2 deletions
diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc
index 96809a57..b7956bf2 100644
--- a/src/routines/level1/xaxpy.cc
+++ b/src/routines/level1/xaxpy.cc
@@ -20,6 +20,7 @@ namespace clblast {
// =================================================================================================
// Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xaxpy<half>::precision_ = Precision::kHalf;
template <> const Precision Xaxpy<float>::precision_ = Precision::kSingle;
template <> const Precision Xaxpy<double>::precision_ = Precision::kDouble;
template <> const Precision Xaxpy<float2>::precision_ = Precision::kComplexSingle;
@@ -70,13 +71,13 @@ StatusCode Xaxpy<T>::DoAxpy(const size_t n, const T alpha,
// Sets the kernel arguments
if (use_fast_kernel) {
kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, alpha);
+ kernel.SetArgument(1, static_cast<typename RealArg<T>::Type>(alpha));
kernel.SetArgument(2, x_buffer());
kernel.SetArgument(3, y_buffer());
}
else {
kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, alpha);
+ kernel.SetArgument(1, static_cast<typename RealArg<T>::Type>(alpha));
kernel.SetArgument(2, x_buffer());
kernel.SetArgument(3, static_cast<int>(x_offset));
kernel.SetArgument(4, static_cast<int>(x_inc));
@@ -107,6 +108,7 @@ StatusCode Xaxpy<T>::DoAxpy(const size_t n, const T alpha,
// =================================================================================================
// Compiles the templated class
+template class Xaxpy<half>;
template class Xaxpy<float>;
template class Xaxpy<double>;
template class Xaxpy<float2>;