diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-05-13 20:49:34 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-05-13 20:49:34 +0200 |
commit | 120c31a30f933eea12d4dfffd4951fa22102ef5f (patch) | |
tree | 853aa6fae0522c9e92fce266c5fddb12a19dafd3 /src/routines/level1 | |
parent | f2ba75890c522b4fe1762bfeac3e08667cf9588a (diff) |
Initial experimental version of the half-precision HAXPY routine
Diffstat (limited to 'src/routines/level1')
-rw-r--r-- | src/routines/level1/xaxpy.cc | 6 |
1 files changed, 4 insertions, 2 deletions
diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc
index 96809a57..b7956bf2 100644
--- a/src/routines/level1/xaxpy.cc
+++ b/src/routines/level1/xaxpy.cc
@@ -20,6 +20,7 @@ namespace clblast {
 // =================================================================================================
 // Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xaxpy<half>::precision_ = Precision::kHalf;
 template <> const Precision Xaxpy<float>::precision_ = Precision::kSingle;
 template <> const Precision Xaxpy<double>::precision_ = Precision::kDouble;
 template <> const Precision Xaxpy<float2>::precision_ = Precision::kComplexSingle;
@@ -70,13 +71,13 @@ StatusCode Xaxpy<T>::DoAxpy(const size_t n, const T alpha,
 // Sets the kernel arguments
 if (use_fast_kernel) {
 kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, alpha);
+ kernel.SetArgument(1, static_cast<typename RealArg<T>::Type>(alpha));
 kernel.SetArgument(2, x_buffer());
 kernel.SetArgument(3, y_buffer());
 } else {
 kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, alpha);
+ kernel.SetArgument(1, static_cast<typename RealArg<T>::Type>(alpha));
 kernel.SetArgument(2, x_buffer());
 kernel.SetArgument(3, static_cast<int>(x_offset));
 kernel.SetArgument(4, static_cast<int>(x_inc));
@@ -107,6 +108,7 @@ StatusCode Xaxpy<T>::DoAxpy(const size_t n, const T alpha,
 // =================================================================================================
 // Compiles the templated class
+template class Xaxpy<half>;
 template class Xaxpy<float>;
 template class Xaxpy<double>;
 template class Xaxpy<float2>; |