From f70ded34f3000dee742fe23631f3066c8f486dfd Mon Sep 17 00:00:00 2001
From: Cedric Nugteren
Date: Sun, 22 May 2016 14:26:19 +0200
Subject: Added half-precision support for all level 1 routines

---
 src/routines/level1/xamax.cc | 2 ++
 src/routines/level1/xasum.cc | 2 ++
 src/routines/level1/xcopy.cc | 2 ++
 src/routines/level1/xdot.cc  | 2 ++
 src/routines/level1/xnrm2.cc | 2 ++
 src/routines/level1/xscal.cc | 2 ++
 src/routines/level1/xswap.cc | 2 ++
 src/tuning/xdot.cc           | 2 +-
 8 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'src')

diff --git a/src/routines/level1/xamax.cc b/src/routines/level1/xamax.cc
index 682e2b63..335e59bc 100644
--- a/src/routines/level1/xamax.cc
+++ b/src/routines/level1/xamax.cc
@@ -20,6 +20,7 @@ namespace clblast {
 // =================================================================================================
 
 // Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xamax<half>::precision_ = Precision::kHalf;
 template <> const Precision Xamax<float>::precision_ = Precision::kSingle;
 template <> const Precision Xamax<double>::precision_ = Precision::kDouble;
 template <> const Precision Xamax<float2>::precision_ = Precision::kComplexSingle;
@@ -103,6 +104,7 @@ StatusCode Xamax<T>::DoAmax(const size_t n,
 // =================================================================================================
 
 // Compiles the templated class
+template class Xamax<half>;
 template class Xamax<float>;
 template class Xamax<double>;
 template class Xamax<float2>;
diff --git a/src/routines/level1/xasum.cc b/src/routines/level1/xasum.cc
index ea33d7e1..e04f7064 100644
--- a/src/routines/level1/xasum.cc
+++ b/src/routines/level1/xasum.cc
@@ -20,6 +20,7 @@ namespace clblast {
 // =================================================================================================
 
 // Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xasum<half>::precision_ = Precision::kHalf;
 template <> const Precision Xasum<float>::precision_ = Precision::kSingle;
 template <> const Precision Xasum<double>::precision_ = Precision::kDouble;
 template <> const Precision Xasum<float2>::precision_ = Precision::kComplexSingle;
@@ -100,6 +101,7 @@ StatusCode Xasum<T>::DoAsum(const size_t n,
 // =================================================================================================
 
 // Compiles the templated class
+template class Xasum<half>;
 template class Xasum<float>;
 template class Xasum<double>;
 template class Xasum<float2>;
diff --git a/src/routines/level1/xcopy.cc b/src/routines/level1/xcopy.cc
index d34482ce..ff8f5999 100644
--- a/src/routines/level1/xcopy.cc
+++ b/src/routines/level1/xcopy.cc
@@ -20,6 +20,7 @@ namespace clblast {
 // =================================================================================================
 
 // Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xcopy<half>::precision_ = Precision::kHalf;
 template <> const Precision Xcopy<float>::precision_ = Precision::kSingle;
 template <> const Precision Xcopy<double>::precision_ = Precision::kDouble;
 template <> const Precision Xcopy<float2>::precision_ = Precision::kComplexSingle;
@@ -105,6 +106,7 @@ StatusCode Xcopy<T>::DoCopy(const size_t n,
 // =================================================================================================
 
 // Compiles the templated class
+template class Xcopy<half>;
 template class Xcopy<float>;
 template class Xcopy<double>;
 template class Xcopy<float2>;
diff --git a/src/routines/level1/xdot.cc b/src/routines/level1/xdot.cc
index b2513485..db6a369e 100644
--- a/src/routines/level1/xdot.cc
+++ b/src/routines/level1/xdot.cc
@@ -20,6 +20,7 @@ namespace clblast {
 // =================================================================================================
 
 // Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xdot<half>::precision_ = Precision::kHalf;
 template <> const Precision Xdot<float>::precision_ = Precision::kSingle;
 template <> const Precision Xdot<double>::precision_ = Precision::kDouble;
 template <> const Precision Xdot<float2>::precision_ = Precision::kComplexSingle;
@@ -108,6 +109,7 @@ StatusCode Xdot<T>::DoDot(const size_t n,
 // =================================================================================================
 
 // Compiles the templated class
+template class Xdot<half>;
 template class Xdot<float>;
 template class Xdot<double>;
 template class Xdot<float2>;
diff --git a/src/routines/level1/xnrm2.cc b/src/routines/level1/xnrm2.cc
index 86166a0c..14f7f6aa 100644
--- a/src/routines/level1/xnrm2.cc
+++ b/src/routines/level1/xnrm2.cc
@@ -20,6 +20,7 @@ namespace clblast {
 // =================================================================================================
 
 // Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xnrm2<half>::precision_ = Precision::kHalf;
 template <> const Precision Xnrm2<float>::precision_ = Precision::kSingle;
 template <> const Precision Xnrm2<double>::precision_ = Precision::kDouble;
 template <> const Precision Xnrm2<float2>::precision_ = Precision::kComplexSingle;
@@ -100,6 +101,7 @@ StatusCode Xnrm2<T>::DoNrm2(const size_t n,
 // =================================================================================================
 
 // Compiles the templated class
+template class Xnrm2<half>;
 template class Xnrm2<float>;
 template class Xnrm2<double>;
 template class Xnrm2<float2>;
diff --git a/src/routines/level1/xscal.cc b/src/routines/level1/xscal.cc
index b92e2cdf..1207acfa 100644
--- a/src/routines/level1/xscal.cc
+++ b/src/routines/level1/xscal.cc
@@ -20,6 +20,7 @@ namespace clblast {
 // =================================================================================================
 
 // Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xscal<half>::precision_ = Precision::kHalf;
 template <> const Precision Xscal<float>::precision_ = Precision::kSingle;
 template <> const Precision Xscal<double>::precision_ = Precision::kDouble;
 template <> const Precision Xscal<float2>::precision_ = Precision::kComplexSingle;
@@ -99,6 +100,7 @@ StatusCode Xscal<T>::DoScal(const size_t n, const T alpha,
 // =================================================================================================
 
 // Compiles the templated class
+template class Xscal<half>;
 template class Xscal<float>;
 template class Xscal<double>;
 template class Xscal<float2>;
diff --git a/src/routines/level1/xswap.cc b/src/routines/level1/xswap.cc
index bfc4a739..8844abff 100644
--- a/src/routines/level1/xswap.cc
+++ b/src/routines/level1/xswap.cc
@@ -20,6 +20,7 @@ namespace clblast {
 // =================================================================================================
 
 // Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xswap<half>::precision_ = Precision::kHalf;
 template <> const Precision Xswap<float>::precision_ = Precision::kSingle;
 template <> const Precision Xswap<double>::precision_ = Precision::kDouble;
 template <> const Precision Xswap<float2>::precision_ = Precision::kComplexSingle;
@@ -105,6 +106,7 @@ StatusCode Xswap<T>::DoSwap(const size_t n,
 // =================================================================================================
 
 // Compiles the templated class
+template class Xswap<half>;
 template class Xswap<float>;
 template class Xswap<double>;
 template class Xswap<float2>;
diff --git a/src/tuning/xdot.cc b/src/tuning/xdot.cc
index cff656c3..5f30296c 100644
--- a/src/tuning/xdot.cc
+++ b/src/tuning/xdot.cc
@@ -119,7 +119,7 @@ using double2 = clblast::double2;
 template <int V>
 void StartVariation(int argc, char *argv[]) {
   switch(clblast::GetPrecision(argc, argv)) {
-    case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+    case clblast::Precision::kHalf: clblast::Tuner<clblast::TuneXdot<half, V>, half>(argc, argv); break;
     case clblast::Precision::kSingle: clblast::Tuner<clblast::TuneXdot<float, V>, float>(argc, argv); break;
     case clblast::Precision::kDouble: clblast::Tuner<clblast::TuneXdot<double, V>, double>(argc, argv); break;
     case clblast::Precision::kComplexSingle: clblast::Tuner<clblast::TuneXdot<float2, V>, float2>(argc, argv); break;
-- 
cgit v1.2.3