From f70ded34f3000dee742fe23631f3066c8f486dfd Mon Sep 17 00:00:00 2001
From: Cedric Nugteren <web@cedricnugteren.nl>
Date: Sun, 22 May 2016 14:26:19 +0200
Subject: Added half-precision support for all level 1 routines

---
 src/routines/level1/xamax.cc | 2 ++
 src/routines/level1/xasum.cc | 2 ++
 src/routines/level1/xcopy.cc | 2 ++
 src/routines/level1/xdot.cc  | 2 ++
 src/routines/level1/xnrm2.cc | 2 ++
 src/routines/level1/xscal.cc | 2 ++
 src/routines/level1/xswap.cc | 2 ++
 src/tuning/xdot.cc           | 2 +-
 8 files changed, 15 insertions(+), 1 deletion(-)

(limited to 'src')
diff --git a/src/routines/level1/xamax.cc b/src/routines/level1/xamax.cc
index 682e2b63..335e59bc 100644
--- a/src/routines/level1/xamax.cc
+++ b/src/routines/level1/xamax.cc
@@ -20,6 +20,7 @@ namespace clblast {
 // =================================================================================================
 
 // Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xamax<half>::precision_ = Precision::kHalf;
 template <> const Precision Xamax<float>::precision_ = Precision::kSingle;
 template <> const Precision Xamax<double>::precision_ = Precision::kDouble;
 template <> const Precision Xamax<float2>::precision_ = Precision::kComplexSingle;
@@ -103,6 +104,7 @@ StatusCode Xamax<T>::DoAmax(const size_t n,
 // =================================================================================================
 
 // Compiles the templated class
+template class Xamax<half>;
 template class Xamax<float>;
 template class Xamax<double>;
 template class Xamax<float2>;
diff --git a/src/routines/level1/xasum.cc b/src/routines/level1/xasum.cc
index ea33d7e1..e04f7064 100644
--- a/src/routines/level1/xasum.cc
+++ b/src/routines/level1/xasum.cc
@@ -20,6 +20,7 @@ namespace clblast {
 // =================================================================================================
 
 // Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xasum<half>::precision_ = Precision::kHalf;
 template <> const Precision Xasum<float>::precision_ = Precision::kSingle;
 template <> const Precision Xasum<double>::precision_ = Precision::kDouble;
 template <> const Precision Xasum<float2>::precision_ = Precision::kComplexSingle;
@@ -100,6 +101,7 @@ StatusCode Xasum<T>::DoAsum(const size_t n,
 // =================================================================================================
 
 // Compiles the templated class
+template class Xasum<half>;
 template class Xasum<float>;
 template class Xasum<double>;
 template class Xasum<float2>;
diff --git a/src/routines/level1/xcopy.cc b/src/routines/level1/xcopy.cc
index d34482ce..ff8f5999 100644
--- a/src/routines/level1/xcopy.cc
+++ b/src/routines/level1/xcopy.cc
@@ -20,6 +20,7 @@ namespace clblast {
 // =================================================================================================
 
 // Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xcopy<half>::precision_ = Precision::kHalf;
 template <> const Precision Xcopy<float>::precision_ = Precision::kSingle;
 template <> const Precision Xcopy<double>::precision_ = Precision::kDouble;
 template <> const Precision Xcopy<float2>::precision_ = Precision::kComplexSingle;
@@ -105,6 +106,7 @@ StatusCode Xcopy<T>::DoCopy(const size_t n,
 // =================================================================================================
 
 // Compiles the templated class
+template class Xcopy<half>;
 template class Xcopy<float>;
 template class Xcopy<double>;
 template class Xcopy<float2>;
diff --git a/src/routines/level1/xdot.cc b/src/routines/level1/xdot.cc
index b2513485..db6a369e 100644
--- a/src/routines/level1/xdot.cc
+++ b/src/routines/level1/xdot.cc
@@ -20,6 +20,7 @@ namespace clblast {
 // =================================================================================================
 
 // Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xdot<half>::precision_ = Precision::kHalf;
 template <> const Precision Xdot<float>::precision_ = Precision::kSingle;
 template <> const Precision Xdot<double>::precision_ = Precision::kDouble;
 template <> const Precision Xdot<float2>::precision_ = Precision::kComplexSingle;
@@ -108,6 +109,7 @@ StatusCode Xdot<T>::DoDot(const size_t n,
 // =================================================================================================
 
 // Compiles the templated class
+template class Xdot<half>;
 template class Xdot<float>;
 template class Xdot<double>;
 template class Xdot<float2>;
diff --git a/src/routines/level1/xnrm2.cc b/src/routines/level1/xnrm2.cc
index 86166a0c..14f7f6aa 100644
--- a/src/routines/level1/xnrm2.cc
+++ b/src/routines/level1/xnrm2.cc
@@ -20,6 +20,7 @@ namespace clblast {
 // =================================================================================================
 
 // Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xnrm2<half>::precision_ = Precision::kHalf;
 template <> const Precision Xnrm2<float>::precision_ = Precision::kSingle;
 template <> const Precision Xnrm2<double>::precision_ = Precision::kDouble;
 template <> const Precision Xnrm2<float2>::precision_ = Precision::kComplexSingle;
@@ -100,6 +101,7 @@ StatusCode Xnrm2<T>::DoNrm2(const size_t n,
 // =================================================================================================
 
 // Compiles the templated class
+template class Xnrm2<half>;
 template class Xnrm2<float>;
 template class Xnrm2<double>;
 template class Xnrm2<float2>;
diff --git a/src/routines/level1/xscal.cc b/src/routines/level1/xscal.cc
index b92e2cdf..1207acfa 100644
--- a/src/routines/level1/xscal.cc
+++ b/src/routines/level1/xscal.cc
@@ -20,6 +20,7 @@ namespace clblast {
 // =================================================================================================
 
 // Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xscal<half>::precision_ = Precision::kHalf;
 template <> const Precision Xscal<float>::precision_ = Precision::kSingle;
 template <> const Precision Xscal<double>::precision_ = Precision::kDouble;
 template <> const Precision Xscal<float2>::precision_ = Precision::kComplexSingle;
@@ -99,6 +100,7 @@ StatusCode Xscal<T>::DoScal(const size_t n, const T alpha,
 // =================================================================================================
 
 // Compiles the templated class
+template class Xscal<half>;
 template class Xscal<float>;
 template class Xscal<double>;
 template class Xscal<float2>;
diff --git a/src/routines/level1/xswap.cc b/src/routines/level1/xswap.cc
index bfc4a739..8844abff 100644
--- a/src/routines/level1/xswap.cc
+++ b/src/routines/level1/xswap.cc
@@ -20,6 +20,7 @@ namespace clblast {
 // =================================================================================================
 
 // Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xswap<half>::precision_ = Precision::kHalf;
 template <> const Precision Xswap<float>::precision_ = Precision::kSingle;
 template <> const Precision Xswap<double>::precision_ = Precision::kDouble;
 template <> const Precision Xswap<float2>::precision_ = Precision::kComplexSingle;
@@ -105,6 +106,7 @@ StatusCode Xswap<T>::DoSwap(const size_t n,
 // =================================================================================================
 
 // Compiles the templated class
+template class Xswap<half>;
 template class Xswap<float>;
 template class Xswap<double>;
 template class Xswap<float2>;
diff --git a/src/tuning/xdot.cc b/src/tuning/xdot.cc
index cff656c3..5f30296c 100644
--- a/src/tuning/xdot.cc
+++ b/src/tuning/xdot.cc
@@ -119,7 +119,7 @@ using double2 = clblast::double2;
 template <int V>
 void StartVariation(int argc, char *argv[]) {
   switch(clblast::GetPrecision(argc, argv)) {
-    case clblast::Precision::kHalf: throw std::runtime_error("Unsupported precision mode");
+    case clblast::Precision::kHalf: clblast::Tuner<clblast::TuneXdot<half, V>, half>(argc, argv); break;
     case clblast::Precision::kSingle: clblast::Tuner<clblast::TuneXdot<float, V>, float>(argc, argv); break;
     case clblast::Precision::kDouble: clblast::Tuner<clblast::TuneXdot<double, V>, double>(argc, argv); break;
     case clblast::Precision::kComplexSingle: clblast::Tuner<clblast::TuneXdot<float2, V>, float2>(argc, argv); break;
-- 
cgit v1.2.3