summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2016-05-22 16:59:14 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2016-05-22 16:59:14 +0200
commit 3e9a07f00ad62c9d0e27c385249ed2b510acceff (patch)
tree d82ac3f95170daecccf47fd49b48f458efb3a351 /src
parent f0cb3fdc81031625370d58da77f7cbe73fc130a7 (diff)
Added level-2 half-precision routines HGER/HSYR/HSPR/HSYR2/HSPR2
Diffstat (limited to 'src')
-rw-r--r-- src/clblast.cc 43
-rw-r--r-- src/clblast_c.cc 80
-rw-r--r-- src/routines/level2/xger.cc 2
-rw-r--r-- src/routines/level2/xher.cc 3
-rw-r--r-- src/routines/level2/xher2.cc 2
-rw-r--r-- src/routines/level2/xspr.cc 1
-rw-r--r-- src/routines/level2/xspr2.cc 1
-rw-r--r-- src/routines/level2/xsyr.cc 1
-rw-r--r-- src/routines/level2/xsyr2.cc 1
9 files changed, 129 insertions, 5 deletions
diff --git a/src/clblast.cc b/src/clblast.cc
index e89b41e8..449c7321 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -1207,7 +1207,7 @@ template StatusCode PUBLIC_API Tpsv<double2>(const Layout, const Triangle, const
cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
-// General rank-1 matrix update: SGER/DGER
+// General rank-1 matrix update: SGER/DGER/HGER
template <typename T>
StatusCode Ger(const Layout layout,
const size_t m, const size_t n,
@@ -1241,6 +1241,13 @@ template StatusCode PUBLIC_API Ger<double>(const Layout,
const cl_mem, const size_t, const size_t,
cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Ger<half>(const Layout, // explicit instantiation of Ger for half (HGER)
+ const size_t, const size_t, // m, n
+ const half, // alpha
+ const cl_mem, const size_t, const size_t, // x: buffer, offset, inc (read-only buffer)
+ const cl_mem, const size_t, const size_t, // y: buffer, offset, inc (read-only buffer)
+ cl_mem, const size_t, const size_t, // a: buffer, offset, ld (the only non-const buffer)
+ cl_command_queue*, cl_event*);
// General rank-1 complex matrix update: CGERU/ZGERU
template <typename T>
@@ -1444,7 +1451,7 @@ template StatusCode PUBLIC_API Hpr2<double2>(const Layout, const Triangle,
cl_mem, const size_t,
cl_command_queue*, cl_event*);
-// Symmetric rank-1 matrix update: SSYR/DSYR
+// Symmetric rank-1 matrix update: SSYR/DSYR/HSYR
template <typename T>
StatusCode Syr(const Layout layout, const Triangle triangle,
const size_t n,
@@ -1474,8 +1481,14 @@ template StatusCode PUBLIC_API Syr<double>(const Layout, const Triangle,
const cl_mem, const size_t, const size_t,
cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Syr<half>(const Layout, const Triangle, // explicit instantiation of Syr for half (HSYR)
+ const size_t, // n
+ const half, // alpha
+ const cl_mem, const size_t, const size_t, // x: buffer, offset, inc (read-only buffer)
+ cl_mem, const size_t, const size_t, // a: buffer, offset, ld (the only non-const buffer)
+ cl_command_queue*, cl_event*);
-// Symmetric packed rank-1 matrix update: SSPR/DSPR
+// Symmetric packed rank-1 matrix update: SSPR/DSPR/HSPR
template <typename T>
StatusCode Spr(const Layout layout, const Triangle triangle,
const size_t n,
@@ -1505,8 +1518,14 @@ template StatusCode PUBLIC_API Spr<double>(const Layout, const Triangle,
const cl_mem, const size_t, const size_t,
cl_mem, const size_t,
cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Spr<half>(const Layout, const Triangle, // explicit instantiation of Spr for half (HSPR)
+ const size_t, // n
+ const half, // alpha
+ const cl_mem, const size_t, const size_t, // x: buffer, offset, inc (read-only buffer)
+ cl_mem, const size_t, // ap: buffer, offset (no leading-dimension argument)
+ cl_command_queue*, cl_event*);
-// Symmetric rank-2 matrix update: SSYR2/DSYR2
+// Symmetric rank-2 matrix update: SSYR2/DSYR2/HSYR2
template <typename T>
StatusCode Syr2(const Layout layout, const Triangle triangle,
const size_t n,
@@ -1540,8 +1559,15 @@ template StatusCode PUBLIC_API Syr2<double>(const Layout, const Triangle,
const cl_mem, const size_t, const size_t,
cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Syr2<half>(const Layout, const Triangle, // explicit instantiation of Syr2 for half (HSYR2)
+ const size_t, // n
+ const half, // alpha
+ const cl_mem, const size_t, const size_t, // x: buffer, offset, inc (read-only buffer)
+ const cl_mem, const size_t, const size_t, // y: buffer, offset, inc (read-only buffer)
+ cl_mem, const size_t, const size_t, // a: buffer, offset, ld (the only non-const buffer)
+ cl_command_queue*, cl_event*);
-// Symmetric packed rank-2 matrix update: SSPR2/DSPR2
+// Symmetric packed rank-2 matrix update: SSPR2/DSPR2/HSPR2
template <typename T>
StatusCode Spr2(const Layout layout, const Triangle triangle,
const size_t n,
@@ -1575,6 +1601,13 @@ template StatusCode PUBLIC_API Spr2<double>(const Layout, const Triangle,
const cl_mem, const size_t, const size_t,
cl_mem, const size_t,
cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Spr2<half>(const Layout, const Triangle, // explicit instantiation of Spr2 for half (HSPR2)
+ const size_t, // n
+ const half, // alpha
+ const cl_mem, const size_t, const size_t, // x: buffer, offset, inc (read-only buffer)
+ const cl_mem, const size_t, const size_t, // y: buffer, offset, inc (read-only buffer)
+ cl_mem, const size_t, // ap: buffer, offset (no leading-dimension argument)
+ cl_command_queue*, cl_event*);
// =================================================================================================
// BLAS level-3 (matrix-matrix) routines
diff --git a/src/clblast_c.cc b/src/clblast_c.cc
index f1a81be5..c368a03c 100644
--- a/src/clblast_c.cc
+++ b/src/clblast_c.cc
@@ -1702,6 +1702,22 @@ StatusCode CLBlastDger(const Layout layout,
queue, event);
return static_cast<StatusCode>(status);
}
+StatusCode CLBlastHger(const Layout layout, // HGER: half-precision counterpart of CLBlastDger above
+ const size_t m, const size_t n,
+ const cl_half alpha, // scalar is taken by value as cl_half
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_mem a_buffer, const size_t a_offset, const size_t a_ld, // a_buffer is the only non-const buffer
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Ger(static_cast<clblast::Layout>(layout), // layout is cast to the clblast:: enum; the rest is forwarded unchanged
+ m, n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event);
+ return static_cast<StatusCode>(status); // cast the clblast::Ger result to this file's unqualified StatusCode
+}
// GERU
StatusCode CLBlastCgeru(const Layout layout,
@@ -1938,6 +1954,21 @@ StatusCode CLBlastDsyr(const Layout layout, const Triangle triangle,
queue, event);
return static_cast<StatusCode>(status);
}
+StatusCode CLBlastHsyr(const Layout layout, const Triangle triangle, // HSYR: half-precision counterpart of CLBlastDsyr above
+ const size_t n,
+ const cl_half alpha, // scalar is taken by value as cl_half
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem a_buffer, const size_t a_offset, const size_t a_ld, // a_buffer is the only non-const buffer
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Syr(static_cast<clblast::Layout>(layout), // both enums are cast to their clblast:: types
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event);
+ return static_cast<StatusCode>(status); // cast the clblast::Syr result to this file's unqualified StatusCode
+}
// SPR
StatusCode CLBlastSspr(const Layout layout, const Triangle triangle,
@@ -1970,6 +2001,21 @@ StatusCode CLBlastDspr(const Layout layout, const Triangle triangle,
queue, event);
return static_cast<StatusCode>(status);
}
+StatusCode CLBlastHspr(const Layout layout, const Triangle triangle, // HSPR: half-precision counterpart of CLBlastDspr above
+ const size_t n,
+ const cl_half alpha, // scalar is taken by value as cl_half
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem ap_buffer, const size_t ap_offset, // only a buffer and offset; no leading dimension is passed
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Spr(static_cast<clblast::Layout>(layout), // both enums are cast to their clblast:: types
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ ap_buffer, ap_offset,
+ queue, event);
+ return static_cast<StatusCode>(status); // cast the clblast::Spr result to this file's unqualified StatusCode
+}
// SYR2
StatusCode CLBlastSsyr2(const Layout layout, const Triangle triangle,
@@ -2006,6 +2052,23 @@ StatusCode CLBlastDsyr2(const Layout layout, const Triangle triangle,
queue, event);
return static_cast<StatusCode>(status);
}
+StatusCode CLBlastHsyr2(const Layout layout, const Triangle triangle, // HSYR2: half-precision counterpart of CLBlastDsyr2 above
+ const size_t n,
+ const cl_half alpha, // scalar is taken by value as cl_half
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_mem a_buffer, const size_t a_offset, const size_t a_ld, // a_buffer is the only non-const buffer
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Syr2(static_cast<clblast::Layout>(layout), // both enums are cast to their clblast:: types
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event);
+ return static_cast<StatusCode>(status); // cast the clblast::Syr2 result to this file's unqualified StatusCode
+}
// SPR2
StatusCode CLBlastSspr2(const Layout layout, const Triangle triangle,
@@ -2042,6 +2105,23 @@ StatusCode CLBlastDspr2(const Layout layout, const Triangle triangle,
queue, event);
return static_cast<StatusCode>(status);
}
+StatusCode CLBlastHspr2(const Layout layout, const Triangle triangle, // HSPR2: half-precision counterpart of CLBlastDspr2 above
+ const size_t n,
+ const cl_half alpha, // scalar is taken by value as cl_half
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_mem ap_buffer, const size_t ap_offset, // only a buffer and offset; no leading dimension is passed
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Spr2(static_cast<clblast::Layout>(layout), // both enums are cast to their clblast:: types
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ ap_buffer, ap_offset,
+ queue, event);
+ return static_cast<StatusCode>(status); // cast the clblast::Spr2 result to this file's unqualified StatusCode
+}
// =================================================================================================
// BLAS level-3 (matrix-matrix) routines
diff --git a/src/routines/level2/xger.cc b/src/routines/level2/xger.cc
index 47d7abe2..d1f98990 100644
--- a/src/routines/level2/xger.cc
+++ b/src/routines/level2/xger.cc
@@ -20,6 +20,7 @@ namespace clblast {
// =================================================================================================
// Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xger<half>::precision_ = Precision::kHalf; // new: half maps to Precision::kHalf
template <> const Precision Xger<float>::precision_ = Precision::kSingle;
template <> const Precision Xger<double>::precision_ = Precision::kDouble;
template <> const Precision Xger<float2>::precision_ = Precision::kComplexSingle;
@@ -104,6 +105,7 @@ StatusCode Xger<T>::DoGer(const Layout layout,
// =================================================================================================
// Compiles the templated class
+template class Xger<half>; // new: explicit instantiation of the class for half
template class Xger<float>;
template class Xger<double>;
template class Xger<float2>;
diff --git a/src/routines/level2/xher.cc b/src/routines/level2/xher.cc
index 852e3f15..73e7a47d 100644
--- a/src/routines/level2/xher.cc
+++ b/src/routines/level2/xher.cc
@@ -19,6 +19,7 @@ namespace clblast {
// =================================================================================================
// Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xher<half, half>::precision_ = Precision::kHalf; // new: <half, half> maps to Precision::kHalf
template <> const Precision Xher<float, float>::precision_ = Precision::kSingle;
template <> const Precision Xher<double, double>::precision_ = Precision::kDouble;
template <> const Precision Xher<float2, float>::precision_ = Precision::kComplexSingle;
@@ -43,6 +44,7 @@ template <> float2 Xher<float2,float>::GetAlpha(const float alpha) { return floa
template <> double2 Xher<double2,double>::GetAlpha(const double alpha) { return double2{alpha, 0.0}; } // alpha becomes the first component, the second is 0.0
template <> float Xher<float,float>::GetAlpha(const float alpha) { return alpha; } // same type in and out: returned unchanged
template <> double Xher<double,double>::GetAlpha(const double alpha) { return alpha; } // same type in and out: returned unchanged
+template <> half Xher<half,half>::GetAlpha(const half alpha) { return alpha; } // new: half alpha is returned unchanged
// =================================================================================================
@@ -114,6 +116,7 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle,
// =================================================================================================
// Compiles the templated class
+template class Xher<half, half>; // new: explicit instantiation of the class for <half, half>
template class Xher<float, float>;
template class Xher<double, double>;
template class Xher<float2, float>;
diff --git a/src/routines/level2/xher2.cc b/src/routines/level2/xher2.cc
index 82052187..a73dde52 100644
--- a/src/routines/level2/xher2.cc
+++ b/src/routines/level2/xher2.cc
@@ -19,6 +19,7 @@ namespace clblast {
// =================================================================================================
// Specific implementations to get the memory-type based on a template argument
+template <> const Precision Xher2<half>::precision_ = Precision::kHalf; // new: half maps to Precision::kHalf
template <> const Precision Xher2<float>::precision_ = Precision::kSingle;
template <> const Precision Xher2<double>::precision_ = Precision::kDouble;
template <> const Precision Xher2<float2>::precision_ = Precision::kComplexSingle;
@@ -106,6 +107,7 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle,
// =================================================================================================
// Compiles the templated class
+template class Xher2<half>; // new: explicit instantiation of the class for half
template class Xher2<float>;
template class Xher2<double>;
template class Xher2<float2>;
diff --git a/src/routines/level2/xspr.cc b/src/routines/level2/xspr.cc
index 55af2f29..c556b920 100644
--- a/src/routines/level2/xspr.cc
+++ b/src/routines/level2/xspr.cc
@@ -44,6 +44,7 @@ StatusCode Xspr<T>::DoSpr(const Layout layout, const Triangle triangle,
// =================================================================================================
// Compiles the templated class
+template class Xspr<half>; // new: explicit instantiation of the class for half
template class Xspr<float>;
template class Xspr<double>;
diff --git a/src/routines/level2/xspr2.cc b/src/routines/level2/xspr2.cc
index 9a3f97ce..c4ad5dc4 100644
--- a/src/routines/level2/xspr2.cc
+++ b/src/routines/level2/xspr2.cc
@@ -46,6 +46,7 @@ StatusCode Xspr2<T>::DoSpr2(const Layout layout, const Triangle triangle,
// =================================================================================================
// Compiles the templated class
+template class Xspr2<half>; // new: explicit instantiation of the class for half
template class Xspr2<float>;
template class Xspr2<double>;
diff --git a/src/routines/level2/xsyr.cc b/src/routines/level2/xsyr.cc
index 4b3928e5..892517d7 100644
--- a/src/routines/level2/xsyr.cc
+++ b/src/routines/level2/xsyr.cc
@@ -43,6 +43,7 @@ StatusCode Xsyr<T>::DoSyr(const Layout layout, const Triangle triangle,
// =================================================================================================
// Compiles the templated class
+template class Xsyr<half>; // new: explicit instantiation of the class for half
template class Xsyr<float>;
template class Xsyr<double>;
diff --git a/src/routines/level2/xsyr2.cc b/src/routines/level2/xsyr2.cc
index 3ae389e0..e6dfd158 100644
--- a/src/routines/level2/xsyr2.cc
+++ b/src/routines/level2/xsyr2.cc
@@ -45,6 +45,7 @@ StatusCode Xsyr2<T>::DoSyr2(const Layout layout, const Triangle triangle,
// =================================================================================================
// Compiles the templated class
+template class Xsyr2<half>; // new: explicit instantiation of the class for half
template class Xsyr2<float>;
template class Xsyr2<double>;