summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorCedric Nugteren <web@cedricnugteren.nl>2016-05-22 14:47:14 +0200
committerCedric Nugteren <web@cedricnugteren.nl>2016-05-22 14:47:14 +0200
commit803aaf3070a6b04095b29100e628a4308bb9dcf7 (patch)
tree20964ed41147f185348ff2a0ed1699b0ab6bb967 /src
parent3c9e63c0549870c6a1a8d019ec7cc2abee61601e (diff)
Added level-1 half-precision routines HSWAP/HSCAL/HCOPY/HAXPY/HDOT/HNRM2/HASUM/HSUM/iHAMAX/iHMAX/iHMIN
Diffstat (limited to 'src')
-rw-r--r--src/clblast.cc61
-rw-r--r--src/clblast_c.cc102
2 files changed, 153 insertions, 10 deletions
diff --git a/src/clblast.cc b/src/clblast.cc
index c18dc0a9..098ff7f3 100644
--- a/src/clblast.cc
+++ b/src/clblast.cc
@@ -160,7 +160,7 @@ template StatusCode PUBLIC_API Rotm<double>(const size_t,
cl_mem, const size_t,
cl_command_queue*, cl_event*);
-// Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP
+// Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP/HSWAP
template <typename T>
StatusCode Swap(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
@@ -190,8 +190,12 @@ template StatusCode PUBLIC_API Swap<double2>(const size_t,
cl_mem, const size_t, const size_t,
cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Swap<half>(const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
-// Vector scaling: SSCAL/DSCAL/CSCAL/ZSCAL
+// Vector scaling: SSCAL/DSCAL/CSCAL/ZSCAL/HSCAL
template <typename T>
StatusCode Scal(const size_t n,
const T alpha,
@@ -221,8 +225,12 @@ template StatusCode PUBLIC_API Scal<double2>(const size_t,
const double2,
cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Scal<half>(const size_t,
+ const half,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
-// Vector copy: SCOPY/DCOPY/CCOPY/ZCOPY
+// Vector copy: SCOPY/DCOPY/CCOPY/ZCOPY/HCOPY
template <typename T>
StatusCode Copy(const size_t n,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
@@ -252,6 +260,10 @@ template StatusCode PUBLIC_API Copy<double2>(const size_t,
const cl_mem, const size_t, const size_t,
cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Copy<half>(const size_t,
+ const cl_mem, const size_t, const size_t,
+ cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
// Vector-times-constant plus vector: SAXPY/DAXPY/CAXPY/ZAXPY/HAXPY
template <typename T>
@@ -295,7 +307,7 @@ template StatusCode PUBLIC_API Axpy<half>(const size_t,
cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
-// Dot product of two vectors: SDOT/DDOT
+// Dot product of two vectors: SDOT/DDOT/HDOT
template <typename T>
StatusCode Dot(const size_t n,
cl_mem dot_buffer, const size_t dot_offset,
@@ -321,6 +333,11 @@ template StatusCode PUBLIC_API Dot<double>(const size_t,
const cl_mem, const size_t, const size_t,
const cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Dot<half>(const size_t,
+ cl_mem, const size_t,
+ const cl_mem, const size_t, const size_t,
+ const cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
// Dot product of two complex vectors: CDOTU/ZDOTU
template <typename T>
@@ -376,7 +393,7 @@ template StatusCode PUBLIC_API Dotc<double2>(const size_t,
const cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
-// Euclidian norm of a vector: SNRM2/DNRM2/ScNRM2/DzNRM2
+// Euclidian norm of a vector: SNRM2/DNRM2/ScNRM2/DzNRM2/HNRM2
template <typename T>
StatusCode Nrm2(const size_t n,
cl_mem nrm2_buffer, const size_t nrm2_offset,
@@ -406,8 +423,12 @@ template StatusCode PUBLIC_API Nrm2<double2>(const size_t,
cl_mem, const size_t,
const cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Nrm2<half>(const size_t,
+ cl_mem, const size_t,
+ const cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
-// Absolute sum of values in a vector: SASUM/DASUM/ScASUM/DzASUM
+// Absolute sum of values in a vector: SASUM/DASUM/ScASUM/DzASUM/HASUM
template <typename T>
StatusCode Asum(const size_t n,
cl_mem asum_buffer, const size_t asum_offset,
@@ -437,8 +458,12 @@ template StatusCode PUBLIC_API Asum<double2>(const size_t,
cl_mem, const size_t,
const cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Asum<half>(const size_t,
+ cl_mem, const size_t,
+ const cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
-// Sum of values in a vector (non-BLAS function): SSUM/DSUM/ScSUM/DzSUM
+// Sum of values in a vector (non-BLAS function): SSUM/DSUM/ScSUM/DzSUM/HSUM
template <typename T>
StatusCode Sum(const size_t n,
cl_mem sum_buffer, const size_t sum_offset,
@@ -468,8 +493,12 @@ template StatusCode PUBLIC_API Sum<double2>(const size_t,
cl_mem, const size_t,
const cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Sum<half>(const size_t,
+ cl_mem, const size_t,
+ const cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
-// Index of absolute maximum value in a vector: iSAMAX/iDAMAX/iCAMAX/iZAMAX
+// Index of absolute maximum value in a vector: iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX
template <typename T>
StatusCode Amax(const size_t n,
cl_mem imax_buffer, const size_t imax_offset,
@@ -499,8 +528,12 @@ template StatusCode PUBLIC_API Amax<double2>(const size_t,
cl_mem, const size_t,
const cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Amax<half>(const size_t,
+ cl_mem, const size_t,
+ const cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
-// Index of maximum value in a vector (non-BLAS function): iSMAX/iDMAX/iCMAX/iZMAX
+// Index of maximum value in a vector (non-BLAS function): iSMAX/iDMAX/iCMAX/iZMAX/iHMAX
template <typename T>
StatusCode Max(const size_t n,
cl_mem imax_buffer, const size_t imax_offset,
@@ -530,8 +563,12 @@ template StatusCode PUBLIC_API Max<double2>(const size_t,
cl_mem, const size_t,
const cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Max<half>(const size_t,
+ cl_mem, const size_t,
+ const cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
-// Index of minimum value in a vector (non-BLAS function): iSMIN/iDMIN/iCMIN/iZMIN
+// Index of minimum value in a vector (non-BLAS function): iSMIN/iDMIN/iCMIN/iZMIN/iHMIN
template <typename T>
StatusCode Min(const size_t n,
cl_mem imin_buffer, const size_t imin_offset,
@@ -561,6 +598,10 @@ template StatusCode PUBLIC_API Min<double2>(const size_t,
cl_mem, const size_t,
const cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API Min<half>(const size_t,
+ cl_mem, const size_t,
+ const cl_mem, const size_t, const size_t,
+ cl_command_queue*, cl_event*);
// =================================================================================================
// BLAS level-2 (matrix-vector) routines
diff --git a/src/clblast_c.cc b/src/clblast_c.cc
index 7642a1e4..fd37462f 100644
--- a/src/clblast_c.cc
+++ b/src/clblast_c.cc
@@ -178,6 +178,16 @@ StatusCode CLBlastZswap(const size_t n,
queue, event);
return static_cast<StatusCode>(status);
}
+StatusCode CLBlastHswap(const size_t n,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Swap<half>(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
// SCAL
StatusCode CLBlastSscal(const size_t n,
@@ -220,6 +230,16 @@ StatusCode CLBlastZscal(const size_t n,
queue, event);
return static_cast<StatusCode>(status);
}
+StatusCode CLBlastHscal(const size_t n,
+ const cl_half alpha,
+ cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Scal(n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
// COPY
StatusCode CLBlastScopy(const size_t n,
@@ -262,6 +282,16 @@ StatusCode CLBlastZcopy(const size_t n,
queue, event);
return static_cast<StatusCode>(status);
}
+StatusCode CLBlastHcopy(const size_t n,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Copy<half>(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
// AXPY
StatusCode CLBlastSaxpy(const size_t n,
@@ -350,6 +380,18 @@ StatusCode CLBlastDdot(const size_t n,
queue, event);
return static_cast<StatusCode>(status);
}
+StatusCode CLBlastHdot(const size_t n,
+ cl_mem dot_buffer, const size_t dot_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Dot<half>(n,
+ dot_buffer, dot_offset,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
// DOTU
StatusCode CLBlastCdotu(const size_t n,
@@ -444,6 +486,16 @@ StatusCode CLBlastDznrm2(const size_t n,
queue, event);
return static_cast<StatusCode>(status);
}
+StatusCode CLBlastHnrm2(const size_t n,
+ cl_mem nrm2_buffer, const size_t nrm2_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Nrm2<half>(n,
+ nrm2_buffer, nrm2_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
// ASUM
StatusCode CLBlastSasum(const size_t n,
@@ -486,6 +538,16 @@ StatusCode CLBlastDzasum(const size_t n,
queue, event);
return static_cast<StatusCode>(status);
}
+StatusCode CLBlastHasum(const size_t n,
+ cl_mem asum_buffer, const size_t asum_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Asum<half>(n,
+ asum_buffer, asum_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
// SUM
StatusCode CLBlastSsum(const size_t n,
@@ -528,6 +590,16 @@ StatusCode CLBlastDzsum(const size_t n,
queue, event);
return static_cast<StatusCode>(status);
}
+StatusCode CLBlastHsum(const size_t n,
+ cl_mem sum_buffer, const size_t sum_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Sum<half>(n,
+ sum_buffer, sum_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
// AMAX
StatusCode CLBlastiSamax(const size_t n,
@@ -570,6 +642,16 @@ StatusCode CLBlastiZamax(const size_t n,
queue, event);
return static_cast<StatusCode>(status);
}
+StatusCode CLBlastiHamax(const size_t n,
+ cl_mem imax_buffer, const size_t imax_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Amax<half>(n,
+ imax_buffer, imax_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
// MAX
StatusCode CLBlastiSmax(const size_t n,
@@ -612,6 +694,16 @@ StatusCode CLBlastiZmax(const size_t n,
queue, event);
return static_cast<StatusCode>(status);
}
+StatusCode CLBlastiHmax(const size_t n,
+ cl_mem imax_buffer, const size_t imax_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Max<half>(n,
+ imax_buffer, imax_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
// MIN
StatusCode CLBlastiSmin(const size_t n,
@@ -654,6 +746,16 @@ StatusCode CLBlastiZmin(const size_t n,
queue, event);
return static_cast<StatusCode>(status);
}
+StatusCode CLBlastiHmin(const size_t n,
+ cl_mem imin_buffer, const size_t imin_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event) {
+ auto status = clblast::Min<half>(n,
+ imin_buffer, imin_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event);
+ return static_cast<StatusCode>(status);
+}
// =================================================================================================
// BLAS level-2 (matrix-vector) routines