diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2016-05-22 14:47:14 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2016-05-22 14:47:14 +0200 |
commit | 803aaf3070a6b04095b29100e628a4308bb9dcf7 (patch) | |
tree | 20964ed41147f185348ff2a0ed1699b0ab6bb967 /src | |
parent | 3c9e63c0549870c6a1a8d019ec7cc2abee61601e (diff) |
Added level-1 half-precision routines HSWAP/HSCAL/HCOPY/HAXPY/HDOT/HNRM2/HASUM/HSUM/iHAMAX/iHMAX/iHMIN
Diffstat (limited to 'src')
-rw-r--r-- | src/clblast.cc | 61 | ||||
-rw-r--r-- | src/clblast_c.cc | 102 |
2 files changed, 153 insertions, 10 deletions
diff --git a/src/clblast.cc b/src/clblast.cc index c18dc0a9..098ff7f3 100644 --- a/src/clblast.cc +++ b/src/clblast.cc @@ -160,7 +160,7 @@ template StatusCode PUBLIC_API Rotm<double>(const size_t, cl_mem, const size_t, cl_command_queue*, cl_event*); -// Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP +// Swap two vectors: SSWAP/DSWAP/CSWAP/ZSWAP/HSWAP template <typename T> StatusCode Swap(const size_t n, cl_mem x_buffer, const size_t x_offset, const size_t x_inc, @@ -190,8 +190,12 @@ template StatusCode PUBLIC_API Swap<double2>(const size_t, cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Swap<half>(const size_t, + cl_mem, const size_t, const size_t, + cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); -// Vector scaling: SSCAL/DSCAL/CSCAL/ZSCAL +// Vector scaling: SSCAL/DSCAL/CSCAL/ZSCAL/HSCAL template <typename T> StatusCode Scal(const size_t n, const T alpha, @@ -221,8 +225,12 @@ template StatusCode PUBLIC_API Scal<double2>(const size_t, const double2, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Scal<half>(const size_t, + const half, + cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); -// Vector copy: SCOPY/DCOPY/CCOPY/ZCOPY +// Vector copy: SCOPY/DCOPY/CCOPY/ZCOPY/HCOPY template <typename T> StatusCode Copy(const size_t n, const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, @@ -252,6 +260,10 @@ template StatusCode PUBLIC_API Copy<double2>(const size_t, const cl_mem, const size_t, const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Copy<half>(const size_t, + const cl_mem, const size_t, const size_t, + cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); // Vector-times-constant plus vector: SAXPY/DAXPY/CAXPY/ZAXPY/HAXPY template <typename T> @@ -295,7 +307,7 @@ template StatusCode PUBLIC_API Axpy<half>(const size_t, cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); -// Dot product of two vectors: SDOT/DDOT +// Dot product of two vectors: SDOT/DDOT/HDOT template <typename T> StatusCode Dot(const size_t n, cl_mem dot_buffer, const size_t dot_offset, @@ -321,6 +333,11 @@ template StatusCode PUBLIC_API Dot<double>(const size_t, const cl_mem, const size_t, const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Dot<half>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); // Dot product of two complex vectors: CDOTU/ZDOTU template <typename T> @@ -376,7 +393,7 @@ template StatusCode PUBLIC_API Dotc<double2>(const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); -// Euclidian norm of a vector: SNRM2/DNRM2/ScNRM2/DzNRM2 +// Euclidian norm of a vector: SNRM2/DNRM2/ScNRM2/DzNRM2/HNRM2 template <typename T> StatusCode Nrm2(const size_t n, cl_mem nrm2_buffer, const size_t nrm2_offset, @@ -406,8 +423,12 @@ template StatusCode PUBLIC_API Nrm2<double2>(const size_t, cl_mem, const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Nrm2<half>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); -// Absolute sum of values in a vector: SASUM/DASUM/ScASUM/DzASUM +// Absolute sum of values in a vector: SASUM/DASUM/ScASUM/DzASUM/HASUM template <typename T> StatusCode Asum(const size_t n, cl_mem asum_buffer, const size_t asum_offset, @@ -437,8 +458,12 @@ template StatusCode PUBLIC_API Asum<double2>(const size_t, cl_mem, const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Asum<half>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); -// Sum of values in a vector (non-BLAS function): SSUM/DSUM/ScSUM/DzSUM +// Sum of values in a vector (non-BLAS function): SSUM/DSUM/ScSUM/DzSUM/HSUM template <typename T> StatusCode Sum(const size_t n, cl_mem sum_buffer, const size_t sum_offset, @@ -468,8 +493,12 @@ template StatusCode PUBLIC_API Sum<double2>(const size_t, cl_mem, const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Sum<half>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); -// Index of absolute maximum value in a vector: iSAMAX/iDAMAX/iCAMAX/iZAMAX +// Index of absolute maximum value in a vector: iSAMAX/iDAMAX/iCAMAX/iZAMAX/iHAMAX template <typename T> StatusCode Amax(const size_t n, cl_mem imax_buffer, const size_t imax_offset, @@ -499,8 +528,12 @@ template StatusCode PUBLIC_API Amax<double2>(const size_t, cl_mem, const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Amax<half>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); -// Index of maximum value in a vector (non-BLAS function): iSMAX/iDMAX/iCMAX/iZMAX +// Index of maximum value in a vector (non-BLAS function): iSMAX/iDMAX/iCMAX/iZMAX/iHMAX template <typename T> StatusCode Max(const size_t n, cl_mem imax_buffer, const size_t imax_offset, @@ -530,8 +563,12 @@ template StatusCode PUBLIC_API Max<double2>(const size_t, cl_mem, const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Max<half>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); -// Index of minimum value in a vector (non-BLAS function): iSMIN/iDMIN/iCMIN/iZMIN +// Index of minimum value in a vector (non-BLAS function): iSMIN/iDMIN/iCMIN/iZMIN/iHMIN template <typename T> StatusCode Min(const size_t n, cl_mem imin_buffer, const size_t imin_offset, @@ -561,6 +598,10 @@ template StatusCode PUBLIC_API Min<double2>(const size_t, cl_mem, const size_t, const cl_mem, const size_t, const size_t, cl_command_queue*, cl_event*); +template StatusCode PUBLIC_API Min<half>(const size_t, + cl_mem, const size_t, + const cl_mem, const size_t, const size_t, + cl_command_queue*, cl_event*); // ================================================================================================= // BLAS level-2 (matrix-vector) routines diff --git a/src/clblast_c.cc b/src/clblast_c.cc index 7642a1e4..fd37462f 100644 --- a/src/clblast_c.cc +++ b/src/clblast_c.cc @@ -178,6 +178,16 @@ StatusCode CLBlastZswap(const size_t n, queue, event); return static_cast<StatusCode>(status); } +StatusCode CLBlastHswap(const size_t n, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Swap<half>(n, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + queue, event); + return static_cast<StatusCode>(status); +} // SCAL StatusCode CLBlastSscal(const size_t n, @@ -220,6 +230,16 @@ StatusCode CLBlastZscal(const size_t n, queue, event); return static_cast<StatusCode>(status); } +StatusCode CLBlastHscal(const size_t n, + const cl_half alpha, + cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Scal(n, + alpha, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} // COPY StatusCode CLBlastScopy(const size_t n, @@ -262,6 +282,16 @@ StatusCode CLBlastZcopy(const size_t n, queue, event); return static_cast<StatusCode>(status); } +StatusCode CLBlastHcopy(const size_t n, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Copy<half>(n, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + queue, event); + return static_cast<StatusCode>(status); +} // AXPY StatusCode CLBlastSaxpy(const size_t n, @@ -350,6 +380,18 @@ StatusCode CLBlastDdot(const size_t n, queue, event); return static_cast<StatusCode>(status); } +StatusCode CLBlastHdot(const size_t n, + cl_mem dot_buffer, const size_t dot_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + const cl_mem y_buffer, const size_t y_offset, const size_t y_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Dot<half>(n, + dot_buffer, dot_offset, + x_buffer, x_offset, x_inc, + y_buffer, y_offset, y_inc, + queue, event); + return static_cast<StatusCode>(status); +} // DOTU StatusCode CLBlastCdotu(const size_t n, @@ -444,6 +486,16 @@ StatusCode CLBlastDznrm2(const size_t n, queue, event); return static_cast<StatusCode>(status); } +StatusCode CLBlastHnrm2(const size_t n, + cl_mem nrm2_buffer, const size_t nrm2_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Nrm2<half>(n, + nrm2_buffer, nrm2_offset, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} // ASUM StatusCode CLBlastSasum(const size_t n, @@ -486,6 +538,16 @@ StatusCode CLBlastDzasum(const size_t n, queue, event); return static_cast<StatusCode>(status); } +StatusCode CLBlastHasum(const size_t n, + cl_mem asum_buffer, const size_t asum_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Asum<half>(n, + asum_buffer, asum_offset, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} // SUM StatusCode CLBlastSsum(const size_t n, @@ -528,6 +590,16 @@ StatusCode CLBlastDzsum(const size_t n, queue, event); return static_cast<StatusCode>(status); } +StatusCode CLBlastHsum(const size_t n, + cl_mem sum_buffer, const size_t sum_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Sum<half>(n, + sum_buffer, sum_offset, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} // AMAX StatusCode CLBlastiSamax(const size_t n, @@ -570,6 +642,16 @@ StatusCode CLBlastiZamax(const size_t n, queue, event); return static_cast<StatusCode>(status); } +StatusCode CLBlastiHamax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Amax<half>(n, + imax_buffer, imax_offset, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} // MAX StatusCode CLBlastiSmax(const size_t n, @@ -612,6 +694,16 @@ StatusCode CLBlastiZmax(const size_t n, queue, event); return static_cast<StatusCode>(status); } +StatusCode CLBlastiHmax(const size_t n, + cl_mem imax_buffer, const size_t imax_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Max<half>(n, + imax_buffer, imax_offset, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} // MIN StatusCode CLBlastiSmin(const size_t n, @@ -654,6 +746,16 @@ StatusCode CLBlastiZmin(const size_t n, queue, event); return static_cast<StatusCode>(status); } +StatusCode CLBlastiHmin(const size_t n, + cl_mem imin_buffer, const size_t imin_offset, + const cl_mem x_buffer, const size_t x_offset, const size_t x_inc, + cl_command_queue* queue, cl_event* event) { + auto status = clblast::Min<half>(n, + imin_buffer, imin_offset, + x_buffer, x_offset, x_inc, + queue, event); + return static_cast<StatusCode>(status); +} // ================================================================================================= // BLAS level-2 (matrix-vector) routines |