diff options
47 files changed, 174 insertions, 236 deletions
diff --git a/include/clblast.h b/include/clblast.h index 31a07423..c8596b39 100644 --- a/include/clblast.h +++ b/include/clblast.h @@ -68,8 +68,8 @@ enum class StatusCode { kInvalidLocalMemUsage = -2046, // Not enough local memory available on this device kNoHalfPrecision = -2045, // Half precision (16-bits) not supported by the device kNoDoublePrecision = -2044, // Double precision (64-bits) not supported by the device - kInvalidVectorDot = -2043, // Vector dot is not a valid OpenCL buffer - kInsufficientMemoryDot = -2042, // Vector dot's OpenCL buffer is too small + kInvalidVectorScalar = -2043, // The unit-sized vector is not a valid OpenCL buffer + kInsufficientMemoryScalar = -2042, // The unit-sized vector's OpenCL buffer is too small }; // Matrix layout and transpose types diff --git a/include/clblast_c.h b/include/clblast_c.h index 3ac6d99c..b92febac 100644 --- a/include/clblast_c.h +++ b/include/clblast_c.h @@ -77,8 +77,8 @@ typedef enum StatusCode_ { kInvalidLocalMemUsage = -2046, // Not enough local memory available on this device kNoHalfPrecision = -2045, // Half precision (16-bits) not supported by the device kNoDoublePrecision = -2044, // Double precision (64-bits) not supported by the device - kInvalidVectorDot = -2043, // Vector dot is not a valid OpenCL buffer - kInsufficientMemoryDot = -2042, // Vector dot's OpenCL buffer is too small + kInvalidVectorScalar = -2043, // The unit-sized vector is not a valid OpenCL buffer + kInsufficientMemoryScalar = -2042, // The unit-sized vector's OpenCL buffer is too small } StatusCode; // Matrix layout and transpose types diff --git a/include/internal/buffer_test.h b/include/internal/buffer_test.h new file mode 100644 index 00000000..80f5243f --- /dev/null +++ b/include/internal/buffer_test.h @@ -0,0 +1,121 @@ + +// ================================================================================================= +// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This +// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- +// width of 100 characters per line. +// +// Author(s): +// Cedric Nugteren <www.cedricnugteren.nl> +// +// This file implements the tests for the OpenCL buffers (matrices and vectors). These tests are +// templated and thus header-only. +// +// ================================================================================================= + +#ifndef CLBLAST_BUFFER_TEST_H_ +#define CLBLAST_BUFFER_TEST_H_ + +#include "clblast.h" + +namespace clblast { +// ================================================================================================= + +// Tests matrix 'A' for validity +template <typename T> +StatusCode TestMatrixA(const size_t one, const size_t two, const Buffer<T> &buffer, + const size_t offset, const size_t ld) { + if (ld < one) { return StatusCode::kInvalidLeadDimA; } + try { + const auto required_size = (ld * (two - 1) + one + offset) * sizeof(T); + if (buffer.GetSize() < required_size) { return StatusCode::kInsufficientMemoryA; } + } catch (...) { return StatusCode::kInvalidMatrixA; } + return StatusCode::kSuccess; +} + +// Tests matrix 'B' for validity +template <typename T> +StatusCode TestMatrixB(const size_t one, const size_t two, const Buffer<T> &buffer, + const size_t offset, const size_t ld) { + if (ld < one) { return StatusCode::kInvalidLeadDimB; } + try { + const auto required_size = (ld * (two - 1) + one + offset) * sizeof(T); + if (buffer.GetSize() < required_size) { return StatusCode::kInsufficientMemoryB; } + } catch (...) { return StatusCode::kInvalidMatrixB; } + return StatusCode::kSuccess; +} + +// Tests matrix 'C' for validity +template <typename T> +StatusCode TestMatrixC(const size_t one, const size_t two, const Buffer<T> &buffer, + const size_t offset, const size_t ld) { + if (ld < one) { return StatusCode::kInvalidLeadDimC; } + try { + const auto required_size = (ld * (two - 1) + one + offset) * sizeof(T); + if (buffer.GetSize() < required_size) { return StatusCode::kInsufficientMemoryC; } + } catch (...) { return StatusCode::kInvalidMatrixC; } + return StatusCode::kSuccess; +} + +// Tests matrix 'AP' for validity +template <typename T> +StatusCode TestMatrixAP(const size_t n, const Buffer<T> &buffer, const size_t offset) { + try { + const auto required_size = (((n * (n + 1)) / 2) + offset) * sizeof(T); + if (buffer.GetSize() < required_size) { return StatusCode::kInsufficientMemoryA; } + } catch (...) { return StatusCode::kInvalidMatrixA; } + return StatusCode::kSuccess; +} + +// ================================================================================================= + +// Tests vector 'X' for validity +template <typename T> +StatusCode TestVectorX(const size_t n, const Buffer<T> &buffer, const size_t offset, + const size_t inc) { + if (inc == 0) { return StatusCode::kInvalidIncrementX; } + try { + const auto required_size = ((n - 1) * inc + 1 + offset) * sizeof(T); + if (buffer.GetSize() < required_size) { return StatusCode::kInsufficientMemoryX; } + } catch (...) { return StatusCode::kInvalidVectorX; } + return StatusCode::kSuccess; +} + +// Tests vector 'Y' for validity +template <typename T> +StatusCode TestVectorY(const size_t n, const Buffer<T> &buffer, const size_t offset, + const size_t inc) { + if (inc == 0) { return StatusCode::kInvalidIncrementY; } + try { + const auto required_size = ((n - 1) * inc + 1 + offset) * sizeof(T); + if (buffer.GetSize() < required_size) { return StatusCode::kInsufficientMemoryY; } + } catch (...) { return StatusCode::kInvalidVectorY; } + return StatusCode::kSuccess; +} + +// ================================================================================================= + +// Tests vector 'scalar' for validity +template <typename T> +StatusCode TestVectorScalar(const size_t n, const Buffer<T> &buffer, const size_t offset) { + try { + const auto required_size = (n + offset) * sizeof(T); + if (buffer.GetSize() < required_size) { return StatusCode::kInsufficientMemoryScalar; } + } catch (...) { return StatusCode::kInvalidVectorScalar; } + return StatusCode::kSuccess; +} + +// Tests vector 'index' for validity +template <typename T> +StatusCode TestVectorIndex(const size_t n, const Buffer<T> &buffer, const size_t offset) { + try { + const auto required_size = (n + offset) * sizeof(T); + if (buffer.GetSize() < required_size) { return StatusCode::kInsufficientMemoryScalar; } + } catch (...) { return StatusCode::kInvalidVectorScalar; } + return StatusCode::kSuccess; +} + +// ================================================================================================= +} // namespace clblast + +// CLBLAST_BUFFER_TEST_H_ +#endif diff --git a/include/internal/routine.h b/include/internal/routine.h index 35837575..0b53b82e 100644 --- a/include/internal/routine.h +++ b/include/internal/routine.h @@ -22,6 +22,7 @@ #include "internal/cache.h" #include "internal/utilities.h" #include "internal/database.h" +#include "internal/buffer_test.h" namespace clblast { // ================================================================================================= @@ -52,28 +53,6 @@ class Routine { StatusCode RunKernel(Kernel &kernel, std::vector<size_t> global, const std::vector<size_t> &local, EventPointer event); - // Tests for valid inputs of matrices A, B, and C - StatusCode TestMatrixA(const size_t one, const size_t two, const Buffer<T> &buffer, - const size_t offset, const size_t ld, const size_t data_size); - StatusCode TestMatrixB(const size_t one, const size_t two, const Buffer<T> &buffer, - const size_t offset, const size_t ld, const size_t data_size); - StatusCode TestMatrixC(const size_t one, const size_t two, const Buffer<T> &buffer, - const size_t offset, const size_t ld, const size_t data_size); - StatusCode TestMatrixAP(const size_t n, const Buffer<T> &buffer, - const size_t offset, const size_t data_size); - - // Tests for valid inputs of vector X and Y - StatusCode TestVectorX(const size_t n, const Buffer<T> &buffer, const size_t offset, - const size_t inc, const size_t data_size); - StatusCode TestVectorY(const size_t n, const Buffer<T> &buffer, const size_t offset, - const size_t inc, const size_t data_size); - - // Tests for valid inputs of other vectors - StatusCode TestVectorDot(const size_t n, const Buffer<T> &buffer, const size_t offset, - const size_t data_size); - StatusCode TestVectorIndex(const size_t n, const Buffer<unsigned int> &buffer, - const size_t offset, const size_t data_size); - // Copies/transposes a matrix and padds/unpads it with zeroes. This method is also able to write // to symmetric and triangular matrices through optional arguments. StatusCode PadCopyTransposeMatrix(EventPointer event, std::vector<Event>& waitForEvents, diff --git a/include/internal/routines/level1/xamax.h b/include/internal/routines/level1/xamax.h index c318115e..b44e0ceb 100644 --- a/include/internal/routines/level1/xamax.h +++ b/include/internal/routines/level1/xamax.h @@ -31,8 +31,6 @@ class Xamax: public Routine<T> { using Routine<T>::event_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; - using Routine<T>::TestVectorX; - using Routine<T>::TestVectorIndex; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level1/xasum.h b/include/internal/routines/level1/xasum.h index b6e5d2cd..8e22d76a 100644 --- a/include/internal/routines/level1/xasum.h +++ b/include/internal/routines/level1/xasum.h @@ -31,8 +31,6 @@ class Xasum: public Routine<T> { using Routine<T>::event_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; - using Routine<T>::TestVectorX; - using Routine<T>::TestVectorDot; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level1/xaxpy.h b/include/internal/routines/level1/xaxpy.h index 03771d53..da5b2b0f 100644 --- a/include/internal/routines/level1/xaxpy.h +++ b/include/internal/routines/level1/xaxpy.h @@ -31,8 +31,6 @@ class Xaxpy: public Routine<T> { using Routine<T>::event_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; - using Routine<T>::TestVectorX; - using Routine<T>::TestVectorY; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level1/xcopy.h b/include/internal/routines/level1/xcopy.h index 5786cb0f..08e63ce4 100644 --- a/include/internal/routines/level1/xcopy.h +++ b/include/internal/routines/level1/xcopy.h @@ -30,8 +30,6 @@ class Xcopy: public Routine<T> { using Routine<T>::queue_; using Routine<T>::event_; using Routine<T>::GetProgramFromCache; - using Routine<T>::TestVectorX; - using Routine<T>::TestVectorY; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level1/xdot.h b/include/internal/routines/level1/xdot.h index 95a7ad07..5c46e0dc 100644 --- a/include/internal/routines/level1/xdot.h +++ b/include/internal/routines/level1/xdot.h @@ -31,9 +31,6 @@ class Xdot: public Routine<T> { using Routine<T>::event_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; - using Routine<T>::TestVectorX; - using Routine<T>::TestVectorY; - using Routine<T>::TestVectorDot; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level1/xnrm2.h b/include/internal/routines/level1/xnrm2.h index 6f6ca74f..5abfaa59 100644 --- a/include/internal/routines/level1/xnrm2.h +++ b/include/internal/routines/level1/xnrm2.h @@ -31,8 +31,6 @@ class Xnrm2: public Routine<T> { using Routine<T>::event_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; - using Routine<T>::TestVectorX; - using Routine<T>::TestVectorDot; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level1/xscal.h b/include/internal/routines/level1/xscal.h index e10a201d..5786869f 100644 --- a/include/internal/routines/level1/xscal.h +++ b/include/internal/routines/level1/xscal.h @@ -30,7 +30,6 @@ class Xscal: public Routine<T> { using Routine<T>::queue_; using Routine<T>::event_; using Routine<T>::GetProgramFromCache; - using Routine<T>::TestVectorX; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level1/xswap.h b/include/internal/routines/level1/xswap.h index 0f240763..483f21d5 100644 --- a/include/internal/routines/level1/xswap.h +++ b/include/internal/routines/level1/xswap.h @@ -30,8 +30,6 @@ class Xswap: public Routine<T> { using Routine<T>::queue_; using Routine<T>::event_; using Routine<T>::GetProgramFromCache; - using Routine<T>::TestVectorX; - using Routine<T>::TestVectorY; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level2/xgemv.h b/include/internal/routines/level2/xgemv.h index 875f936e..6e8f0e47 100644 --- a/include/internal/routines/level2/xgemv.h +++ b/include/internal/routines/level2/xgemv.h @@ -31,10 +31,6 @@ class Xgemv: public Routine<T> { using Routine<T>::event_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; - using Routine<T>::TestVectorX; - using Routine<T>::TestVectorY; - using Routine<T>::TestMatrixA; - using Routine<T>::TestMatrixAP; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level2/xger.h b/include/internal/routines/level2/xger.h index 1d5c64bd..b0c67798 100644 --- a/include/internal/routines/level2/xger.h +++ b/include/internal/routines/level2/xger.h @@ -31,9 +31,6 @@ class Xger: public Routine<T> { using Routine<T>::event_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; - using Routine<T>::TestVectorX; - using Routine<T>::TestVectorY; - using Routine<T>::TestMatrixA; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level2/xher.h b/include/internal/routines/level2/xher.h index ebd20ee8..7b735882 100644 --- a/include/internal/routines/level2/xher.h +++ b/include/internal/routines/level2/xher.h @@ -31,9 +31,6 @@ class Xher: public Routine<T> { using Routine<T>::event_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; - using Routine<T>::TestVectorX; - using Routine<T>::TestMatrixA; - using Routine<T>::TestMatrixAP; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level2/xher2.h b/include/internal/routines/level2/xher2.h index a33a71c3..dd5ca4bf 100644 --- a/include/internal/routines/level2/xher2.h +++ b/include/internal/routines/level2/xher2.h @@ -31,10 +31,6 @@ class Xher2: public Routine<T> { using Routine<T>::event_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; - using Routine<T>::TestVectorX; - using Routine<T>::TestVectorY; - using Routine<T>::TestMatrixA; - using Routine<T>::TestMatrixAP; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level3/xgemm.h b/include/internal/routines/level3/xgemm.h index 85fb0616..22624e61 100644 --- a/include/internal/routines/level3/xgemm.h +++ b/include/internal/routines/level3/xgemm.h @@ -32,9 +32,6 @@ class Xgemm: public Routine<T> { using Routine<T>::context_; using Routine<T>::GetProgramFromCache; using Routine<T>::PadCopyTransposeMatrix; - using Routine<T>::TestMatrixA; - using Routine<T>::TestMatrixB; - using Routine<T>::TestMatrixC; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level3/xhemm.h b/include/internal/routines/level3/xhemm.h index ec42b569..9f4557b4 100644 --- a/include/internal/routines/level3/xhemm.h +++ b/include/internal/routines/level3/xhemm.h @@ -29,7 +29,6 @@ class Xhemm: public Xgemm<T> { using Routine<T>::db_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; - using Routine<T>::TestMatrixA; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level3/xher2k.h b/include/internal/routines/level3/xher2k.h index 623afd49..8a4191a6 100644 --- a/include/internal/routines/level3/xher2k.h +++ b/include/internal/routines/level3/xher2k.h @@ -34,9 +34,6 @@ class Xher2k: public Routine<T> { using Routine<T>::context_; using Routine<T>::GetProgramFromCache; using Routine<T>::PadCopyTransposeMatrix; - using Routine<T>::TestMatrixA; - using Routine<T>::TestMatrixB; - using Routine<T>::TestMatrixC; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level3/xherk.h b/include/internal/routines/level3/xherk.h index 629695ff..b279d724 100644 --- a/include/internal/routines/level3/xherk.h +++ b/include/internal/routines/level3/xherk.h @@ -34,8 +34,6 @@ class Xherk: public Routine<T> { using Routine<T>::context_; using Routine<T>::GetProgramFromCache; using Routine<T>::PadCopyTransposeMatrix; - using Routine<T>::TestMatrixA; - using Routine<T>::TestMatrixC; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level3/xsymm.h b/include/internal/routines/level3/xsymm.h index 16ad6f53..a0cb7b90 100644 --- a/include/internal/routines/level3/xsymm.h +++ b/include/internal/routines/level3/xsymm.h @@ -31,7 +31,6 @@ class Xsymm: public Xgemm<T> { using Routine<T>::db_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; - using Routine<T>::TestMatrixA; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level3/xsyr2k.h b/include/internal/routines/level3/xsyr2k.h index 88669626..e498b7e6 100644 --- a/include/internal/routines/level3/xsyr2k.h +++ b/include/internal/routines/level3/xsyr2k.h @@ -34,9 +34,6 @@ class Xsyr2k: public Routine<T> { using Routine<T>::context_; using Routine<T>::GetProgramFromCache; using Routine<T>::PadCopyTransposeMatrix; - using Routine<T>::TestMatrixA; - using Routine<T>::TestMatrixB; - using Routine<T>::TestMatrixC; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level3/xsyrk.h b/include/internal/routines/level3/xsyrk.h index e95c7c1c..f7fa9b6a 100644 --- a/include/internal/routines/level3/xsyrk.h +++ b/include/internal/routines/level3/xsyrk.h @@ -36,8 +36,6 @@ class Xsyrk: public Routine<T> { using Routine<T>::context_; using Routine<T>::GetProgramFromCache; using Routine<T>::PadCopyTransposeMatrix; - using Routine<T>::TestMatrixA; - using Routine<T>::TestMatrixC; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/level3/xtrmm.h b/include/internal/routines/level3/xtrmm.h index 01f6594d..8527df58 100644 --- a/include/internal/routines/level3/xtrmm.h +++ b/include/internal/routines/level3/xtrmm.h @@ -30,7 +30,6 @@ class Xtrmm: public Xgemm<T> { using Routine<T>::db_; using Routine<T>::context_; using Routine<T>::GetProgramFromCache; - using Routine<T>::TestMatrixA; using Routine<T>::RunKernel; using Routine<T>::ErrorIn; diff --git a/include/internal/routines/levelx/xomatcopy.h b/include/internal/routines/levelx/xomatcopy.h index 38df846e..ec42d64a 100644 --- a/include/internal/routines/levelx/xomatcopy.h +++ b/include/internal/routines/levelx/xomatcopy.h @@ -29,8 +29,6 @@ class Xomatcopy: public Routine<T> { using Routine<T>::event_; using Routine<T>::GetProgramFromCache; using Routine<T>::PadCopyTransposeMatrix; - using Routine<T>::TestMatrixA; - using Routine<T>::TestMatrixB; using Routine<T>::ErrorIn; // Constructor diff --git a/src/routine.cc b/src/routine.cc index 1cf8bff8..ee3ba341 100644 --- a/src/routine.cc +++ b/src/routine.cc @@ -179,120 +179,6 @@ StatusCode Routine<T>::RunKernel(Kernel &kernel, std::vector<size_t> global, // ================================================================================================= -// Tests matrix A for validity: checks for a valid OpenCL buffer, a valid lead-dimension, and for a -// sufficient buffer size. -template <typename T> -StatusCode Routine<T>::TestMatrixA(const size_t one, const size_t two, const Buffer<T> &buffer, - const size_t offset, const size_t ld, const size_t data_size) { - if (ld < one) { return StatusCode::kInvalidLeadDimA; } - try { - const auto required_size = (ld*(two-1) + one + offset)*data_size; - const auto buffer_size = buffer.GetSize(); - if (buffer_size < required_size) { return StatusCode::kInsufficientMemoryA; } - } catch (...) { return StatusCode::kInvalidMatrixA; } - return StatusCode::kSuccess; -} - -// Tests matrix B for validity: checks for a valid OpenCL buffer, a valid lead-dimension, and for a -// sufficient buffer size. -template <typename T> -StatusCode Routine<T>::TestMatrixB(const size_t one, const size_t two, const Buffer<T> &buffer, - const size_t offset, const size_t ld, const size_t data_size) { - if (ld < one) { return StatusCode::kInvalidLeadDimB; } - try { - const auto required_size = (ld*(two-1) + one + offset)*data_size; - const auto buffer_size = buffer.GetSize(); - if (buffer_size < required_size) { return StatusCode::kInsufficientMemoryB; } - } catch (...) { return StatusCode::kInvalidMatrixB; } - return StatusCode::kSuccess; -} - -// Tests matrix C for validity: checks for a valid OpenCL buffer, a valid lead-dimension, and for a -// sufficient buffer size. -template <typename T> -StatusCode Routine<T>::TestMatrixC(const size_t one, const size_t two, const Buffer<T> &buffer, - const size_t offset, const size_t ld, const size_t data_size) { - if (ld < one) { return StatusCode::kInvalidLeadDimC; } - try { - const auto required_size = (ld*(two-1) + one + offset)*data_size; - const auto buffer_size = buffer.GetSize(); - if (buffer_size < required_size) { return StatusCode::kInsufficientMemoryC; } - } catch (...) { return StatusCode::kInvalidMatrixC; } - return StatusCode::kSuccess; -} - -// Tests matrix AP for validity: checks for a valid OpenCL buffer and for a sufficient buffer size -template <typename T> -StatusCode Routine<T>::TestMatrixAP(const size_t n, const Buffer<T> &buffer, - const size_t offset, const size_t data_size) { - try { - const auto required_size = (((n*(n+1))/2) + offset)*data_size; - const auto buffer_size = buffer.GetSize(); - if (buffer_size < required_size) { return StatusCode::kInsufficientMemoryA; } - } catch (...) { return StatusCode::kInvalidMatrixA; } - return StatusCode::kSuccess; -} - -// ================================================================================================= - -// Tests vector X for validity: checks for a valid increment, a valid OpenCL buffer, and for a -// sufficient buffer size. -template <typename T> -StatusCode Routine<T>::TestVectorX(const size_t n, const Buffer<T> &buffer, const size_t offset, - const size_t inc, const size_t data_size) { - if (inc == 0) { return StatusCode::kInvalidIncrementX; } - try { - const auto required_size = ((n-1)*inc + 1 + offset)*data_size; - const auto buffer_size = buffer.GetSize(); - if (buffer_size < required_size) { return StatusCode::kInsufficientMemoryX; } - } catch (...) { return StatusCode::kInvalidVectorX; } - return StatusCode::kSuccess; -} - -// Tests vector Y for validity: checks for a valid increment, a valid OpenCL buffer, and for a -// sufficient buffer size. -template <typename T> -StatusCode Routine<T>::TestVectorY(const size_t n, const Buffer<T> &buffer, const size_t offset, - const size_t inc, const size_t data_size) { - if (inc == 0) { return StatusCode::kInvalidIncrementY; } - try { - const auto required_size = ((n-1)*inc + 1 + offset)*data_size; - const auto buffer_size = buffer.GetSize(); - if (buffer_size < required_size) { return StatusCode::kInsufficientMemoryY; } - } catch (...) { return StatusCode::kInvalidVectorY; } - return StatusCode::kSuccess; -} - -// ================================================================================================= - -// Tests vector dot for validity: checks for a valid increment, a valid OpenCL buffer, and for a -// sufficient buffer size. -template <typename T> -StatusCode Routine<T>::TestVectorDot(const size_t n, const Buffer<T> &buffer, const size_t offset, - const size_t data_size) { - try { - const auto required_size = (n + offset)*data_size; - const auto buffer_size = buffer.GetSize(); - if (buffer_size < required_size) { return StatusCode::kInsufficientMemoryDot; } - } catch (...) { return StatusCode::kInvalidVectorDot; } - return StatusCode::kSuccess; -} - -// Tests vector index for validity: checks for a valid increment, a valid OpenCL buffer, and for a -// sufficient buffer size. -template <typename T> -StatusCode Routine<T>::TestVectorIndex(const size_t n, const Buffer<unsigned int> &buffer, - const size_t offset, const size_t data_size) { - try { - const auto required_size = (n + offset)*data_size; - const auto buffer_size = buffer.GetSize(); - if (buffer_size < required_size) { return StatusCode::kInsufficientMemoryDot; } - } catch (...) { return StatusCode::kInvalidVectorDot; } - return StatusCode::kSuccess; -} - -// ================================================================================================= - // Copies or transposes a matrix and optionally pads/unpads it with zeros template <typename T> StatusCode Routine<T>::PadCopyTransposeMatrix(EventPointer event, std::vector<Event>& waitForEvents, diff --git a/src/routines/level1/xamax.cc b/src/routines/level1/xamax.cc index 335e59bc..9a7d2173 100644 --- a/src/routines/level1/xamax.cc +++ b/src/routines/level1/xamax.cc @@ -49,9 +49,9 @@ StatusCode Xamax<T>::DoAmax(const size_t n, if (n == 0) { return StatusCode::kInvalidDimension; } // Tests the vectors for validity - auto status = TestVectorX(n, x_buffer, x_offset, x_inc, sizeof(T)); + auto status = TestVectorX(n, x_buffer, x_offset, x_inc); if (ErrorIn(status)) { return status; } - status = TestVectorIndex(1, imax_buffer, imax_offset, sizeof(unsigned int)); + status = TestVectorIndex(1, imax_buffer, imax_offset); if (ErrorIn(status)) { return status; } // Retrieves the Xamax kernels from the compiled binary diff --git a/src/routines/level1/xasum.cc b/src/routines/level1/xasum.cc index e04f7064..3dcaa80a 100644 --- a/src/routines/level1/xasum.cc +++ b/src/routines/level1/xasum.cc @@ -49,9 +49,9 @@ StatusCode Xasum<T>::DoAsum(const size_t n, if (n == 0) { return StatusCode::kInvalidDimension; } // Tests the vectors for validity - auto status = TestVectorX(n, x_buffer, x_offset, x_inc, sizeof(T)); + auto status = TestVectorX(n, x_buffer, x_offset, x_inc); if (ErrorIn(status)) { return status; } - status = TestVectorDot(1, asum_buffer, asum_offset, sizeof(T)); + status = TestVectorScalar(1, asum_buffer, asum_offset); if (ErrorIn(status)) { return status; } // Retrieves the Xasum kernels from the compiled binary diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc index 66aa2336..b57001f9 100644 --- a/src/routines/level1/xaxpy.cc +++ b/src/routines/level1/xaxpy.cc @@ -50,9 +50,9 @@ StatusCode Xaxpy<T>::DoAxpy(const size_t n, const T alpha, if (n == 0) { return StatusCode::kInvalidDimension; } // Tests the vectors for validity - auto status = TestVectorX(n, x_buffer, x_offset, x_inc, sizeof(T)); + auto status = TestVectorX(n, x_buffer, x_offset, x_inc); if (ErrorIn(status)) { return status; } - status = TestVectorY(n, y_buffer, y_offset, y_inc, sizeof(T)); + status = TestVectorY(n, y_buffer, y_offset, y_inc); if (ErrorIn(status)) { return status; } // Determines whether or not the fast-version can be used diff --git a/src/routines/level1/xcopy.cc b/src/routines/level1/xcopy.cc index ff8f5999..273e87a6 100644 --- a/src/routines/level1/xcopy.cc +++ b/src/routines/level1/xcopy.cc @@ -50,9 +50,9 @@ StatusCode Xcopy<T>::DoCopy(const size_t n, if (n == 0) { return StatusCode::kInvalidDimension; } // Tests the vectors for validity - auto status = TestVectorX(n, x_buffer, x_offset, x_inc, sizeof(T)); + auto status = TestVectorX(n, x_buffer, x_offset, x_inc); if (ErrorIn(status)) { return status; } - status = TestVectorY(n, y_buffer, y_offset, y_inc, sizeof(T)); + status = TestVectorY(n, y_buffer, y_offset, y_inc); if (ErrorIn(status)) { return status; } // Determines whether or not the fast-version can be used diff --git a/src/routines/level1/xdot.cc b/src/routines/level1/xdot.cc index db6a369e..25eccadf 100644 --- a/src/routines/level1/xdot.cc +++ b/src/routines/level1/xdot.cc @@ -51,11 +51,11 @@ StatusCode Xdot<T>::DoDot(const size_t n, if (n == 0) { return StatusCode::kInvalidDimension; } // Tests the vectors for validity - auto status = TestVectorX(n, x_buffer, x_offset, x_inc, sizeof(T)); + auto status = TestVectorX(n, x_buffer, x_offset, x_inc); if (ErrorIn(status)) { return status; } - status = TestVectorY(n, y_buffer, y_offset, y_inc, sizeof(T)); + status = TestVectorY(n, y_buffer, y_offset, y_inc); if (ErrorIn(status)) { return status; } - status = TestVectorDot(1, dot_buffer, dot_offset, sizeof(T)); + status = TestVectorScalar(1, dot_buffer, dot_offset); if (ErrorIn(status)) { return status; } // Retrieves the Xdot kernels from the compiled binary diff --git a/src/routines/level1/xnrm2.cc b/src/routines/level1/xnrm2.cc index 14f7f6aa..eca283f8 100644 --- a/src/routines/level1/xnrm2.cc +++ b/src/routines/level1/xnrm2.cc @@ -49,9 +49,9 @@ StatusCode Xnrm2<T>::DoNrm2(const size_t n, if (n == 0) { return StatusCode::kInvalidDimension; } // Tests the vectors for validity - auto status = TestVectorX(n, x_buffer, x_offset, x_inc, sizeof(T)); + auto status = TestVectorX(n, x_buffer, x_offset, x_inc); if (ErrorIn(status)) { return status; } - status = TestVectorDot(1, nrm2_buffer, nrm2_offset, sizeof(T)); + status = TestVectorScalar(1, nrm2_buffer, nrm2_offset); if (ErrorIn(status)) { return status; } // Retrieves the Xnrm2 kernels from the compiled binary diff --git a/src/routines/level1/xscal.cc b/src/routines/level1/xscal.cc index 1207acfa..0ce211b6 100644 --- a/src/routines/level1/xscal.cc +++ b/src/routines/level1/xscal.cc @@ -49,7 +49,7 @@ StatusCode Xscal<T>::DoScal(const size_t n, const T alpha, if (n == 0) { return StatusCode::kInvalidDimension; } // Tests the vector for validity - auto status = TestVectorX(n, x_buffer, x_offset, x_inc, sizeof(T)); + auto status = TestVectorX(n, x_buffer, x_offset, x_inc); if (ErrorIn(status)) { return status; } // Determines whether or not the fast-version can be used diff --git a/src/routines/level1/xswap.cc b/src/routines/level1/xswap.cc index 8844abff..773d78b5 100644 --- a/src/routines/level1/xswap.cc +++ b/src/routines/level1/xswap.cc @@ -50,9 +50,9 @@ StatusCode Xswap<T>::DoSwap(const size_t n, if (n == 0) { return StatusCode::kInvalidDimension; } // Tests the vectors for validity - auto status = TestVectorX(n, x_buffer, x_offset, x_inc, sizeof(T)); + auto status = TestVectorX(n, x_buffer, x_offset, x_inc); if (ErrorIn(status)) { return status; } - status = TestVectorY(n, y_buffer, y_offset, y_inc, sizeof(T)); + status = TestVectorY(n, y_buffer, y_offset, y_inc); if (ErrorIn(status)) { return status; } // Determines whether or not the fast-version can be used diff --git a/src/routines/level2/xgemv.cc b/src/routines/level2/xgemv.cc index 71839e96..18e61f28 100644 --- a/src/routines/level2/xgemv.cc +++ b/src/routines/level2/xgemv.cc @@ -101,12 +101,12 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose, // Tests the matrix and the vectors for validity auto status = StatusCode::kSuccess; - if (packed) { status = TestMatrixAP(n, a_buffer, a_offset, sizeof(T)); } - else { status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld, sizeof(T)); } + if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); } + else { status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); } if (ErrorIn(status)) { return status; } - status = TestVectorX(n_real, x_buffer, x_offset, x_inc, sizeof(T)); + status = TestVectorX(n_real, x_buffer, x_offset, x_inc); if (ErrorIn(status)) { return status; } - status = TestVectorY(m_real, y_buffer, y_offset, y_inc, sizeof(T)); + status = TestVectorY(m_real, y_buffer, y_offset, y_inc); if (ErrorIn(status)) { return status; } // Determines whether or not the fast-version can be used diff --git a/src/routines/level2/xger.cc b/src/routines/level2/xger.cc index d1f98990..7d6fb091 100644 --- a/src/routines/level2/xger.cc +++ b/src/routines/level2/xger.cc @@ -58,11 +58,11 @@ StatusCode Xger<T>::DoGer(const Layout layout, const auto a_two = (a_is_rowmajor) ? m : n; // Tests the matrix and the vectors for validity - auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld, sizeof(T)); + auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); if (ErrorIn(status)) { return status; } - status = TestVectorX(m, x_buffer, x_offset, x_inc, sizeof(T)); + status = TestVectorX(m, x_buffer, x_offset, x_inc); if (ErrorIn(status)) { return status; } - status = TestVectorY(n, y_buffer, y_offset, y_inc, sizeof(T)); + status = TestVectorY(n, y_buffer, y_offset, y_inc); if (ErrorIn(status)) { return status; } // Upload the scalar argument as a constant buffer to the device (needed for half-precision) diff --git a/src/routines/level2/xher.cc b/src/routines/level2/xher.cc index 73e7a47d..3d5c0baf 100644 --- a/src/routines/level2/xher.cc +++ b/src/routines/level2/xher.cc @@ -67,10 +67,10 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle, // Tests the matrix and the vectors for validity auto status = StatusCode::kSuccess; - if (packed) { status = TestMatrixAP(n, a_buffer, a_offset, sizeof(T)); } - else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld, sizeof(T)); } + if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); } + else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld); } if (ErrorIn(status)) { return status; } - status = TestVectorX(n, x_buffer, x_offset, x_inc, sizeof(T)); + status = TestVectorX(n, x_buffer, x_offset, x_inc); if (ErrorIn(status)) { return status; } // If alpha is zero an update is not required diff --git a/src/routines/level2/xher2.cc b/src/routines/level2/xher2.cc index a73dde52..a7362410 100644 --- a/src/routines/level2/xher2.cc +++ b/src/routines/level2/xher2.cc @@ -59,12 +59,12 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle, // Tests the matrix and the vectors for validity auto status = StatusCode::kSuccess; - if (packed) { status = TestMatrixAP(n, a_buffer, a_offset, sizeof(T)); } - else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld, sizeof(T)); } + if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); } + else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld); } if (ErrorIn(status)) { return status; } - status = TestVectorX(n, x_buffer, x_offset, x_inc, sizeof(T)); + status = TestVectorX(n, x_buffer, x_offset, x_inc); if (ErrorIn(status)) { return status; } - status = TestVectorY(n, y_buffer, y_offset, y_inc, sizeof(T)); + status = TestVectorY(n, y_buffer, y_offset, y_inc); if (ErrorIn(status)) { return status; } // Upload the scalar argument as a constant buffer to the device (needed for half-precision) diff --git a/src/routines/level3/xgemm.cc b/src/routines/level3/xgemm.cc index 42d5f19e..713bed8f 100644 --- a/src/routines/level3/xgemm.cc +++ b/src/routines/level3/xgemm.cc @@ -96,11 +96,11 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout, // matrix A cannot be less than K when rotated, or less than M when not-rotated // matrix B cannot be less than N when rotated, or less than K when not-rotated // matrix C cannot be less than N when rotated, or less than M when not-rotated - auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld, sizeof(T)); + auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); if (ErrorIn(status)) { return status; } - status = TestMatrixB(b_one, b_two, b_buffer, b_offset, b_ld, sizeof(T)); + status = TestMatrixB(b_one, b_two, b_buffer, b_offset, b_ld); if (ErrorIn(status)) { return status; } - status = TestMatrixC(c_one, c_two, c_buffer, c_offset, c_ld, sizeof(T)); + status = TestMatrixC(c_one, c_two, c_buffer, c_offset, c_ld); if (ErrorIn(status)) { return status; } // Calculates the ceiled versions of m, n, and k diff --git a/src/routines/level3/xhemm.cc b/src/routines/level3/xhemm.cc index d2fbf36e..a6e853e9 100644 --- a/src/routines/level3/xhemm.cc +++ b/src/routines/level3/xhemm.cc @@ -45,7 +45,7 @@ StatusCode Xhemm<T>::DoHemm(const Layout layout, const Side side, const Triangle auto k = (side == Side::kLeft) ? m : n; // Checks for validity of the squared A matrix - auto status = TestMatrixA(k, k, a_buffer, a_offset, a_ld, sizeof(T)); + auto status = TestMatrixA(k, k, a_buffer, a_offset, a_ld); if (ErrorIn(status)) { return status; } // Determines which kernel to run based on the layout (the Xgemm kernel assumes column-major as diff --git a/src/routines/level3/xher2k.cc b/src/routines/level3/xher2k.cc index 5ec1f8cd..c891c202 100644 --- a/src/routines/level3/xher2k.cc +++ b/src/routines/level3/xher2k.cc @@ -76,11 +76,11 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co // matrix A cannot be less than N when rotated, or less than K when not-rotated // matrix B cannot be less than N when rotated, or less than K when not-rotated // matrix C cannot be less than N - auto status = TestMatrixA(ab_one, ab_two, a_buffer, a_offset, a_ld, sizeof(T)); + auto status = TestMatrixA(ab_one, ab_two, a_buffer, a_offset, a_ld); if (ErrorIn(status)) { return status; } - status = TestMatrixB(ab_one, ab_two, b_buffer, b_offset, b_ld, sizeof(T)); + status = TestMatrixB(ab_one, ab_two, b_buffer, b_offset, b_ld); if (ErrorIn(status)) { return status; } - status = TestMatrixC(n, n, c_buffer, c_offset, c_ld, sizeof(T)); + status = TestMatrixC(n, n, c_buffer, c_offset, c_ld); if (ErrorIn(status)) { return status; } // Calculates the ceiled versions of n and k diff --git a/src/routines/level3/xherk.cc b/src/routines/level3/xherk.cc index df97a94f..9d64af95 100644 --- a/src/routines/level3/xherk.cc +++ b/src/routines/level3/xherk.cc @@ -75,9 +75,9 @@ StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, cons // space. Also tests that the leading dimensions of: // matrix A cannot be less than N when rotated, or less than K when not-rotated // matrix C cannot be less than N - auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld, sizeof(T)); + auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); if (ErrorIn(status)) { return status; } - status = TestMatrixC(n, n, c_buffer, c_offset, c_ld, sizeof(T)); + status = TestMatrixC(n, n, c_buffer, c_offset, c_ld); if (ErrorIn(status)) { return status; } // Calculates the ceiled versions of n and k diff --git a/src/routines/level3/xsymm.cc b/src/routines/level3/xsymm.cc index d88d4653..379e2908 100644 --- a/src/routines/level3/xsymm.cc +++ b/src/routines/level3/xsymm.cc @@ -45,7 +45,7 @@ StatusCode Xsymm<T>::DoSymm(const Layout layout, const Side side, const Triangle auto k = (side == Side::kLeft) ? m : n; // Checks for validity of the squared A matrix - auto status = TestMatrixA(k, k, a_buffer, a_offset, a_ld, sizeof(T)); + auto status = TestMatrixA(k, k, a_buffer, a_offset, a_ld); if (ErrorIn(status)) { return status; } // Determines which kernel to run based on the layout (the Xgemm kernel assumes column-major as diff --git a/src/routines/level3/xsyr2k.cc b/src/routines/level3/xsyr2k.cc index dd7d19fe..886d1e16 100644 --- a/src/routines/level3/xsyr2k.cc +++ b/src/routines/level3/xsyr2k.cc @@ -75,11 +75,11 @@ StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, cons // matrix A cannot be less than N when rotated, or less than K when not-rotated // matrix B cannot be less than N when rotated, or less than K when not-rotated // matrix C cannot be less than N - auto status = TestMatrixA(ab_one, ab_two, a_buffer, a_offset, a_ld, sizeof(T)); + auto status = TestMatrixA(ab_one, ab_two, a_buffer, a_offset, a_ld); if (ErrorIn(status)) { return status; } - status = TestMatrixB(ab_one, ab_two, b_buffer, b_offset, b_ld, sizeof(T)); + status = TestMatrixB(ab_one, ab_two, b_buffer, b_offset, b_ld); if (ErrorIn(status)) { return status; } - status = TestMatrixC(n, n, c_buffer, c_offset, c_ld, sizeof(T)); + status = TestMatrixC(n, n, c_buffer, c_offset, c_ld); if (ErrorIn(status)) { return status; } // Calculates the ceiled versions of n and k diff --git a/src/routines/level3/xsyrk.cc b/src/routines/level3/xsyrk.cc index b5817b82..000347f3 100644 --- a/src/routines/level3/xsyrk.cc +++ b/src/routines/level3/xsyrk.cc @@ -73,9 +73,9 @@ StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const // space. Also tests that the leading dimensions of: // matrix A cannot be less than N when rotated, or less than K when not-rotated // matrix C cannot be less than N - auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld, sizeof(T)); + auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); if (ErrorIn(status)) { return status; } - status = TestMatrixC(n, n, c_buffer, c_offset, c_ld, sizeof(T)); + status = TestMatrixC(n, n, c_buffer, c_offset, c_ld); if (ErrorIn(status)) { return status; } // Calculates the ceiled versions of n and k diff --git a/src/routines/level3/xtrmm.cc b/src/routines/level3/xtrmm.cc index 27ecb4ed..c62305aa 100644 --- a/src/routines/level3/xtrmm.cc +++ b/src/routines/level3/xtrmm.cc @@ -44,7 +44,7 @@ StatusCode Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle auto k = (side == Side::kLeft) ? m : n; // Checks for validity of the triangular A matrix - auto status = TestMatrixA(k, k, a_buffer, a_offset, a_ld, sizeof(T)); + auto status = TestMatrixA(k, k, a_buffer, a_offset, a_ld); if (ErrorIn(status)) { return status; } // Determines which kernel to run based on the layout (the Xgemm kernel assumes column-major as diff --git a/src/routines/levelx/xomatcopy.cc b/src/routines/levelx/xomatcopy.cc index 77fc445f..dcc4e52a 100644 --- a/src/routines/levelx/xomatcopy.cc +++ b/src/routines/levelx/xomatcopy.cc @@ -72,9 +72,9 @@ StatusCode Xomatcopy<T>::DoOmatcopy(const Layout layout, const Transpose a_trans // Also tests that the leading dimensions of: // matrix A cannot be less than N when rotated, or less than M when not-rotated // matrix B cannot be less than M when rotated, or less than N when not-rotated - auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld, sizeof(T)); + auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); if (ErrorIn(status)) { return status; } - status = TestMatrixB(b_one, b_two, b_buffer, b_offset, b_ld, sizeof(T)); + status = TestMatrixB(b_one, b_two, b_buffer, b_offset, b_ld); if (ErrorIn(status)) { return status; } // Loads the program from the database |