summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl>, 2016-10-22 15:05:12 +0200
committer: GitHub <noreply@github.com>, 2016-10-22 15:05:12 +0200
commit: 280698d0767219e174b12e51e8e42b228bbf28e9 (patch)
tree: 25db4d2d360cc161ca7d8e563c847faf08a745a0
parent: 9b596820d2dd833648706bff505b459c58f45b4b (diff)
parent: 56f300607b1d0b81ab3269894fda5a066c46cdeb (diff)
Merge pull request #117 from intelfx/exceptions
Convert to use C++ exceptions internally
-rw-r--r-- CMakeLists.txt | 1
-rw-r--r-- include/clblast.h | 5
-rw-r--r-- include/clblast_c.h | 5
-rw-r--r-- scripts/generator/generator.py | 4
-rw-r--r-- scripts/generator/generator/cpp.py | 23
-rw-r--r-- src/buffer_test.hpp | 66
-rw-r--r-- src/cache.cpp | 7
-rw-r--r-- src/cache.hpp | 2
-rw-r--r-- src/clblast.cpp | 955
-rw-r--r-- src/clblast_c.cpp | 3187
-rw-r--r-- src/clblast_exceptions.cpp | 95
-rw-r--r-- src/clblast_exceptions.hpp | 50
-rw-r--r-- src/clpp11.hpp | 134
-rw-r--r-- src/cxpp11_common.hpp | 87
-rw-r--r-- src/database/database.cpp | 2
-rw-r--r-- src/routine.cpp | 85
-rw-r--r-- src/routine.hpp | 10
-rw-r--r-- src/routines/common.cpp | 27
-rw-r--r-- src/routines/common.hpp | 158
-rw-r--r-- src/routines/level1/xamax.cpp | 100
-rw-r--r-- src/routines/level1/xamax.hpp | 6
-rw-r--r-- src/routines/level1/xasum.cpp | 94
-rw-r--r-- src/routines/level1/xasum.hpp | 6
-rw-r--r-- src/routines/level1/xaxpy.cpp | 91
-rw-r--r-- src/routines/level1/xaxpy.hpp | 6
-rw-r--r-- src/routines/level1/xcopy.cpp | 87
-rw-r--r-- src/routines/level1/xcopy.hpp | 6
-rw-r--r-- src/routines/level1/xdot.cpp | 109
-rw-r--r-- src/routines/level1/xdot.hpp | 10
-rw-r--r-- src/routines/level1/xdotc.cpp | 16
-rw-r--r-- src/routines/level1/xdotc.hpp | 8
-rw-r--r-- src/routines/level1/xdotu.cpp | 16
-rw-r--r-- src/routines/level1/xdotu.hpp | 8
-rw-r--r-- src/routines/level1/xmax.hpp | 8
-rw-r--r-- src/routines/level1/xmin.hpp | 8
-rw-r--r-- src/routines/level1/xnrm2.cpp | 94
-rw-r--r-- src/routines/level1/xnrm2.hpp | 6
-rw-r--r-- src/routines/level1/xscal.cpp | 78
-rw-r--r-- src/routines/level1/xscal.hpp | 4
-rw-r--r-- src/routines/level1/xsum.hpp | 8
-rw-r--r-- src/routines/level1/xswap.cpp | 87
-rw-r--r-- src/routines/level1/xswap.hpp | 6
-rw-r--r-- src/routines/level2/xgbmv.cpp | 28
-rw-r--r-- src/routines/level2/xgbmv.hpp | 14
-rw-r--r-- src/routines/level2/xgemv.cpp | 127
-rw-r--r-- src/routines/level2/xgemv.hpp | 34
-rw-r--r-- src/routines/level2/xger.cpp | 82
-rw-r--r-- src/routines/level2/xger.hpp | 12
-rw-r--r-- src/routines/level2/xgerc.cpp | 20
-rw-r--r-- src/routines/level2/xgerc.hpp | 12
-rw-r--r-- src/routines/level2/xgeru.cpp | 20
-rw-r--r-- src/routines/level2/xgeru.hpp | 12
-rw-r--r-- src/routines/level2/xhbmv.cpp | 28
-rw-r--r-- src/routines/level2/xhbmv.hpp | 14
-rw-r--r-- src/routines/level2/xhemv.cpp | 28
-rw-r--r-- src/routines/level2/xhemv.hpp | 14
-rw-r--r-- src/routines/level2/xher.cpp | 78
-rw-r--r-- src/routines/level2/xher.hpp | 12
-rw-r--r-- src/routines/level2/xher2.cpp | 87
-rw-r--r-- src/routines/level2/xher2.hpp | 14
-rw-r--r-- src/routines/level2/xhpmv.cpp | 28
-rw-r--r-- src/routines/level2/xhpmv.hpp | 14
-rw-r--r-- src/routines/level2/xhpr.cpp | 18
-rw-r--r-- src/routines/level2/xhpr.hpp | 10
-rw-r--r-- src/routines/level2/xhpr2.cpp | 22
-rw-r--r-- src/routines/level2/xhpr2.hpp | 12
-rw-r--r-- src/routines/level2/xsbmv.cpp | 28
-rw-r--r-- src/routines/level2/xsbmv.hpp | 14
-rw-r--r-- src/routines/level2/xspmv.cpp | 28
-rw-r--r-- src/routines/level2/xspmv.hpp | 14
-rw-r--r-- src/routines/level2/xspr.cpp | 18
-rw-r--r-- src/routines/level2/xspr.hpp | 10
-rw-r--r-- src/routines/level2/xspr2.cpp | 22
-rw-r--r-- src/routines/level2/xspr2.hpp | 12
-rw-r--r-- src/routines/level2/xsymv.cpp | 28
-rw-r--r-- src/routines/level2/xsymv.hpp | 14
-rw-r--r-- src/routines/level2/xsyr.cpp | 16
-rw-r--r-- src/routines/level2/xsyr.hpp | 10
-rw-r--r-- src/routines/level2/xsyr2.cpp | 20
-rw-r--r-- src/routines/level2/xsyr2.hpp | 12
-rw-r--r-- src/routines/level2/xtbmv.cpp | 44
-rw-r--r-- src/routines/level2/xtbmv.hpp | 10
-rw-r--r-- src/routines/level2/xtpmv.cpp | 44
-rw-r--r-- src/routines/level2/xtpmv.hpp | 10
-rw-r--r-- src/routines/level2/xtrmv.cpp | 44
-rw-r--r-- src/routines/level2/xtrmv.hpp | 10
-rw-r--r-- src/routines/level3/xgemm.cpp | 308
-rw-r--r-- src/routines/level3/xgemm.hpp | 48
-rw-r--r-- src/routines/level3/xhemm.cpp | 132
-rw-r--r-- src/routines/level3/xhemm.hpp | 14
-rw-r--r-- src/routines/level3/xher2k.cpp | 291
-rw-r--r-- src/routines/level3/xher2k.hpp | 14
-rw-r--r-- src/routines/level3/xherk.cpp | 201
-rw-r--r-- src/routines/level3/xherk.hpp | 12
-rw-r--r-- src/routines/level3/xsymm.cpp | 132
-rw-r--r-- src/routines/level3/xsymm.hpp | 14
-rw-r--r-- src/routines/level3/xsyr2k.cpp | 219
-rw-r--r-- src/routines/level3/xsyr2k.hpp | 14
-rw-r--r-- src/routines/level3/xsyrk.cpp | 169
-rw-r--r-- src/routines/level3/xsyrk.hpp | 12
-rw-r--r-- src/routines/level3/xtrmm.cpp | 134
-rw-r--r-- src/routines/level3/xtrmm.hpp | 12
-rw-r--r-- src/routines/levelx/xomatcopy.cpp | 32
-rw-r--r-- src/routines/levelx/xomatcopy.hpp | 8
-rw-r--r-- src/utilities.hpp | 6
105 files changed, 4462 insertions, 4223 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index ae78b5a7..17bff79b 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -169,6 +169,7 @@ set(SOURCES
src/routines/common.cpp
src/cache.cpp
src/clblast.cpp
+ src/clblast_exceptions.cpp
src/clblast_c.cpp
src/routine.cpp
src/utilities.cpp
diff --git a/include/clblast.h b/include/clblast.h
index 0f52b2f9..53e23669 100644
--- a/include/clblast.h
+++ b/include/clblast.h
@@ -75,13 +75,14 @@ enum class StatusCode {
kInsufficientMemoryY = -1007, // Vector Y's OpenCL buffer is too small
// Custom additional status codes for CLBlast
- kKernelLaunchError = -2048, // Problem occurred when enqueuing the kernel
- kKernelRunError = -2047, // Problem occurred while running the kernel
kInvalidLocalMemUsage = -2046, // Not enough local memory available on this device
kNoHalfPrecision = -2045, // Half precision (16-bits) not supported by the device
kNoDoublePrecision = -2044, // Double precision (64-bits) not supported by the device
kInvalidVectorScalar = -2043, // The unit-sized vector is not a valid OpenCL buffer
kInsufficientMemoryScalar = -2042, // The unit-sized vector's OpenCL buffer is too small
+ kDatabaseError = -2041, // Entry for the device was not found in the database
+ kUnknownError = -2040, // A catch-all error code representing an unspecified error
+ kUnexpectedError = -2039, // A catch-all error code representing an unexpected exception
};
// Matrix layout and transpose types
diff --git a/include/clblast_c.h b/include/clblast_c.h
index 33fb4acf..2805c20f 100644
--- a/include/clblast_c.h
+++ b/include/clblast_c.h
@@ -76,13 +76,14 @@ typedef enum StatusCode_ {
kInsufficientMemoryY = -1007, // Vector Y's OpenCL buffer is too small
// Custom additional status codes for CLBlast
- kKernelLaunchError = -2048, // Problem occurred when enqueuing the kernel
- kKernelRunError = -2047, // Problem occurred while running the kernel
kInvalidLocalMemUsage = -2046, // Not enough local memory available on this device
kNoHalfPrecision = -2045, // Half precision (16-bits) not supported by the device
kNoDoublePrecision = -2044, // Double precision (64-bits) not supported by the device
kInvalidVectorScalar = -2043, // The unit-sized vector is not a valid OpenCL buffer
kInsufficientMemoryScalar = -2042, // The unit-sized vector's OpenCL buffer is too small
+ kDatabaseError = -2041, // Entry for the device was not found in the database
+ kUnknownError = -2040, // A catch-all error code representing an unspecified error
+ kUnexpectedError = -2039, // A catch-all error code representing an unexpected exception
} StatusCode;
// Matrix layout and transpose types
diff --git a/scripts/generator/generator.py b/scripts/generator/generator.py
index d82b13a6..04ab5475 100644
--- a/scripts/generator/generator.py
+++ b/scripts/generator/generator.py
@@ -30,8 +30,8 @@ from generator.routine import Routine
from generator.datatype import H, S, D, C, Z, Sc, Dz, iH, iS, iD, iC, iZ, Css, Zdd, Ccs, Zzd, T, Tc, TU
-HEADER_LINES = [96, 73, 97, 22, 29, 41]
-FOOTER_LINES = [17, 75, 19, 14, 6, 6]
+HEADER_LINES = [97, 73, 98, 22, 29, 41]
+FOOTER_LINES = [17, 80, 19, 18, 6, 6]
# Different possibilities for requirements
ald_m = "The value of `a_ld` must be at least `m`."
diff --git a/scripts/generator/generator/cpp.py b/scripts/generator/generator/cpp.py
index 427eb180..a0d43667 100644
--- a/scripts/generator/generator/cpp.py
+++ b/scripts/generator/generator/cpp.py
@@ -45,17 +45,18 @@ def clblast_h(routine):
def clblast_cc(routine):
"""The C++ API implementation (.cpp)"""
- indent1 = " " * (20 + routine.length())
+ indent1 = " " * (15 + routine.length())
result = NL + "// " + routine.description + ": " + routine.short_names() + NL
if routine.implemented:
result += routine.routine_header_cpp(12, "") + " {" + NL
- result += " auto queue_cpp = Queue(*queue);" + NL
- result += " auto routine = X" + routine.name + "<" + routine.template.template + ">(queue_cpp, event);" + NL
- result += " auto status = routine.SetUp();" + NL
- result += " if (status != StatusCode::kSuccess) { return status; }" + NL
- result += " return routine.Do" + routine.name.capitalize() + "("
+ result += " try {" + NL
+ result += " auto queue_cpp = Queue(*queue);" + NL
+ result += " auto routine = X" + routine.name + "<" + routine.template.template + ">(queue_cpp, event);" + NL
+ result += " routine.Do" + routine.name.capitalize() + "("
result += ("," + NL + indent1).join([a for a in routine.arguments_clcudaapi()])
result += ");" + NL
+ result += " return StatusCode::kSuccess;" + NL
+ result += " } catch (...) { return DispatchException(); }" + NL
else:
result += routine.routine_header_type_cpp(12) + " {" + NL
result += " return StatusCode::kNotImplemented;" + NL
@@ -81,12 +82,14 @@ def clblast_c_cc(routine):
result = NL + "// " + routine.name.upper() + NL
for flavour in routine.flavours:
template = "<" + flavour.template + ">" if routine.no_scalars() else ""
- indent = " " * (26 + routine.length() + len(template))
+ indent = " " * (45 + routine.length() + len(template))
result += routine.routine_header_c(flavour, 20, "") + " {" + NL
- result += " auto status = clblast::" + routine.name.capitalize() + template + "("
+ result += " try {" + NL
+ result += " return static_cast<StatusCode>(clblast::" + routine.name.capitalize() + template + "("
result += ("," + NL + indent).join([a for a in routine.arguments_cast(flavour, indent)])
- result += "," + NL + indent + "queue, event);"
- result += NL + " return static_cast<StatusCode>(status);" + NL + "}" + NL
+ result += "," + NL + indent + "queue, event));" + NL
+ result += " } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }" + NL
+ result += "}" + NL
return result
diff --git a/src/buffer_test.hpp b/src/buffer_test.hpp
index 80f5243f..9a23e0b7 100644
--- a/src/buffer_test.hpp
+++ b/src/buffer_test.hpp
@@ -22,96 +22,88 @@ namespace clblast {
// Tests matrix 'A' for validity
template <typename T>
-StatusCode TestMatrixA(const size_t one, const size_t two, const Buffer<T> &buffer,
+void TestMatrixA(const size_t one, const size_t two, const Buffer<T> &buffer,
const size_t offset, const size_t ld) {
- if (ld < one) { return StatusCode::kInvalidLeadDimA; }
+ if (ld < one) { throw BLASError(StatusCode::kInvalidLeadDimA); }
try {
const auto required_size = (ld * (two - 1) + one + offset) * sizeof(T);
- if (buffer.GetSize() < required_size) { return StatusCode::kInsufficientMemoryA; }
- } catch (...) { return StatusCode::kInvalidMatrixA; }
- return StatusCode::kSuccess;
+ if (buffer.GetSize() < required_size) { throw BLASError(StatusCode::kInsufficientMemoryA); }
+ } catch (const Error<std::runtime_error> &e) { throw BLASError(StatusCode::kInvalidMatrixA, e.what()); }
}
// Tests matrix 'B' for validity
template <typename T>
-StatusCode TestMatrixB(const size_t one, const size_t two, const Buffer<T> &buffer,
+void TestMatrixB(const size_t one, const size_t two, const Buffer<T> &buffer,
const size_t offset, const size_t ld) {
- if (ld < one) { return StatusCode::kInvalidLeadDimB; }
+ if (ld < one) { throw BLASError(StatusCode::kInvalidLeadDimB); }
try {
const auto required_size = (ld * (two - 1) + one + offset) * sizeof(T);
- if (buffer.GetSize() < required_size) { return StatusCode::kInsufficientMemoryB; }
- } catch (...) { return StatusCode::kInvalidMatrixB; }
- return StatusCode::kSuccess;
+ if (buffer.GetSize() < required_size) { throw BLASError(StatusCode::kInsufficientMemoryB); }
+ } catch (const Error<std::runtime_error> &e) { throw BLASError(StatusCode::kInvalidMatrixB, e.what()); }
}
// Tests matrix 'C' for validity
template <typename T>
-StatusCode TestMatrixC(const size_t one, const size_t two, const Buffer<T> &buffer,
+void TestMatrixC(const size_t one, const size_t two, const Buffer<T> &buffer,
const size_t offset, const size_t ld) {
- if (ld < one) { return StatusCode::kInvalidLeadDimC; }
+ if (ld < one) { throw BLASError(StatusCode::kInvalidLeadDimC); }
try {
const auto required_size = (ld * (two - 1) + one + offset) * sizeof(T);
- if (buffer.GetSize() < required_size) { return StatusCode::kInsufficientMemoryC; }
- } catch (...) { return StatusCode::kInvalidMatrixC; }
- return StatusCode::kSuccess;
+ if (buffer.GetSize() < required_size) { throw BLASError(StatusCode::kInsufficientMemoryC); }
+ } catch (const Error<std::runtime_error> &e) { throw BLASError(StatusCode::kInvalidMatrixC, e.what()); }
}
// Tests matrix 'AP' for validity
template <typename T>
-StatusCode TestMatrixAP(const size_t n, const Buffer<T> &buffer, const size_t offset) {
+void TestMatrixAP(const size_t n, const Buffer<T> &buffer, const size_t offset) {
try {
const auto required_size = (((n * (n + 1)) / 2) + offset) * sizeof(T);
- if (buffer.GetSize() < required_size) { return StatusCode::kInsufficientMemoryA; }
- } catch (...) { return StatusCode::kInvalidMatrixA; }
- return StatusCode::kSuccess;
+ if (buffer.GetSize() < required_size) { throw BLASError(StatusCode::kInsufficientMemoryA); }
+ } catch (const Error<std::runtime_error> &e) { throw BLASError(StatusCode::kInvalidMatrixA, e.what()); }
}
// =================================================================================================
// Tests vector 'X' for validity
template <typename T>
-StatusCode TestVectorX(const size_t n, const Buffer<T> &buffer, const size_t offset,
+void TestVectorX(const size_t n, const Buffer<T> &buffer, const size_t offset,
const size_t inc) {
- if (inc == 0) { return StatusCode::kInvalidIncrementX; }
+ if (inc == 0) { throw BLASError(StatusCode::kInvalidIncrementX); }
try {
const auto required_size = ((n - 1) * inc + 1 + offset) * sizeof(T);
- if (buffer.GetSize() < required_size) { return StatusCode::kInsufficientMemoryX; }
- } catch (...) { return StatusCode::kInvalidVectorX; }
- return StatusCode::kSuccess;
+ if (buffer.GetSize() < required_size) { throw BLASError(StatusCode::kInsufficientMemoryX); }
+ } catch (const Error<std::runtime_error> &e) { throw BLASError(StatusCode::kInvalidVectorX, e.what()); }
}
// Tests vector 'Y' for validity
template <typename T>
-StatusCode TestVectorY(const size_t n, const Buffer<T> &buffer, const size_t offset,
+void TestVectorY(const size_t n, const Buffer<T> &buffer, const size_t offset,
const size_t inc) {
- if (inc == 0) { return StatusCode::kInvalidIncrementY; }
+ if (inc == 0) { throw BLASError(StatusCode::kInvalidIncrementY); }
try {
const auto required_size = ((n - 1) * inc + 1 + offset) * sizeof(T);
- if (buffer.GetSize() < required_size) { return StatusCode::kInsufficientMemoryY; }
- } catch (...) { return StatusCode::kInvalidVectorY; }
- return StatusCode::kSuccess;
+ if (buffer.GetSize() < required_size) { throw BLASError(StatusCode::kInsufficientMemoryY); }
+ } catch (const Error<std::runtime_error> &e) { throw BLASError(StatusCode::kInvalidVectorY, e.what()); }
}
// =================================================================================================
// Tests vector 'scalar' for validity
template <typename T>
-StatusCode TestVectorScalar(const size_t n, const Buffer<T> &buffer, const size_t offset) {
+void TestVectorScalar(const size_t n, const Buffer<T> &buffer, const size_t offset) {
try {
const auto required_size = (n + offset) * sizeof(T);
- if (buffer.GetSize() < required_size) { return StatusCode::kInsufficientMemoryScalar; }
- } catch (...) { return StatusCode::kInvalidVectorScalar; }
- return StatusCode::kSuccess;
+ if (buffer.GetSize() < required_size) { throw BLASError(StatusCode::kInsufficientMemoryScalar); }
+ } catch (const Error<std::runtime_error> &e) { throw BLASError(StatusCode::kInvalidVectorScalar, e.what()); }
}
// Tests vector 'index' for validity
template <typename T>
-StatusCode TestVectorIndex(const size_t n, const Buffer<T> &buffer, const size_t offset) {
+void TestVectorIndex(const size_t n, const Buffer<T> &buffer, const size_t offset) {
try {
const auto required_size = (n + offset) * sizeof(T);
- if (buffer.GetSize() < required_size) { return StatusCode::kInsufficientMemoryScalar; }
- } catch (...) { return StatusCode::kInvalidVectorScalar; }
- return StatusCode::kSuccess;
+ if (buffer.GetSize() < required_size) { throw BLASError(StatusCode::kInsufficientMemoryScalar); }
+ } catch (const Error<std::runtime_error> &e) { throw BLASError(StatusCode::kInvalidVectorScalar, e.what()); }
}
// =================================================================================================
diff --git a/src/cache.cpp b/src/cache.cpp
index 6080f082..6786eaa2 100644
--- a/src/cache.cpp
+++ b/src/cache.cpp
@@ -57,7 +57,7 @@ const std::string& GetBinaryFromCache(const std::string &device_name, const Prec
}
}
binary_cache_mutex_.unlock();
- throw std::runtime_error("Internal CLBlast error: Expected binary in cache, but found none.");
+ throw LogicError("GetBinaryFromCache: Expected binary in cache, but found none");
}
// Queries the cache and retrieves a matching program. Assumes that the match is available, throws
@@ -75,7 +75,7 @@ const Program& GetProgramFromCache(const Context &context, const Precision &prec
}
}
program_cache_mutex_.unlock();
- throw std::runtime_error("Internal CLBlast error: Expected program in cache, but found none.");
+ throw LogicError("GetProgramFromCache: Expected program in cache, but found none");
}
// Queries the cache to see whether or not the compiled kernel is already there
@@ -109,14 +109,13 @@ bool ProgramIsInCache(const Context &context, const Precision &precision,
// =================================================================================================
// Clears the cache of stored binaries and programs
-StatusCode CacheClearAll() {
+void CacheClearAll() {
binary_cache_mutex_.lock();
binary_cache_.clear();
binary_cache_mutex_.unlock();
program_cache_mutex_.lock();
program_cache_.clear();
program_cache_mutex_.unlock();
- return StatusCode::kSuccess;
}
// =================================================================================================
diff --git a/src/cache.hpp b/src/cache.hpp
index 9075da0d..f2b44edf 100644
--- a/src/cache.hpp
+++ b/src/cache.hpp
@@ -89,7 +89,7 @@ bool ProgramIsInCache(const Context &context, const Precision &precision,
// =================================================================================================
// Clears the cache of stored binaries
-StatusCode CacheClearAll();
+void CacheClearAll();
// =================================================================================================
} // namespace clblast
diff --git a/src/clblast.cpp b/src/clblast.cpp
index 79c30ca4..4bb4e0b3 100644
--- a/src/clblast.cpp
+++ b/src/clblast.cpp
@@ -168,13 +168,14 @@ StatusCode Swap(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xswap<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoSwap(n,
- Buffer<T>(x_buffer), x_offset, x_inc,
- Buffer<T>(y_buffer), y_offset, y_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xswap<T>(queue_cpp, event);
+ routine.DoSwap(n,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ Buffer<T>(y_buffer), y_offset, y_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Swap<float>(const size_t,
cl_mem, const size_t, const size_t,
@@ -203,13 +204,14 @@ StatusCode Scal(const size_t n,
const T alpha,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xscal<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoScal(n,
- alpha,
- Buffer<T>(x_buffer), x_offset, x_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xscal<T>(queue_cpp, event);
+ routine.DoScal(n,
+ alpha,
+ Buffer<T>(x_buffer), x_offset, x_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Scal<float>(const size_t,
const float,
@@ -238,13 +240,14 @@ StatusCode Copy(const size_t n,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xcopy<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoCopy(n,
- Buffer<T>(x_buffer), x_offset, x_inc,
- Buffer<T>(y_buffer), y_offset, y_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xcopy<T>(queue_cpp, event);
+ routine.DoCopy(n,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ Buffer<T>(y_buffer), y_offset, y_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Copy<float>(const size_t,
const cl_mem, const size_t, const size_t,
@@ -274,14 +277,15 @@ StatusCode Axpy(const size_t n,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xaxpy<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoAxpy(n,
- alpha,
- Buffer<T>(x_buffer), x_offset, x_inc,
- Buffer<T>(y_buffer), y_offset, y_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xaxpy<T>(queue_cpp, event);
+ routine.DoAxpy(n,
+ alpha,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ Buffer<T>(y_buffer), y_offset, y_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Axpy<float>(const size_t,
const float,
@@ -316,14 +320,15 @@ StatusCode Dot(const size_t n,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xdot<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoDot(n,
- Buffer<T>(dot_buffer), dot_offset,
- Buffer<T>(x_buffer), x_offset, x_inc,
- Buffer<T>(y_buffer), y_offset, y_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xdot<T>(queue_cpp, event);
+ routine.DoDot(n,
+ Buffer<T>(dot_buffer), dot_offset,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ Buffer<T>(y_buffer), y_offset, y_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Dot<float>(const size_t,
cl_mem, const size_t,
@@ -348,14 +353,15 @@ StatusCode Dotu(const size_t n,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xdotu<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoDotu(n,
- Buffer<T>(dot_buffer), dot_offset,
- Buffer<T>(x_buffer), x_offset, x_inc,
- Buffer<T>(y_buffer), y_offset, y_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xdotu<T>(queue_cpp, event);
+ routine.DoDotu(n,
+ Buffer<T>(dot_buffer), dot_offset,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ Buffer<T>(y_buffer), y_offset, y_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Dotu<float2>(const size_t,
cl_mem, const size_t,
@@ -375,14 +381,15 @@ StatusCode Dotc(const size_t n,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xdotc<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoDotc(n,
- Buffer<T>(dot_buffer), dot_offset,
- Buffer<T>(x_buffer), x_offset, x_inc,
- Buffer<T>(y_buffer), y_offset, y_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xdotc<T>(queue_cpp, event);
+ routine.DoDotc(n,
+ Buffer<T>(dot_buffer), dot_offset,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ Buffer<T>(y_buffer), y_offset, y_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Dotc<float2>(const size_t,
cl_mem, const size_t,
@@ -401,13 +408,14 @@ StatusCode Nrm2(const size_t n,
cl_mem nrm2_buffer, const size_t nrm2_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xnrm2<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoNrm2(n,
- Buffer<T>(nrm2_buffer), nrm2_offset,
- Buffer<T>(x_buffer), x_offset, x_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xnrm2<T>(queue_cpp, event);
+ routine.DoNrm2(n,
+ Buffer<T>(nrm2_buffer), nrm2_offset,
+ Buffer<T>(x_buffer), x_offset, x_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Nrm2<float>(const size_t,
cl_mem, const size_t,
@@ -436,13 +444,14 @@ StatusCode Asum(const size_t n,
cl_mem asum_buffer, const size_t asum_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xasum<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoAsum(n,
- Buffer<T>(asum_buffer), asum_offset,
- Buffer<T>(x_buffer), x_offset, x_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xasum<T>(queue_cpp, event);
+ routine.DoAsum(n,
+ Buffer<T>(asum_buffer), asum_offset,
+ Buffer<T>(x_buffer), x_offset, x_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Asum<float>(const size_t,
cl_mem, const size_t,
@@ -471,13 +480,14 @@ StatusCode Sum(const size_t n,
cl_mem sum_buffer, const size_t sum_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xsum<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoSum(n,
- Buffer<T>(sum_buffer), sum_offset,
- Buffer<T>(x_buffer), x_offset, x_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xsum<T>(queue_cpp, event);
+ routine.DoSum(n,
+ Buffer<T>(sum_buffer), sum_offset,
+ Buffer<T>(x_buffer), x_offset, x_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Sum<float>(const size_t,
cl_mem, const size_t,
@@ -506,13 +516,14 @@ StatusCode Amax(const size_t n,
cl_mem imax_buffer, const size_t imax_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xamax<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoAmax(n,
- Buffer<unsigned int>(imax_buffer), imax_offset,
- Buffer<T>(x_buffer), x_offset, x_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xamax<T>(queue_cpp, event);
+ routine.DoAmax(n,
+ Buffer<unsigned int>(imax_buffer), imax_offset,
+ Buffer<T>(x_buffer), x_offset, x_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Amax<float>(const size_t,
cl_mem, const size_t,
@@ -541,13 +552,14 @@ StatusCode Max(const size_t n,
cl_mem imax_buffer, const size_t imax_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xmax<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoMax(n,
- Buffer<unsigned int>(imax_buffer), imax_offset,
- Buffer<T>(x_buffer), x_offset, x_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xmax<T>(queue_cpp, event);
+ routine.DoMax(n,
+ Buffer<unsigned int>(imax_buffer), imax_offset,
+ Buffer<T>(x_buffer), x_offset, x_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Max<float>(const size_t,
cl_mem, const size_t,
@@ -576,13 +588,14 @@ StatusCode Min(const size_t n,
cl_mem imin_buffer, const size_t imin_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xmin<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoMin(n,
- Buffer<unsigned int>(imin_buffer), imin_offset,
- Buffer<T>(x_buffer), x_offset, x_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xmin<T>(queue_cpp, event);
+ routine.DoMin(n,
+ Buffer<unsigned int>(imin_buffer), imin_offset,
+ Buffer<T>(x_buffer), x_offset, x_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Min<float>(const size_t,
cl_mem, const size_t,
@@ -619,17 +632,18 @@ StatusCode Gemv(const Layout layout, const Transpose a_transpose,
const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xgemv<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoGemv(layout, a_transpose,
- m, n,
- alpha,
- Buffer<T>(a_buffer), a_offset, a_ld,
- Buffer<T>(x_buffer), x_offset, x_inc,
- beta,
- Buffer<T>(y_buffer), y_offset, y_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xgemv<T>(queue_cpp, event);
+ routine.DoGemv(layout, a_transpose,
+ m, n,
+ alpha,
+ Buffer<T>(a_buffer), a_offset, a_ld,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ beta,
+ Buffer<T>(y_buffer), y_offset, y_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Gemv<float>(const Layout, const Transpose,
const size_t, const size_t,
@@ -682,17 +696,18 @@ StatusCode Gbmv(const Layout layout, const Transpose a_transpose,
const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xgbmv<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoGbmv(layout, a_transpose,
- m, n, kl, ku,
- alpha,
- Buffer<T>(a_buffer), a_offset, a_ld,
- Buffer<T>(x_buffer), x_offset, x_inc,
- beta,
- Buffer<T>(y_buffer), y_offset, y_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xgbmv<T>(queue_cpp, event);
+ routine.DoGbmv(layout, a_transpose,
+ m, n, kl, ku,
+ alpha,
+ Buffer<T>(a_buffer), a_offset, a_ld,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ beta,
+ Buffer<T>(y_buffer), y_offset, y_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Gbmv<float>(const Layout, const Transpose,
const size_t, const size_t, const size_t, const size_t,
@@ -745,17 +760,18 @@ StatusCode Hemv(const Layout layout, const Triangle triangle,
const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xhemv<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoHemv(layout, triangle,
- n,
- alpha,
- Buffer<T>(a_buffer), a_offset, a_ld,
- Buffer<T>(x_buffer), x_offset, x_inc,
- beta,
- Buffer<T>(y_buffer), y_offset, y_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xhemv<T>(queue_cpp, event);
+ routine.DoHemv(layout, triangle,
+ n,
+ alpha,
+ Buffer<T>(a_buffer), a_offset, a_ld,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ beta,
+ Buffer<T>(y_buffer), y_offset, y_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Hemv<float2>(const Layout, const Triangle,
const size_t,
@@ -784,17 +800,18 @@ StatusCode Hbmv(const Layout layout, const Triangle triangle,
const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xhbmv<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoHbmv(layout, triangle,
- n, k,
- alpha,
- Buffer<T>(a_buffer), a_offset, a_ld,
- Buffer<T>(x_buffer), x_offset, x_inc,
- beta,
- Buffer<T>(y_buffer), y_offset, y_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xhbmv<T>(queue_cpp, event);
+ routine.DoHbmv(layout, triangle,
+ n, k,
+ alpha,
+ Buffer<T>(a_buffer), a_offset, a_ld,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ beta,
+ Buffer<T>(y_buffer), y_offset, y_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Hbmv<float2>(const Layout, const Triangle,
const size_t, const size_t,
@@ -823,17 +840,18 @@ StatusCode Hpmv(const Layout layout, const Triangle triangle,
const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xhpmv<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoHpmv(layout, triangle,
- n,
- alpha,
- Buffer<T>(ap_buffer), ap_offset,
- Buffer<T>(x_buffer), x_offset, x_inc,
- beta,
- Buffer<T>(y_buffer), y_offset, y_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xhpmv<T>(queue_cpp, event);
+ routine.DoHpmv(layout, triangle,
+ n,
+ alpha,
+ Buffer<T>(ap_buffer), ap_offset,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ beta,
+ Buffer<T>(y_buffer), y_offset, y_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Hpmv<float2>(const Layout, const Triangle,
const size_t,
@@ -862,17 +880,18 @@ StatusCode Symv(const Layout layout, const Triangle triangle,
const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xsymv<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoSymv(layout, triangle,
- n,
- alpha,
- Buffer<T>(a_buffer), a_offset, a_ld,
- Buffer<T>(x_buffer), x_offset, x_inc,
- beta,
- Buffer<T>(y_buffer), y_offset, y_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xsymv<T>(queue_cpp, event);
+ routine.DoSymv(layout, triangle,
+ n,
+ alpha,
+ Buffer<T>(a_buffer), a_offset, a_ld,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ beta,
+ Buffer<T>(y_buffer), y_offset, y_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Symv<float>(const Layout, const Triangle,
const size_t,
@@ -909,17 +928,18 @@ StatusCode Sbmv(const Layout layout, const Triangle triangle,
const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xsbmv<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoSbmv(layout, triangle,
- n, k,
- alpha,
- Buffer<T>(a_buffer), a_offset, a_ld,
- Buffer<T>(x_buffer), x_offset, x_inc,
- beta,
- Buffer<T>(y_buffer), y_offset, y_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xsbmv<T>(queue_cpp, event);
+ routine.DoSbmv(layout, triangle,
+ n, k,
+ alpha,
+ Buffer<T>(a_buffer), a_offset, a_ld,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ beta,
+ Buffer<T>(y_buffer), y_offset, y_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Sbmv<float>(const Layout, const Triangle,
const size_t, const size_t,
@@ -956,17 +976,18 @@ StatusCode Spmv(const Layout layout, const Triangle triangle,
const T beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xspmv<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoSpmv(layout, triangle,
- n,
- alpha,
- Buffer<T>(ap_buffer), ap_offset,
- Buffer<T>(x_buffer), x_offset, x_inc,
- beta,
- Buffer<T>(y_buffer), y_offset, y_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xspmv<T>(queue_cpp, event);
+ routine.DoSpmv(layout, triangle,
+ n,
+ alpha,
+ Buffer<T>(ap_buffer), ap_offset,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ beta,
+ Buffer<T>(y_buffer), y_offset, y_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Spmv<float>(const Layout, const Triangle,
const size_t,
@@ -1000,14 +1021,15 @@ StatusCode Trmv(const Layout layout, const Triangle triangle, const Transpose a_
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xtrmv<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoTrmv(layout, triangle, a_transpose, diagonal,
- n,
- Buffer<T>(a_buffer), a_offset, a_ld,
- Buffer<T>(x_buffer), x_offset, x_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xtrmv<T>(queue_cpp, event);
+ routine.DoTrmv(layout, triangle, a_transpose, diagonal,
+ n,
+ Buffer<T>(a_buffer), a_offset, a_ld,
+ Buffer<T>(x_buffer), x_offset, x_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Trmv<float>(const Layout, const Triangle, const Transpose, const Diagonal,
const size_t,
@@ -1042,14 +1064,15 @@ StatusCode Tbmv(const Layout layout, const Triangle triangle, const Transpose a_
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xtbmv<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoTbmv(layout, triangle, a_transpose, diagonal,
- n, k,
- Buffer<T>(a_buffer), a_offset, a_ld,
- Buffer<T>(x_buffer), x_offset, x_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xtbmv<T>(queue_cpp, event);
+ routine.DoTbmv(layout, triangle, a_transpose, diagonal,
+ n, k,
+ Buffer<T>(a_buffer), a_offset, a_ld,
+ Buffer<T>(x_buffer), x_offset, x_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Tbmv<float>(const Layout, const Triangle, const Transpose, const Diagonal,
const size_t, const size_t,
@@ -1084,14 +1107,15 @@ StatusCode Tpmv(const Layout layout, const Triangle triangle, const Transpose a_
const cl_mem ap_buffer, const size_t ap_offset,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xtpmv<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoTpmv(layout, triangle, a_transpose, diagonal,
- n,
- Buffer<T>(ap_buffer), ap_offset,
- Buffer<T>(x_buffer), x_offset, x_inc);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xtpmv<T>(queue_cpp, event);
+ routine.DoTpmv(layout, triangle, a_transpose, diagonal,
+ n,
+ Buffer<T>(ap_buffer), ap_offset,
+ Buffer<T>(x_buffer), x_offset, x_inc);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Tpmv<float>(const Layout, const Triangle, const Transpose, const Diagonal,
const size_t,
@@ -1218,16 +1242,17 @@ StatusCode Ger(const Layout layout,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xger<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoGer(layout,
- m, n,
- alpha,
- Buffer<T>(x_buffer), x_offset, x_inc,
- Buffer<T>(y_buffer), y_offset, y_inc,
- Buffer<T>(a_buffer), a_offset, a_ld);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xger<T>(queue_cpp, event);
+ routine.DoGer(layout,
+ m, n,
+ alpha,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ Buffer<T>(y_buffer), y_offset, y_inc,
+ Buffer<T>(a_buffer), a_offset, a_ld);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Ger<float>(const Layout,
const size_t, const size_t,
@@ -1260,16 +1285,17 @@ StatusCode Geru(const Layout layout,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xgeru<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoGeru(layout,
- m, n,
- alpha,
- Buffer<T>(x_buffer), x_offset, x_inc,
- Buffer<T>(y_buffer), y_offset, y_inc,
- Buffer<T>(a_buffer), a_offset, a_ld);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xgeru<T>(queue_cpp, event);
+ routine.DoGeru(layout,
+ m, n,
+ alpha,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ Buffer<T>(y_buffer), y_offset, y_inc,
+ Buffer<T>(a_buffer), a_offset, a_ld);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Geru<float2>(const Layout,
const size_t, const size_t,
@@ -1295,16 +1321,17 @@ StatusCode Gerc(const Layout layout,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xgerc<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoGerc(layout,
- m, n,
- alpha,
- Buffer<T>(x_buffer), x_offset, x_inc,
- Buffer<T>(y_buffer), y_offset, y_inc,
- Buffer<T>(a_buffer), a_offset, a_ld);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xgerc<T>(queue_cpp, event);
+ routine.DoGerc(layout,
+ m, n,
+ alpha,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ Buffer<T>(y_buffer), y_offset, y_inc,
+ Buffer<T>(a_buffer), a_offset, a_ld);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Gerc<float2>(const Layout,
const size_t, const size_t,
@@ -1329,15 +1356,16 @@ StatusCode Her(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xher<std::complex<T>,T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoHer(layout, triangle,
- n,
- alpha,
- Buffer<std::complex<T>>(x_buffer), x_offset, x_inc,
- Buffer<std::complex<T>>(a_buffer), a_offset, a_ld);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xher<std::complex<T>,T>(queue_cpp, event);
+ routine.DoHer(layout, triangle,
+ n,
+ alpha,
+ Buffer<std::complex<T>>(x_buffer), x_offset, x_inc,
+ Buffer<std::complex<T>>(a_buffer), a_offset, a_ld);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Her<float>(const Layout, const Triangle,
const size_t,
@@ -1360,15 +1388,16 @@ StatusCode Hpr(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xhpr<std::complex<T>,T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoHpr(layout, triangle,
- n,
- alpha,
- Buffer<std::complex<T>>(x_buffer), x_offset, x_inc,
- Buffer<std::complex<T>>(ap_buffer), ap_offset);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xhpr<std::complex<T>,T>(queue_cpp, event);
+ routine.DoHpr(layout, triangle,
+ n,
+ alpha,
+ Buffer<std::complex<T>>(x_buffer), x_offset, x_inc,
+ Buffer<std::complex<T>>(ap_buffer), ap_offset);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Hpr<float>(const Layout, const Triangle,
const size_t,
@@ -1392,16 +1421,17 @@ StatusCode Her2(const Layout layout, const Triangle triangle,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xher2<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoHer2(layout, triangle,
- n,
- alpha,
- Buffer<T>(x_buffer), x_offset, x_inc,
- Buffer<T>(y_buffer), y_offset, y_inc,
- Buffer<T>(a_buffer), a_offset, a_ld);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xher2<T>(queue_cpp, event);
+ routine.DoHer2(layout, triangle,
+ n,
+ alpha,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ Buffer<T>(y_buffer), y_offset, y_inc,
+ Buffer<T>(a_buffer), a_offset, a_ld);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Her2<float2>(const Layout, const Triangle,
const size_t,
@@ -1427,16 +1457,17 @@ StatusCode Hpr2(const Layout layout, const Triangle triangle,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xhpr2<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoHpr2(layout, triangle,
- n,
- alpha,
- Buffer<T>(x_buffer), x_offset, x_inc,
- Buffer<T>(y_buffer), y_offset, y_inc,
- Buffer<T>(ap_buffer), ap_offset);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xhpr2<T>(queue_cpp, event);
+ routine.DoHpr2(layout, triangle,
+ n,
+ alpha,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ Buffer<T>(y_buffer), y_offset, y_inc,
+ Buffer<T>(ap_buffer), ap_offset);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Hpr2<float2>(const Layout, const Triangle,
const size_t,
@@ -1461,15 +1492,16 @@ StatusCode Syr(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xsyr<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoSyr(layout, triangle,
- n,
- alpha,
- Buffer<T>(x_buffer), x_offset, x_inc,
- Buffer<T>(a_buffer), a_offset, a_ld);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xsyr<T>(queue_cpp, event);
+ routine.DoSyr(layout, triangle,
+ n,
+ alpha,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ Buffer<T>(a_buffer), a_offset, a_ld);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Syr<float>(const Layout, const Triangle,
const size_t,
@@ -1498,15 +1530,16 @@ StatusCode Spr(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xspr<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoSpr(layout, triangle,
- n,
- alpha,
- Buffer<T>(x_buffer), x_offset, x_inc,
- Buffer<T>(ap_buffer), ap_offset);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xspr<T>(queue_cpp, event);
+ routine.DoSpr(layout, triangle,
+ n,
+ alpha,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ Buffer<T>(ap_buffer), ap_offset);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Spr<float>(const Layout, const Triangle,
const size_t,
@@ -1536,16 +1569,17 @@ StatusCode Syr2(const Layout layout, const Triangle triangle,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xsyr2<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoSyr2(layout, triangle,
- n,
- alpha,
- Buffer<T>(x_buffer), x_offset, x_inc,
- Buffer<T>(y_buffer), y_offset, y_inc,
- Buffer<T>(a_buffer), a_offset, a_ld);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xsyr2<T>(queue_cpp, event);
+ routine.DoSyr2(layout, triangle,
+ n,
+ alpha,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ Buffer<T>(y_buffer), y_offset, y_inc,
+ Buffer<T>(a_buffer), a_offset, a_ld);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Syr2<float>(const Layout, const Triangle,
const size_t,
@@ -1578,16 +1612,17 @@ StatusCode Spr2(const Layout layout, const Triangle triangle,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xspr2<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoSpr2(layout, triangle,
- n,
- alpha,
- Buffer<T>(x_buffer), x_offset, x_inc,
- Buffer<T>(y_buffer), y_offset, y_inc,
- Buffer<T>(ap_buffer), ap_offset);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xspr2<T>(queue_cpp, event);
+ routine.DoSpr2(layout, triangle,
+ n,
+ alpha,
+ Buffer<T>(x_buffer), x_offset, x_inc,
+ Buffer<T>(y_buffer), y_offset, y_inc,
+ Buffer<T>(ap_buffer), ap_offset);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Spr2<float>(const Layout, const Triangle,
const size_t,
@@ -1625,17 +1660,18 @@ StatusCode Gemm(const Layout layout, const Transpose a_transpose, const Transpos
const T beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xgemm<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoGemm(layout, a_transpose, b_transpose,
- m, n, k,
- alpha,
- Buffer<T>(a_buffer), a_offset, a_ld,
- Buffer<T>(b_buffer), b_offset, b_ld,
- beta,
- Buffer<T>(c_buffer), c_offset, c_ld);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xgemm<T>(queue_cpp, event);
+ routine.DoGemm(layout, a_transpose, b_transpose,
+ m, n, k,
+ alpha,
+ Buffer<T>(a_buffer), a_offset, a_ld,
+ Buffer<T>(b_buffer), b_offset, b_ld,
+ beta,
+ Buffer<T>(c_buffer), c_offset, c_ld);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Gemm<float>(const Layout, const Transpose, const Transpose,
const size_t, const size_t, const size_t,
@@ -1688,17 +1724,18 @@ StatusCode Symm(const Layout layout, const Side side, const Triangle triangle,
const T beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xsymm<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoSymm(layout, side, triangle,
- m, n,
- alpha,
- Buffer<T>(a_buffer), a_offset, a_ld,
- Buffer<T>(b_buffer), b_offset, b_ld,
- beta,
- Buffer<T>(c_buffer), c_offset, c_ld);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xsymm<T>(queue_cpp, event);
+ routine.DoSymm(layout, side, triangle,
+ m, n,
+ alpha,
+ Buffer<T>(a_buffer), a_offset, a_ld,
+ Buffer<T>(b_buffer), b_offset, b_ld,
+ beta,
+ Buffer<T>(c_buffer), c_offset, c_ld);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Symm<float>(const Layout, const Side, const Triangle,
const size_t, const size_t,
@@ -1751,17 +1788,18 @@ StatusCode Hemm(const Layout layout, const Side side, const Triangle triangle,
const T beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xhemm<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoHemm(layout, side, triangle,
- m, n,
- alpha,
- Buffer<T>(a_buffer), a_offset, a_ld,
- Buffer<T>(b_buffer), b_offset, b_ld,
- beta,
- Buffer<T>(c_buffer), c_offset, c_ld);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xhemm<T>(queue_cpp, event);
+ routine.DoHemm(layout, side, triangle,
+ m, n,
+ alpha,
+ Buffer<T>(a_buffer), a_offset, a_ld,
+ Buffer<T>(b_buffer), b_offset, b_ld,
+ beta,
+ Buffer<T>(c_buffer), c_offset, c_ld);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Hemm<float2>(const Layout, const Side, const Triangle,
const size_t, const size_t,
@@ -1789,16 +1827,17 @@ StatusCode Syrk(const Layout layout, const Triangle triangle, const Transpose a_
const T beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xsyrk<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoSyrk(layout, triangle, a_transpose,
- n, k,
- alpha,
- Buffer<T>(a_buffer), a_offset, a_ld,
- beta,
- Buffer<T>(c_buffer), c_offset, c_ld);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xsyrk<T>(queue_cpp, event);
+ routine.DoSyrk(layout, triangle, a_transpose,
+ n, k,
+ alpha,
+ Buffer<T>(a_buffer), a_offset, a_ld,
+ beta,
+ Buffer<T>(c_buffer), c_offset, c_ld);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Syrk<float>(const Layout, const Triangle, const Transpose,
const size_t, const size_t,
@@ -1845,16 +1884,17 @@ StatusCode Herk(const Layout layout, const Triangle triangle, const Transpose a_
const T beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xherk<std::complex<T>,T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoHerk(layout, triangle, a_transpose,
- n, k,
- alpha,
- Buffer<std::complex<T>>(a_buffer), a_offset, a_ld,
- beta,
- Buffer<std::complex<T>>(c_buffer), c_offset, c_ld);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xherk<std::complex<T>,T>(queue_cpp, event);
+ routine.DoHerk(layout, triangle, a_transpose,
+ n, k,
+ alpha,
+ Buffer<std::complex<T>>(a_buffer), a_offset, a_ld,
+ beta,
+ Buffer<std::complex<T>>(c_buffer), c_offset, c_ld);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Herk<float>(const Layout, const Triangle, const Transpose,
const size_t, const size_t,
@@ -1881,17 +1921,18 @@ StatusCode Syr2k(const Layout layout, const Triangle triangle, const Transpose a
const T beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xsyr2k<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoSyr2k(layout, triangle, ab_transpose,
- n, k,
- alpha,
- Buffer<T>(a_buffer), a_offset, a_ld,
- Buffer<T>(b_buffer), b_offset, b_ld,
- beta,
- Buffer<T>(c_buffer), c_offset, c_ld);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xsyr2k<T>(queue_cpp, event);
+ routine.DoSyr2k(layout, triangle, ab_transpose,
+ n, k,
+ alpha,
+ Buffer<T>(a_buffer), a_offset, a_ld,
+ Buffer<T>(b_buffer), b_offset, b_ld,
+ beta,
+ Buffer<T>(c_buffer), c_offset, c_ld);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Syr2k<float>(const Layout, const Triangle, const Transpose,
const size_t, const size_t,
@@ -1944,17 +1985,18 @@ StatusCode Her2k(const Layout layout, const Triangle triangle, const Transpose a
const U beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xher2k<T,U>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoHer2k(layout, triangle, ab_transpose,
- n, k,
- alpha,
- Buffer<T>(a_buffer), a_offset, a_ld,
- Buffer<T>(b_buffer), b_offset, b_ld,
- beta,
- Buffer<T>(c_buffer), c_offset, c_ld);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xher2k<T,U>(queue_cpp, event);
+ routine.DoHer2k(layout, triangle, ab_transpose,
+ n, k,
+ alpha,
+ Buffer<T>(a_buffer), a_offset, a_ld,
+ Buffer<T>(b_buffer), b_offset, b_ld,
+ beta,
+ Buffer<T>(c_buffer), c_offset, c_ld);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Her2k<float2,float>(const Layout, const Triangle, const Transpose,
const size_t, const size_t,
@@ -1981,15 +2023,16 @@ StatusCode Trmm(const Layout layout, const Side side, const Triangle triangle, c
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xtrmm<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoTrmm(layout, side, triangle, a_transpose, diagonal,
- m, n,
- alpha,
- Buffer<T>(a_buffer), a_offset, a_ld,
- Buffer<T>(b_buffer), b_offset, b_ld);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xtrmm<T>(queue_cpp, event);
+ routine.DoTrmm(layout, side, triangle, a_transpose, diagonal,
+ m, n,
+ alpha,
+ Buffer<T>(a_buffer), a_offset, a_ld,
+ Buffer<T>(b_buffer), b_offset, b_ld);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Trmm<float>(const Layout, const Side, const Triangle, const Transpose, const Diagonal,
const size_t, const size_t,
@@ -2075,15 +2118,16 @@ StatusCode Omatcopy(const Layout layout, const Transpose a_transpose,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event) {
- auto queue_cpp = Queue(*queue);
- auto routine = Xomatcopy<T>(queue_cpp, event);
- auto status = routine.SetUp();
- if (status != StatusCode::kSuccess) { return status; }
- return routine.DoOmatcopy(layout, a_transpose,
- m, n,
- alpha,
- Buffer<T>(a_buffer), a_offset, a_ld,
- Buffer<T>(b_buffer), b_offset, b_ld);
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = Xomatcopy<T>(queue_cpp, event);
+ routine.DoOmatcopy(layout, a_transpose,
+ m, n,
+ alpha,
+ Buffer<T>(a_buffer), a_offset, a_ld,
+ Buffer<T>(b_buffer), b_offset, b_ld);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
}
template StatusCode PUBLIC_API Omatcopy<float>(const Layout, const Transpose,
const size_t, const size_t,
@@ -2119,7 +2163,12 @@ template StatusCode PUBLIC_API Omatcopy<half>(const Layout, const Transpose,
// =================================================================================================
// Clears the cache of stored binaries
-StatusCode ClearCache() { return CacheClearAll(); }
+StatusCode ClearCache() {
+ try {
+ CacheClearAll();
+ } catch (...) { return DispatchException(); }
+ return StatusCode::kSuccess;
+}
// Fills the cache with all binaries for a specific device
// TODO: Add half-precision FP16 set-up calls
@@ -2132,59 +2181,59 @@ StatusCode FillCache(const cl_device_id device) {
auto queue = Queue(context, device_cpp);
// Runs all the level 1 set-up functions
- Xswap<float>(queue, nullptr).SetUp(); Xswap<double>(queue, nullptr).SetUp(); Xswap<float2>(queue, nullptr).SetUp(); Xswap<double2>(queue, nullptr).SetUp();
- Xswap<float>(queue, nullptr).SetUp(); Xswap<double>(queue, nullptr).SetUp(); Xswap<float2>(queue, nullptr).SetUp(); Xswap<double2>(queue, nullptr).SetUp();
- Xscal<float>(queue, nullptr).SetUp(); Xscal<double>(queue, nullptr).SetUp(); Xscal<float2>(queue, nullptr).SetUp(); Xscal<double2>(queue, nullptr).SetUp();
- Xcopy<float>(queue, nullptr).SetUp(); Xcopy<double>(queue, nullptr).SetUp(); Xcopy<float2>(queue, nullptr).SetUp(); Xcopy<double2>(queue, nullptr).SetUp();
- Xaxpy<float>(queue, nullptr).SetUp(); Xaxpy<double>(queue, nullptr).SetUp(); Xaxpy<float2>(queue, nullptr).SetUp(); Xaxpy<double2>(queue, nullptr).SetUp();
- Xdot<float>(queue, nullptr).SetUp(); Xdot<double>(queue, nullptr).SetUp();
- Xdotu<float2>(queue, nullptr).SetUp(); Xdotu<double2>(queue, nullptr).SetUp();
- Xdotc<float2>(queue, nullptr).SetUp(); Xdotc<double2>(queue, nullptr).SetUp();
- Xnrm2<float>(queue, nullptr).SetUp(); Xnrm2<double>(queue, nullptr).SetUp(); Xnrm2<float2>(queue, nullptr).SetUp(); Xnrm2<double2>(queue, nullptr).SetUp();
- Xasum<float>(queue, nullptr).SetUp(); Xasum<double>(queue, nullptr).SetUp(); Xasum<float2>(queue, nullptr).SetUp(); Xasum<double2>(queue, nullptr).SetUp();
- Xsum<float>(queue, nullptr).SetUp(); Xsum<double>(queue, nullptr).SetUp(); Xsum<float2>(queue, nullptr).SetUp(); Xsum<double2>(queue, nullptr).SetUp();
- Xamax<float>(queue, nullptr).SetUp(); Xamax<double>(queue, nullptr).SetUp(); Xamax<float2>(queue, nullptr).SetUp(); Xamax<double2>(queue, nullptr).SetUp();
- Xmax<float>(queue, nullptr).SetUp(); Xmax<double>(queue, nullptr).SetUp(); Xmax<float2>(queue, nullptr).SetUp(); Xmax<double2>(queue, nullptr).SetUp();
- Xmin<float>(queue, nullptr).SetUp(); Xmin<double>(queue, nullptr).SetUp(); Xmin<float2>(queue, nullptr).SetUp(); Xmin<double2>(queue, nullptr).SetUp();
+ Xswap<float>(queue, nullptr); Xswap<double>(queue, nullptr); Xswap<float2>(queue, nullptr); Xswap<double2>(queue, nullptr);
+ Xswap<float>(queue, nullptr); Xswap<double>(queue, nullptr); Xswap<float2>(queue, nullptr); Xswap<double2>(queue, nullptr);
+ Xscal<float>(queue, nullptr); Xscal<double>(queue, nullptr); Xscal<float2>(queue, nullptr); Xscal<double2>(queue, nullptr);
+ Xcopy<float>(queue, nullptr); Xcopy<double>(queue, nullptr); Xcopy<float2>(queue, nullptr); Xcopy<double2>(queue, nullptr);
+ Xaxpy<float>(queue, nullptr); Xaxpy<double>(queue, nullptr); Xaxpy<float2>(queue, nullptr); Xaxpy<double2>(queue, nullptr);
+ Xdot<float>(queue, nullptr); Xdot<double>(queue, nullptr);
+ Xdotu<float2>(queue, nullptr); Xdotu<double2>(queue, nullptr);
+ Xdotc<float2>(queue, nullptr); Xdotc<double2>(queue, nullptr);
+ Xnrm2<float>(queue, nullptr); Xnrm2<double>(queue, nullptr); Xnrm2<float2>(queue, nullptr); Xnrm2<double2>(queue, nullptr);
+ Xasum<float>(queue, nullptr); Xasum<double>(queue, nullptr); Xasum<float2>(queue, nullptr); Xasum<double2>(queue, nullptr);
+ Xsum<float>(queue, nullptr); Xsum<double>(queue, nullptr); Xsum<float2>(queue, nullptr); Xsum<double2>(queue, nullptr);
+ Xamax<float>(queue, nullptr); Xamax<double>(queue, nullptr); Xamax<float2>(queue, nullptr); Xamax<double2>(queue, nullptr);
+ Xmax<float>(queue, nullptr); Xmax<double>(queue, nullptr); Xmax<float2>(queue, nullptr); Xmax<double2>(queue, nullptr);
+ Xmin<float>(queue, nullptr); Xmin<double>(queue, nullptr); Xmin<float2>(queue, nullptr); Xmin<double2>(queue, nullptr);
// Runs all the level 2 set-up functions
- Xgemv<float>(queue, nullptr).SetUp(); Xgemv<double>(queue, nullptr).SetUp(); Xgemv<float2>(queue, nullptr).SetUp(); Xgemv<double2>(queue, nullptr).SetUp();
- Xgbmv<float>(queue, nullptr).SetUp(); Xgbmv<double>(queue, nullptr).SetUp(); Xgbmv<float2>(queue, nullptr).SetUp(); Xgbmv<double2>(queue, nullptr).SetUp();
- Xhemv<float2>(queue, nullptr).SetUp(); Xhemv<double2>(queue, nullptr).SetUp();
- Xhbmv<float2>(queue, nullptr).SetUp(); Xhbmv<double2>(queue, nullptr).SetUp();
- Xhpmv<float2>(queue, nullptr).SetUp(); Xhpmv<double2>(queue, nullptr).SetUp();
- Xsymv<float>(queue, nullptr).SetUp(); Xsymv<double>(queue, nullptr).SetUp();
- Xsbmv<float>(queue, nullptr).SetUp(); Xsbmv<double>(queue, nullptr).SetUp();
- Xspmv<float>(queue, nullptr).SetUp(); Xspmv<double>(queue, nullptr).SetUp();
- Xtrmv<float>(queue, nullptr).SetUp(); Xtrmv<double>(queue, nullptr).SetUp(); Xtrmv<float2>(queue, nullptr).SetUp(); Xtrmv<double2>(queue, nullptr).SetUp();
- Xtbmv<float>(queue, nullptr).SetUp(); Xtbmv<double>(queue, nullptr).SetUp(); Xtbmv<float2>(queue, nullptr).SetUp(); Xtbmv<double2>(queue, nullptr).SetUp();
- Xtpmv<float>(queue, nullptr).SetUp(); Xtpmv<double>(queue, nullptr).SetUp(); Xtpmv<float2>(queue, nullptr).SetUp(); Xtpmv<double2>(queue, nullptr).SetUp();
- Xger<float>(queue, nullptr).SetUp(); Xger<double>(queue, nullptr).SetUp();
- Xgeru<float2>(queue, nullptr).SetUp(); Xgeru<double2>(queue, nullptr).SetUp();
- Xgerc<float2>(queue, nullptr).SetUp(); Xgerc<double2>(queue, nullptr).SetUp();
- Xher<float2,float>(queue, nullptr).SetUp(); Xher<double2,double>(queue, nullptr).SetUp();
- Xhpr<float2,float>(queue, nullptr).SetUp(); Xhpr<double2,double>(queue, nullptr).SetUp();
- Xher2<float2>(queue, nullptr).SetUp(); Xher2<double2>(queue, nullptr).SetUp();
- Xhpr2<float2>(queue, nullptr).SetUp(); Xhpr2<double2>(queue, nullptr).SetUp();
- Xsyr<float>(queue, nullptr).SetUp(); Xsyr<double>(queue, nullptr).SetUp();
- Xspr<float>(queue, nullptr).SetUp(); Xspr<double>(queue, nullptr).SetUp();
- Xsyr2<float>(queue, nullptr).SetUp(); Xsyr2<double>(queue, nullptr).SetUp();
- Xspr2<float>(queue, nullptr).SetUp(); Xspr2<double>(queue, nullptr).SetUp();
+ Xgemv<float>(queue, nullptr); Xgemv<double>(queue, nullptr); Xgemv<float2>(queue, nullptr); Xgemv<double2>(queue, nullptr);
+ Xgbmv<float>(queue, nullptr); Xgbmv<double>(queue, nullptr); Xgbmv<float2>(queue, nullptr); Xgbmv<double2>(queue, nullptr);
+ Xhemv<float2>(queue, nullptr); Xhemv<double2>(queue, nullptr);
+ Xhbmv<float2>(queue, nullptr); Xhbmv<double2>(queue, nullptr);
+ Xhpmv<float2>(queue, nullptr); Xhpmv<double2>(queue, nullptr);
+ Xsymv<float>(queue, nullptr); Xsymv<double>(queue, nullptr);
+ Xsbmv<float>(queue, nullptr); Xsbmv<double>(queue, nullptr);
+ Xspmv<float>(queue, nullptr); Xspmv<double>(queue, nullptr);
+ Xtrmv<float>(queue, nullptr); Xtrmv<double>(queue, nullptr); Xtrmv<float2>(queue, nullptr); Xtrmv<double2>(queue, nullptr);
+ Xtbmv<float>(queue, nullptr); Xtbmv<double>(queue, nullptr); Xtbmv<float2>(queue, nullptr); Xtbmv<double2>(queue, nullptr);
+ Xtpmv<float>(queue, nullptr); Xtpmv<double>(queue, nullptr); Xtpmv<float2>(queue, nullptr); Xtpmv<double2>(queue, nullptr);
+ Xger<float>(queue, nullptr); Xger<double>(queue, nullptr);
+ Xgeru<float2>(queue, nullptr); Xgeru<double2>(queue, nullptr);
+ Xgerc<float2>(queue, nullptr); Xgerc<double2>(queue, nullptr);
+ Xher<float2,float>(queue, nullptr); Xher<double2,double>(queue, nullptr);
+ Xhpr<float2,float>(queue, nullptr); Xhpr<double2,double>(queue, nullptr);
+ Xher2<float2>(queue, nullptr); Xher2<double2>(queue, nullptr);
+ Xhpr2<float2>(queue, nullptr); Xhpr2<double2>(queue, nullptr);
+ Xsyr<float>(queue, nullptr); Xsyr<double>(queue, nullptr);
+ Xspr<float>(queue, nullptr); Xspr<double>(queue, nullptr);
+ Xsyr2<float>(queue, nullptr); Xsyr2<double>(queue, nullptr);
+ Xspr2<float>(queue, nullptr); Xspr2<double>(queue, nullptr);
// Runs all the level 3 set-up functions
- Xgemm<float>(queue, nullptr).SetUp(); Xgemm<double>(queue, nullptr).SetUp(); Xgemm<float2>(queue, nullptr).SetUp(); Xgemm<double2>(queue, nullptr).SetUp();
- Xsymm<float>(queue, nullptr).SetUp(); Xsymm<double>(queue, nullptr).SetUp(); Xsymm<float2>(queue, nullptr).SetUp(); Xsymm<double2>(queue, nullptr).SetUp();
- Xhemm<float2>(queue, nullptr).SetUp(); Xhemm<double2>(queue, nullptr).SetUp();
- Xsyrk<float>(queue, nullptr).SetUp(); Xsyrk<double>(queue, nullptr).SetUp(); Xsyrk<float2>(queue, nullptr).SetUp(); Xsyrk<double2>(queue, nullptr).SetUp();
- Xherk<float2,float>(queue, nullptr).SetUp(); Xherk<double2,double>(queue, nullptr).SetUp();
- Xsyr2k<float>(queue, nullptr).SetUp(); Xsyr2k<double>(queue, nullptr).SetUp(); Xsyr2k<float2>(queue, nullptr).SetUp(); Xsyr2k<double2>(queue, nullptr).SetUp();
- Xher2k<float2,float>(queue, nullptr).SetUp(); Xher2k<double2,double>(queue, nullptr).SetUp();
- Xtrmm<float>(queue, nullptr).SetUp(); Xtrmm<double>(queue, nullptr).SetUp(); Xtrmm<float2>(queue, nullptr).SetUp(); Xtrmm<double2>(queue, nullptr).SetUp();
+ Xgemm<float>(queue, nullptr); Xgemm<double>(queue, nullptr); Xgemm<float2>(queue, nullptr); Xgemm<double2>(queue, nullptr);
+ Xsymm<float>(queue, nullptr); Xsymm<double>(queue, nullptr); Xsymm<float2>(queue, nullptr); Xsymm<double2>(queue, nullptr);
+ Xhemm<float2>(queue, nullptr); Xhemm<double2>(queue, nullptr);
+ Xsyrk<float>(queue, nullptr); Xsyrk<double>(queue, nullptr); Xsyrk<float2>(queue, nullptr); Xsyrk<double2>(queue, nullptr);
+ Xherk<float2,float>(queue, nullptr); Xherk<double2,double>(queue, nullptr);
+ Xsyr2k<float>(queue, nullptr); Xsyr2k<double>(queue, nullptr); Xsyr2k<float2>(queue, nullptr); Xsyr2k<double2>(queue, nullptr);
+ Xher2k<float2,float>(queue, nullptr); Xher2k<double2,double>(queue, nullptr);
+ Xtrmm<float>(queue, nullptr); Xtrmm<double>(queue, nullptr); Xtrmm<float2>(queue, nullptr); Xtrmm<double2>(queue, nullptr);
// Runs all the extra (non-BLAS, level-X) set-up functions
- Xomatcopy<float>(queue, nullptr).SetUp(); Xomatcopy<double>(queue, nullptr).SetUp(); Xomatcopy<float2>(queue, nullptr).SetUp(); Xomatcopy<double2>(queue, nullptr).SetUp();
+ Xomatcopy<float>(queue, nullptr); Xomatcopy<double>(queue, nullptr); Xomatcopy<float2>(queue, nullptr); Xomatcopy<double2>(queue, nullptr);
- } catch (...) { return StatusCode::kBuildProgramFailure; }
+ } catch (...) { return DispatchException(); }
return StatusCode::kSuccess;
}
diff --git a/src/clblast_c.cpp b/src/clblast_c.cpp
index 9ea2c884..0174fd19 100644
--- a/src/clblast_c.cpp
+++ b/src/clblast_c.cpp
@@ -31,24 +31,26 @@ StatusCode CLBlastSrotg(cl_mem sa_buffer, const size_t sa_offset,
cl_mem sc_buffer, const size_t sc_offset,
cl_mem ss_buffer, const size_t ss_offset,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Rotg<float>(sa_buffer, sa_offset,
- sb_buffer, sb_offset,
- sc_buffer, sc_offset,
- ss_buffer, ss_offset,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Rotg<float>(sa_buffer, sa_offset,
+ sb_buffer, sb_offset,
+ sc_buffer, sc_offset,
+ ss_buffer, ss_offset,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDrotg(cl_mem sa_buffer, const size_t sa_offset,
cl_mem sb_buffer, const size_t sb_offset,
cl_mem sc_buffer, const size_t sc_offset,
cl_mem ss_buffer, const size_t ss_offset,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Rotg<double>(sa_buffer, sa_offset,
- sb_buffer, sb_offset,
- sc_buffer, sc_offset,
- ss_buffer, ss_offset,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Rotg<double>(sa_buffer, sa_offset,
+ sb_buffer, sb_offset,
+ sc_buffer, sc_offset,
+ ss_buffer, ss_offset,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// ROTMG
@@ -58,13 +60,14 @@ StatusCode CLBlastSrotmg(cl_mem sd1_buffer, const size_t sd1_offset,
const cl_mem sy1_buffer, const size_t sy1_offset,
cl_mem sparam_buffer, const size_t sparam_offset,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Rotmg<float>(sd1_buffer, sd1_offset,
- sd2_buffer, sd2_offset,
- sx1_buffer, sx1_offset,
- sy1_buffer, sy1_offset,
- sparam_buffer, sparam_offset,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Rotmg<float>(sd1_buffer, sd1_offset,
+ sd2_buffer, sd2_offset,
+ sx1_buffer, sx1_offset,
+ sy1_buffer, sy1_offset,
+ sparam_buffer, sparam_offset,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDrotmg(cl_mem sd1_buffer, const size_t sd1_offset,
cl_mem sd2_buffer, const size_t sd2_offset,
@@ -72,13 +75,14 @@ StatusCode CLBlastDrotmg(cl_mem sd1_buffer, const size_t sd1_offset,
const cl_mem sy1_buffer, const size_t sy1_offset,
cl_mem sparam_buffer, const size_t sparam_offset,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Rotmg<double>(sd1_buffer, sd1_offset,
- sd2_buffer, sd2_offset,
- sx1_buffer, sx1_offset,
- sy1_buffer, sy1_offset,
- sparam_buffer, sparam_offset,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Rotmg<double>(sd1_buffer, sd1_offset,
+ sd2_buffer, sd2_offset,
+ sx1_buffer, sx1_offset,
+ sy1_buffer, sy1_offset,
+ sparam_buffer, sparam_offset,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// ROT
@@ -88,13 +92,14 @@ StatusCode CLBlastSrot(const size_t n,
const float cos,
const float sin,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Rot(n,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- cos,
- sin,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Rot(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ cos,
+ sin,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDrot(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
@@ -102,13 +107,14 @@ StatusCode CLBlastDrot(const size_t n,
const double cos,
const double sin,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Rot(n,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- cos,
- sin,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Rot(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ cos,
+ sin,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// ROTM
@@ -117,24 +123,26 @@ StatusCode CLBlastSrotm(const size_t n,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem sparam_buffer, const size_t sparam_offset,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Rotm<float>(n,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- sparam_buffer, sparam_offset,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Rotm<float>(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ sparam_buffer, sparam_offset,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDrotm(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem sparam_buffer, const size_t sparam_offset,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Rotm<double>(n,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- sparam_buffer, sparam_offset,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Rotm<double>(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ sparam_buffer, sparam_offset,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// SWAP
@@ -142,51 +150,56 @@ StatusCode CLBlastSswap(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Swap<float>(n,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Swap<float>(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDswap(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Swap<double>(n,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Swap<double>(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastCswap(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Swap<float2>(n,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Swap<float2>(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZswap(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Swap<double2>(n,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Swap<double2>(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHswap(const size_t n,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Swap<half>(n,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Swap<half>(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// SCAL
@@ -194,51 +207,56 @@ StatusCode CLBlastSscal(const size_t n,
const float alpha,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Scal(n,
- alpha,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Scal(n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDscal(const size_t n,
const double alpha,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Scal(n,
- alpha,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Scal(n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastCscal(const size_t n,
const cl_float2 alpha,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Scal(n,
- float2{alpha.s[0], alpha.s[1]},
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Scal(n,
+ float2{alpha.s[0], alpha.s[1]},
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZscal(const size_t n,
const cl_double2 alpha,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Scal(n,
- double2{alpha.s[0], alpha.s[1]},
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Scal(n,
+ double2{alpha.s[0], alpha.s[1]},
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHscal(const size_t n,
const cl_half alpha,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Scal(n,
- alpha,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Scal(n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// COPY
@@ -246,51 +264,56 @@ StatusCode CLBlastScopy(const size_t n,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Copy<float>(n,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Copy<float>(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDcopy(const size_t n,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Copy<double>(n,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Copy<double>(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastCcopy(const size_t n,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Copy<float2>(n,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Copy<float2>(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZcopy(const size_t n,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Copy<double2>(n,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Copy<double2>(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHcopy(const size_t n,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Copy<half>(n,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Copy<half>(n,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// AXPY
@@ -299,60 +322,65 @@ StatusCode CLBlastSaxpy(const size_t n,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Axpy(n,
- alpha,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Axpy(n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDaxpy(const size_t n,
const double alpha,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Axpy(n,
- alpha,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Axpy(n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastCaxpy(const size_t n,
const cl_float2 alpha,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Axpy(n,
- float2{alpha.s[0], alpha.s[1]},
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Axpy(n,
+ float2{alpha.s[0], alpha.s[1]},
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZaxpy(const size_t n,
const cl_double2 alpha,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Axpy(n,
- double2{alpha.s[0], alpha.s[1]},
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Axpy(n,
+ double2{alpha.s[0], alpha.s[1]},
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHaxpy(const size_t n,
const cl_half alpha,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Axpy(n,
- alpha,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Axpy(n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// DOT
@@ -361,36 +389,39 @@ StatusCode CLBlastSdot(const size_t n,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Dot<float>(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Dot<float>(n,
+ dot_buffer, dot_offset,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDdot(const size_t n,
cl_mem dot_buffer, const size_t dot_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Dot<double>(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Dot<double>(n,
+ dot_buffer, dot_offset,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHdot(const size_t n,
cl_mem dot_buffer, const size_t dot_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Dot<half>(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Dot<half>(n,
+ dot_buffer, dot_offset,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// DOTU
@@ -399,24 +430,26 @@ StatusCode CLBlastCdotu(const size_t n,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Dotu<float2>(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Dotu<float2>(n,
+ dot_buffer, dot_offset,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZdotu(const size_t n,
cl_mem dot_buffer, const size_t dot_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Dotu<double2>(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Dotu<double2>(n,
+ dot_buffer, dot_offset,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// DOTC
@@ -425,24 +458,26 @@ StatusCode CLBlastCdotc(const size_t n,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Dotc<float2>(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Dotc<float2>(n,
+ dot_buffer, dot_offset,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZdotc(const size_t n,
cl_mem dot_buffer, const size_t dot_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Dotc<double2>(n,
- dot_buffer, dot_offset,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Dotc<double2>(n,
+ dot_buffer, dot_offset,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// NRM2
@@ -450,51 +485,56 @@ StatusCode CLBlastSnrm2(const size_t n,
cl_mem nrm2_buffer, const size_t nrm2_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Nrm2<float>(n,
- nrm2_buffer, nrm2_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Nrm2<float>(n,
+ nrm2_buffer, nrm2_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDnrm2(const size_t n,
cl_mem nrm2_buffer, const size_t nrm2_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Nrm2<double>(n,
- nrm2_buffer, nrm2_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Nrm2<double>(n,
+ nrm2_buffer, nrm2_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastScnrm2(const size_t n,
cl_mem nrm2_buffer, const size_t nrm2_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Nrm2<float2>(n,
- nrm2_buffer, nrm2_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Nrm2<float2>(n,
+ nrm2_buffer, nrm2_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDznrm2(const size_t n,
cl_mem nrm2_buffer, const size_t nrm2_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Nrm2<double2>(n,
- nrm2_buffer, nrm2_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Nrm2<double2>(n,
+ nrm2_buffer, nrm2_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHnrm2(const size_t n,
cl_mem nrm2_buffer, const size_t nrm2_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Nrm2<half>(n,
- nrm2_buffer, nrm2_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Nrm2<half>(n,
+ nrm2_buffer, nrm2_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// ASUM
@@ -502,51 +542,56 @@ StatusCode CLBlastSasum(const size_t n,
cl_mem asum_buffer, const size_t asum_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Asum<float>(n,
- asum_buffer, asum_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Asum<float>(n,
+ asum_buffer, asum_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDasum(const size_t n,
cl_mem asum_buffer, const size_t asum_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Asum<double>(n,
- asum_buffer, asum_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Asum<double>(n,
+ asum_buffer, asum_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastScasum(const size_t n,
cl_mem asum_buffer, const size_t asum_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Asum<float2>(n,
- asum_buffer, asum_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Asum<float2>(n,
+ asum_buffer, asum_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDzasum(const size_t n,
cl_mem asum_buffer, const size_t asum_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Asum<double2>(n,
- asum_buffer, asum_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Asum<double2>(n,
+ asum_buffer, asum_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHasum(const size_t n,
cl_mem asum_buffer, const size_t asum_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Asum<half>(n,
- asum_buffer, asum_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Asum<half>(n,
+ asum_buffer, asum_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// SUM
@@ -554,51 +599,56 @@ StatusCode CLBlastSsum(const size_t n,
cl_mem sum_buffer, const size_t sum_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Sum<float>(n,
- sum_buffer, sum_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Sum<float>(n,
+ sum_buffer, sum_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDsum(const size_t n,
cl_mem sum_buffer, const size_t sum_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Sum<double>(n,
- sum_buffer, sum_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Sum<double>(n,
+ sum_buffer, sum_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastScsum(const size_t n,
cl_mem sum_buffer, const size_t sum_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Sum<float2>(n,
- sum_buffer, sum_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Sum<float2>(n,
+ sum_buffer, sum_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDzsum(const size_t n,
cl_mem sum_buffer, const size_t sum_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Sum<double2>(n,
- sum_buffer, sum_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Sum<double2>(n,
+ sum_buffer, sum_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHsum(const size_t n,
cl_mem sum_buffer, const size_t sum_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Sum<half>(n,
- sum_buffer, sum_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Sum<half>(n,
+ sum_buffer, sum_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// AMAX
@@ -606,51 +656,56 @@ StatusCode CLBlastiSamax(const size_t n,
cl_mem imax_buffer, const size_t imax_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Amax<float>(n,
- imax_buffer, imax_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Amax<float>(n,
+ imax_buffer, imax_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastiDamax(const size_t n,
cl_mem imax_buffer, const size_t imax_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Amax<double>(n,
- imax_buffer, imax_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Amax<double>(n,
+ imax_buffer, imax_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastiCamax(const size_t n,
cl_mem imax_buffer, const size_t imax_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Amax<float2>(n,
- imax_buffer, imax_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Amax<float2>(n,
+ imax_buffer, imax_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastiZamax(const size_t n,
cl_mem imax_buffer, const size_t imax_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Amax<double2>(n,
- imax_buffer, imax_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Amax<double2>(n,
+ imax_buffer, imax_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastiHamax(const size_t n,
cl_mem imax_buffer, const size_t imax_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Amax<half>(n,
- imax_buffer, imax_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Amax<half>(n,
+ imax_buffer, imax_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// MAX
@@ -658,51 +713,56 @@ StatusCode CLBlastiSmax(const size_t n,
cl_mem imax_buffer, const size_t imax_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Max<float>(n,
- imax_buffer, imax_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Max<float>(n,
+ imax_buffer, imax_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastiDmax(const size_t n,
cl_mem imax_buffer, const size_t imax_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Max<double>(n,
- imax_buffer, imax_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Max<double>(n,
+ imax_buffer, imax_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastiCmax(const size_t n,
cl_mem imax_buffer, const size_t imax_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Max<float2>(n,
- imax_buffer, imax_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Max<float2>(n,
+ imax_buffer, imax_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastiZmax(const size_t n,
cl_mem imax_buffer, const size_t imax_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Max<double2>(n,
- imax_buffer, imax_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Max<double2>(n,
+ imax_buffer, imax_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastiHmax(const size_t n,
cl_mem imax_buffer, const size_t imax_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Max<half>(n,
- imax_buffer, imax_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Max<half>(n,
+ imax_buffer, imax_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// MIN
@@ -710,51 +770,56 @@ StatusCode CLBlastiSmin(const size_t n,
cl_mem imin_buffer, const size_t imin_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Min<float>(n,
- imin_buffer, imin_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Min<float>(n,
+ imin_buffer, imin_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastiDmin(const size_t n,
cl_mem imin_buffer, const size_t imin_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Min<double>(n,
- imin_buffer, imin_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Min<double>(n,
+ imin_buffer, imin_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastiCmin(const size_t n,
cl_mem imin_buffer, const size_t imin_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Min<float2>(n,
- imin_buffer, imin_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Min<float2>(n,
+ imin_buffer, imin_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastiZmin(const size_t n,
cl_mem imin_buffer, const size_t imin_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Min<double2>(n,
- imin_buffer, imin_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Min<double2>(n,
+ imin_buffer, imin_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastiHmin(const size_t n,
cl_mem imin_buffer, const size_t imin_offset,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Min<half>(n,
- imin_buffer, imin_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Min<half>(n,
+ imin_buffer, imin_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// =================================================================================================
@@ -770,16 +835,17 @@ StatusCode CLBlastSgemv(const Layout layout, const Transpose a_transpose,
const float beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Gemv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- m, n,
- alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- beta,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Gemv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDgemv(const Layout layout, const Transpose a_transpose,
const size_t m, const size_t n,
@@ -789,16 +855,17 @@ StatusCode CLBlastDgemv(const Layout layout, const Transpose a_transpose,
const double beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Gemv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- m, n,
- alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- beta,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Gemv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastCgemv(const Layout layout, const Transpose a_transpose,
const size_t m, const size_t n,
@@ -808,16 +875,17 @@ StatusCode CLBlastCgemv(const Layout layout, const Transpose a_transpose,
const cl_float2 beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Gemv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- m, n,
- float2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- float2{beta.s[0], beta.s[1]},
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Gemv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n,
+ float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ float2{beta.s[0], beta.s[1]},
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZgemv(const Layout layout, const Transpose a_transpose,
const size_t m, const size_t n,
@@ -827,16 +895,17 @@ StatusCode CLBlastZgemv(const Layout layout, const Transpose a_transpose,
const cl_double2 beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Gemv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- m, n,
- double2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- double2{beta.s[0], beta.s[1]},
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Gemv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n,
+ double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ double2{beta.s[0], beta.s[1]},
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHgemv(const Layout layout, const Transpose a_transpose,
const size_t m, const size_t n,
@@ -846,16 +915,17 @@ StatusCode CLBlastHgemv(const Layout layout, const Transpose a_transpose,
const cl_half beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Gemv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- m, n,
- alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- beta,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Gemv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// GBMV
@@ -867,16 +937,17 @@ StatusCode CLBlastSgbmv(const Layout layout, const Transpose a_transpose,
const float beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Gbmv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- m, n, kl, ku,
- alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- beta,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Gbmv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n, kl, ku,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDgbmv(const Layout layout, const Transpose a_transpose,
const size_t m, const size_t n, const size_t kl, const size_t ku,
@@ -886,16 +957,17 @@ StatusCode CLBlastDgbmv(const Layout layout, const Transpose a_transpose,
const double beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Gbmv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- m, n, kl, ku,
- alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- beta,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Gbmv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n, kl, ku,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastCgbmv(const Layout layout, const Transpose a_transpose,
const size_t m, const size_t n, const size_t kl, const size_t ku,
@@ -905,16 +977,17 @@ StatusCode CLBlastCgbmv(const Layout layout, const Transpose a_transpose,
const cl_float2 beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Gbmv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- m, n, kl, ku,
- float2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- float2{beta.s[0], beta.s[1]},
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Gbmv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n, kl, ku,
+ float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ float2{beta.s[0], beta.s[1]},
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZgbmv(const Layout layout, const Transpose a_transpose,
const size_t m, const size_t n, const size_t kl, const size_t ku,
@@ -924,16 +997,17 @@ StatusCode CLBlastZgbmv(const Layout layout, const Transpose a_transpose,
const cl_double2 beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Gbmv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- m, n, kl, ku,
- double2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- double2{beta.s[0], beta.s[1]},
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Gbmv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n, kl, ku,
+ double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ double2{beta.s[0], beta.s[1]},
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHgbmv(const Layout layout, const Transpose a_transpose,
const size_t m, const size_t n, const size_t kl, const size_t ku,
@@ -943,16 +1017,17 @@ StatusCode CLBlastHgbmv(const Layout layout, const Transpose a_transpose,
const cl_half beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Gbmv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- m, n, kl, ku,
- alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- beta,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Gbmv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n, kl, ku,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// HEMV
@@ -964,16 +1039,17 @@ StatusCode CLBlastChemv(const Layout layout, const Triangle triangle,
const cl_float2 beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Hemv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- float2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- float2{beta.s[0], beta.s[1]},
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Hemv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ float2{beta.s[0], beta.s[1]},
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZhemv(const Layout layout, const Triangle triangle,
const size_t n,
@@ -983,16 +1059,17 @@ StatusCode CLBlastZhemv(const Layout layout, const Triangle triangle,
const cl_double2 beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Hemv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- double2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- double2{beta.s[0], beta.s[1]},
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Hemv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ double2{beta.s[0], beta.s[1]},
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// HBMV
@@ -1004,16 +1081,17 @@ StatusCode CLBlastChbmv(const Layout layout, const Triangle triangle,
const cl_float2 beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Hbmv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n, k,
- float2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- float2{beta.s[0], beta.s[1]},
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Hbmv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n, k,
+ float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ float2{beta.s[0], beta.s[1]},
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZhbmv(const Layout layout, const Triangle triangle,
const size_t n, const size_t k,
@@ -1023,16 +1101,17 @@ StatusCode CLBlastZhbmv(const Layout layout, const Triangle triangle,
const cl_double2 beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Hbmv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n, k,
- double2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- double2{beta.s[0], beta.s[1]},
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Hbmv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n, k,
+ double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ double2{beta.s[0], beta.s[1]},
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// HPMV
@@ -1044,16 +1123,17 @@ StatusCode CLBlastChpmv(const Layout layout, const Triangle triangle,
const cl_float2 beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Hpmv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- float2{alpha.s[0], alpha.s[1]},
- ap_buffer, ap_offset,
- x_buffer, x_offset, x_inc,
- float2{beta.s[0], beta.s[1]},
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Hpmv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ float2{alpha.s[0], alpha.s[1]},
+ ap_buffer, ap_offset,
+ x_buffer, x_offset, x_inc,
+ float2{beta.s[0], beta.s[1]},
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZhpmv(const Layout layout, const Triangle triangle,
const size_t n,
@@ -1063,16 +1143,17 @@ StatusCode CLBlastZhpmv(const Layout layout, const Triangle triangle,
const cl_double2 beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Hpmv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- double2{alpha.s[0], alpha.s[1]},
- ap_buffer, ap_offset,
- x_buffer, x_offset, x_inc,
- double2{beta.s[0], beta.s[1]},
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Hpmv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ double2{alpha.s[0], alpha.s[1]},
+ ap_buffer, ap_offset,
+ x_buffer, x_offset, x_inc,
+ double2{beta.s[0], beta.s[1]},
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// SYMV
@@ -1084,16 +1165,17 @@ StatusCode CLBlastSsymv(const Layout layout, const Triangle triangle,
const float beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Symv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- beta,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Symv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDsymv(const Layout layout, const Triangle triangle,
const size_t n,
@@ -1103,16 +1185,17 @@ StatusCode CLBlastDsymv(const Layout layout, const Triangle triangle,
const double beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Symv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- beta,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Symv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHsymv(const Layout layout, const Triangle triangle,
const size_t n,
@@ -1122,16 +1205,17 @@ StatusCode CLBlastHsymv(const Layout layout, const Triangle triangle,
const cl_half beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Symv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- beta,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Symv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// SBMV
@@ -1143,16 +1227,17 @@ StatusCode CLBlastSsbmv(const Layout layout, const Triangle triangle,
const float beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Sbmv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n, k,
- alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- beta,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Sbmv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDsbmv(const Layout layout, const Triangle triangle,
const size_t n, const size_t k,
@@ -1162,16 +1247,17 @@ StatusCode CLBlastDsbmv(const Layout layout, const Triangle triangle,
const double beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Sbmv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n, k,
- alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- beta,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Sbmv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHsbmv(const Layout layout, const Triangle triangle,
const size_t n, const size_t k,
@@ -1181,16 +1267,17 @@ StatusCode CLBlastHsbmv(const Layout layout, const Triangle triangle,
const cl_half beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Sbmv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n, k,
- alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- beta,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Sbmv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// SPMV
@@ -1202,16 +1289,17 @@ StatusCode CLBlastSspmv(const Layout layout, const Triangle triangle,
const float beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Spmv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- ap_buffer, ap_offset,
- x_buffer, x_offset, x_inc,
- beta,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Spmv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ ap_buffer, ap_offset,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDspmv(const Layout layout, const Triangle triangle,
const size_t n,
@@ -1221,16 +1309,17 @@ StatusCode CLBlastDspmv(const Layout layout, const Triangle triangle,
const double beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Spmv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- ap_buffer, ap_offset,
- x_buffer, x_offset, x_inc,
- beta,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Spmv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ ap_buffer, ap_offset,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHspmv(const Layout layout, const Triangle triangle,
const size_t n,
@@ -1240,16 +1329,17 @@ StatusCode CLBlastHspmv(const Layout layout, const Triangle triangle,
const cl_half beta,
cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Spmv(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- ap_buffer, ap_offset,
- x_buffer, x_offset, x_inc,
- beta,
- y_buffer, y_offset, y_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Spmv(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ ap_buffer, ap_offset,
+ x_buffer, x_offset, x_inc,
+ beta,
+ y_buffer, y_offset, y_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// TRMV
@@ -1258,75 +1348,80 @@ StatusCode CLBlastStrmv(const Layout layout, const Triangle triangle, const Tran
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trmv<float>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trmv<float>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDtrmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trmv<double>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trmv<double>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastCtrmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trmv<float2>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trmv<float2>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZtrmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trmv<double2>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trmv<double2>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHtrmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trmv<half>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trmv<half>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// TBMV
@@ -1335,75 +1430,80 @@ StatusCode CLBlastStbmv(const Layout layout, const Triangle triangle, const Tran
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Tbmv<float>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Tbmv<float>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n, k,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDtbmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n, const size_t k,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Tbmv<double>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Tbmv<double>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n, k,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastCtbmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n, const size_t k,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Tbmv<float2>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Tbmv<float2>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n, k,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZtbmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n, const size_t k,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Tbmv<double2>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Tbmv<double2>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n, k,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHtbmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n, const size_t k,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Tbmv<half>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Tbmv<half>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n, k,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// TPMV
@@ -1412,75 +1512,80 @@ StatusCode CLBlastStpmv(const Layout layout, const Triangle triangle, const Tran
const cl_mem ap_buffer, const size_t ap_offset,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Tpmv<float>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Tpmv<float>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n,
+ ap_buffer, ap_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDtpmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n,
const cl_mem ap_buffer, const size_t ap_offset,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Tpmv<double>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Tpmv<double>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n,
+ ap_buffer, ap_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastCtpmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n,
const cl_mem ap_buffer, const size_t ap_offset,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Tpmv<float2>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Tpmv<float2>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n,
+ ap_buffer, ap_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZtpmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n,
const cl_mem ap_buffer, const size_t ap_offset,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Tpmv<double2>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Tpmv<double2>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n,
+ ap_buffer, ap_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHtpmv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n,
const cl_mem ap_buffer, const size_t ap_offset,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Tpmv<half>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Tpmv<half>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n,
+ ap_buffer, ap_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// TRSV
@@ -1489,60 +1594,64 @@ StatusCode CLBlastStrsv(const Layout layout, const Triangle triangle, const Tran
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trsv<float>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trsv<float>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDtrsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trsv<double>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trsv<double>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastCtrsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trsv<float2>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trsv<float2>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZtrsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trsv<double2>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trsv<double2>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// TBSV
@@ -1551,60 +1660,64 @@ StatusCode CLBlastStbsv(const Layout layout, const Triangle triangle, const Tran
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Tbsv<float>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Tbsv<float>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n, k,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDtbsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n, const size_t k,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Tbsv<double>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Tbsv<double>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n, k,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastCtbsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n, const size_t k,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Tbsv<float2>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Tbsv<float2>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n, k,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZtbsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n, const size_t k,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Tbsv<double2>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n, k,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Tbsv<double2>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n, k,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// TPSV
@@ -1613,60 +1726,64 @@ StatusCode CLBlastStpsv(const Layout layout, const Triangle triangle, const Tran
const cl_mem ap_buffer, const size_t ap_offset,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Tpsv<float>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Tpsv<float>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n,
+ ap_buffer, ap_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDtpsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n,
const cl_mem ap_buffer, const size_t ap_offset,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Tpsv<double>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Tpsv<double>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n,
+ ap_buffer, ap_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastCtpsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n,
const cl_mem ap_buffer, const size_t ap_offset,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Tpsv<float2>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Tpsv<float2>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n,
+ ap_buffer, ap_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZtpsv(const Layout layout, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t n,
const cl_mem ap_buffer, const size_t ap_offset,
cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Tpsv<double2>(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- n,
- ap_buffer, ap_offset,
- x_buffer, x_offset, x_inc,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Tpsv<double2>(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ n,
+ ap_buffer, ap_offset,
+ x_buffer, x_offset, x_inc,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// GER
@@ -1677,14 +1794,15 @@ StatusCode CLBlastSger(const Layout layout,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Ger(static_cast<clblast::Layout>(layout),
- m, n,
- alpha,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- a_buffer, a_offset, a_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Ger(static_cast<clblast::Layout>(layout),
+ m, n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDger(const Layout layout,
const size_t m, const size_t n,
@@ -1693,14 +1811,15 @@ StatusCode CLBlastDger(const Layout layout,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Ger(static_cast<clblast::Layout>(layout),
- m, n,
- alpha,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- a_buffer, a_offset, a_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Ger(static_cast<clblast::Layout>(layout),
+ m, n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHger(const Layout layout,
const size_t m, const size_t n,
@@ -1709,14 +1828,15 @@ StatusCode CLBlastHger(const Layout layout,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Ger(static_cast<clblast::Layout>(layout),
- m, n,
- alpha,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- a_buffer, a_offset, a_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Ger(static_cast<clblast::Layout>(layout),
+ m, n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// GERU
@@ -1727,14 +1847,15 @@ StatusCode CLBlastCgeru(const Layout layout,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Geru(static_cast<clblast::Layout>(layout),
- m, n,
- float2{alpha.s[0], alpha.s[1]},
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- a_buffer, a_offset, a_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Geru(static_cast<clblast::Layout>(layout),
+ m, n,
+ float2{alpha.s[0], alpha.s[1]},
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZgeru(const Layout layout,
const size_t m, const size_t n,
@@ -1743,14 +1864,15 @@ StatusCode CLBlastZgeru(const Layout layout,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Geru(static_cast<clblast::Layout>(layout),
- m, n,
- double2{alpha.s[0], alpha.s[1]},
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- a_buffer, a_offset, a_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Geru(static_cast<clblast::Layout>(layout),
+ m, n,
+ double2{alpha.s[0], alpha.s[1]},
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// GERC
@@ -1761,14 +1883,15 @@ StatusCode CLBlastCgerc(const Layout layout,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Gerc(static_cast<clblast::Layout>(layout),
- m, n,
- float2{alpha.s[0], alpha.s[1]},
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- a_buffer, a_offset, a_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Gerc(static_cast<clblast::Layout>(layout),
+ m, n,
+ float2{alpha.s[0], alpha.s[1]},
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZgerc(const Layout layout,
const size_t m, const size_t n,
@@ -1777,14 +1900,15 @@ StatusCode CLBlastZgerc(const Layout layout,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Gerc(static_cast<clblast::Layout>(layout),
- m, n,
- double2{alpha.s[0], alpha.s[1]},
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- a_buffer, a_offset, a_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Gerc(static_cast<clblast::Layout>(layout),
+ m, n,
+ double2{alpha.s[0], alpha.s[1]},
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// HER
@@ -1794,14 +1918,15 @@ StatusCode CLBlastCher(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Her(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- x_buffer, x_offset, x_inc,
- a_buffer, a_offset, a_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Her(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZher(const Layout layout, const Triangle triangle,
const size_t n,
@@ -1809,14 +1934,15 @@ StatusCode CLBlastZher(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Her(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- x_buffer, x_offset, x_inc,
- a_buffer, a_offset, a_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Her(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// HPR
@@ -1826,14 +1952,15 @@ StatusCode CLBlastChpr(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Hpr(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- x_buffer, x_offset, x_inc,
- ap_buffer, ap_offset,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Hpr(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ ap_buffer, ap_offset,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZhpr(const Layout layout, const Triangle triangle,
const size_t n,
@@ -1841,14 +1968,15 @@ StatusCode CLBlastZhpr(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Hpr(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- x_buffer, x_offset, x_inc,
- ap_buffer, ap_offset,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Hpr(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ ap_buffer, ap_offset,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// HER2
@@ -1859,15 +1987,16 @@ StatusCode CLBlastCher2(const Layout layout, const Triangle triangle,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Her2(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- float2{alpha.s[0], alpha.s[1]},
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- a_buffer, a_offset, a_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Her2(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ float2{alpha.s[0], alpha.s[1]},
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZher2(const Layout layout, const Triangle triangle,
const size_t n,
@@ -1876,15 +2005,16 @@ StatusCode CLBlastZher2(const Layout layout, const Triangle triangle,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Her2(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- double2{alpha.s[0], alpha.s[1]},
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- a_buffer, a_offset, a_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Her2(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ double2{alpha.s[0], alpha.s[1]},
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// HPR2
@@ -1895,15 +2025,16 @@ StatusCode CLBlastChpr2(const Layout layout, const Triangle triangle,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Hpr2(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- float2{alpha.s[0], alpha.s[1]},
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- ap_buffer, ap_offset,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Hpr2(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ float2{alpha.s[0], alpha.s[1]},
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ ap_buffer, ap_offset,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZhpr2(const Layout layout, const Triangle triangle,
const size_t n,
@@ -1912,15 +2043,16 @@ StatusCode CLBlastZhpr2(const Layout layout, const Triangle triangle,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Hpr2(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- double2{alpha.s[0], alpha.s[1]},
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- ap_buffer, ap_offset,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Hpr2(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ double2{alpha.s[0], alpha.s[1]},
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ ap_buffer, ap_offset,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// SYR
@@ -1930,14 +2062,15 @@ StatusCode CLBlastSsyr(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Syr(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- x_buffer, x_offset, x_inc,
- a_buffer, a_offset, a_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Syr(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDsyr(const Layout layout, const Triangle triangle,
const size_t n,
@@ -1945,14 +2078,15 @@ StatusCode CLBlastDsyr(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Syr(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- x_buffer, x_offset, x_inc,
- a_buffer, a_offset, a_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Syr(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHsyr(const Layout layout, const Triangle triangle,
const size_t n,
@@ -1960,14 +2094,15 @@ StatusCode CLBlastHsyr(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Syr(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- x_buffer, x_offset, x_inc,
- a_buffer, a_offset, a_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Syr(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// SPR
@@ -1977,14 +2112,15 @@ StatusCode CLBlastSspr(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Spr(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- x_buffer, x_offset, x_inc,
- ap_buffer, ap_offset,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Spr(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ ap_buffer, ap_offset,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDspr(const Layout layout, const Triangle triangle,
const size_t n,
@@ -1992,14 +2128,15 @@ StatusCode CLBlastDspr(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Spr(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- x_buffer, x_offset, x_inc,
- ap_buffer, ap_offset,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Spr(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ ap_buffer, ap_offset,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHspr(const Layout layout, const Triangle triangle,
const size_t n,
@@ -2007,14 +2144,15 @@ StatusCode CLBlastHspr(const Layout layout, const Triangle triangle,
const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Spr(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- x_buffer, x_offset, x_inc,
- ap_buffer, ap_offset,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Spr(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ ap_buffer, ap_offset,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// SYR2
@@ -2025,15 +2163,16 @@ StatusCode CLBlastSsyr2(const Layout layout, const Triangle triangle,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Syr2(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- a_buffer, a_offset, a_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Syr2(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDsyr2(const Layout layout, const Triangle triangle,
const size_t n,
@@ -2042,15 +2181,16 @@ StatusCode CLBlastDsyr2(const Layout layout, const Triangle triangle,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Syr2(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- a_buffer, a_offset, a_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Syr2(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHsyr2(const Layout layout, const Triangle triangle,
const size_t n,
@@ -2059,15 +2199,16 @@ StatusCode CLBlastHsyr2(const Layout layout, const Triangle triangle,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Syr2(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- a_buffer, a_offset, a_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Syr2(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// SPR2
@@ -2078,15 +2219,16 @@ StatusCode CLBlastSspr2(const Layout layout, const Triangle triangle,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Spr2(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- ap_buffer, ap_offset,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Spr2(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ ap_buffer, ap_offset,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDspr2(const Layout layout, const Triangle triangle,
const size_t n,
@@ -2095,15 +2237,16 @@ StatusCode CLBlastDspr2(const Layout layout, const Triangle triangle,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Spr2(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- ap_buffer, ap_offset,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Spr2(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ ap_buffer, ap_offset,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHspr2(const Layout layout, const Triangle triangle,
const size_t n,
@@ -2112,15 +2255,16 @@ StatusCode CLBlastHspr2(const Layout layout, const Triangle triangle,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_mem ap_buffer, const size_t ap_offset,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Spr2(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- n,
- alpha,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- ap_buffer, ap_offset,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Spr2(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ n,
+ alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ ap_buffer, ap_offset,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// =================================================================================================
@@ -2136,17 +2280,18 @@ StatusCode CLBlastSgemm(const Layout layout, const Transpose a_transpose, const
const float beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Gemm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Transpose>(b_transpose),
- m, n, k,
- alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- beta,
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Gemm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Transpose>(b_transpose),
+ m, n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
const size_t m, const size_t n, const size_t k,
@@ -2156,17 +2301,18 @@ StatusCode CLBlastDgemm(const Layout layout, const Transpose a_transpose, const
const double beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Gemm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Transpose>(b_transpose),
- m, n, k,
- alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- beta,
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Gemm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Transpose>(b_transpose),
+ m, n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastCgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
const size_t m, const size_t n, const size_t k,
@@ -2176,17 +2322,18 @@ StatusCode CLBlastCgemm(const Layout layout, const Transpose a_transpose, const
const cl_float2 beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Gemm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Transpose>(b_transpose),
- m, n, k,
- float2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- float2{beta.s[0], beta.s[1]},
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Gemm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Transpose>(b_transpose),
+ m, n, k,
+ float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ float2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
const size_t m, const size_t n, const size_t k,
@@ -2196,17 +2343,18 @@ StatusCode CLBlastZgemm(const Layout layout, const Transpose a_transpose, const
const cl_double2 beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Gemm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Transpose>(b_transpose),
- m, n, k,
- double2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- double2{beta.s[0], beta.s[1]},
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Gemm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Transpose>(b_transpose),
+ m, n, k,
+ double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ double2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHgemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
const size_t m, const size_t n, const size_t k,
@@ -2216,17 +2364,18 @@ StatusCode CLBlastHgemm(const Layout layout, const Transpose a_transpose, const
const cl_half beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Gemm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Transpose>(b_transpose),
- m, n, k,
- alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- beta,
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Gemm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Transpose>(b_transpose),
+ m, n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// SYMM
@@ -2238,17 +2387,18 @@ StatusCode CLBlastSsymm(const Layout layout, const Side side, const Triangle tri
const float beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Symm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Side>(side),
- static_cast<clblast::Triangle>(triangle),
- m, n,
- alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- beta,
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Symm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDsymm(const Layout layout, const Side side, const Triangle triangle,
const size_t m, const size_t n,
@@ -2258,17 +2408,18 @@ StatusCode CLBlastDsymm(const Layout layout, const Side side, const Triangle tri
const double beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Symm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Side>(side),
- static_cast<clblast::Triangle>(triangle),
- m, n,
- alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- beta,
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Symm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastCsymm(const Layout layout, const Side side, const Triangle triangle,
const size_t m, const size_t n,
@@ -2278,17 +2429,18 @@ StatusCode CLBlastCsymm(const Layout layout, const Side side, const Triangle tri
const cl_float2 beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Symm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Side>(side),
- static_cast<clblast::Triangle>(triangle),
- m, n,
- float2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- float2{beta.s[0], beta.s[1]},
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Symm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ m, n,
+ float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ float2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZsymm(const Layout layout, const Side side, const Triangle triangle,
const size_t m, const size_t n,
@@ -2298,17 +2450,18 @@ StatusCode CLBlastZsymm(const Layout layout, const Side side, const Triangle tri
const cl_double2 beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Symm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Side>(side),
- static_cast<clblast::Triangle>(triangle),
- m, n,
- double2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- double2{beta.s[0], beta.s[1]},
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Symm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ m, n,
+ double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ double2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHsymm(const Layout layout, const Side side, const Triangle triangle,
const size_t m, const size_t n,
@@ -2318,17 +2471,18 @@ StatusCode CLBlastHsymm(const Layout layout, const Side side, const Triangle tri
const cl_half beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Symm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Side>(side),
- static_cast<clblast::Triangle>(triangle),
- m, n,
- alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- beta,
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Symm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// HEMM
@@ -2340,17 +2494,18 @@ StatusCode CLBlastChemm(const Layout layout, const Side side, const Triangle tri
const cl_float2 beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Hemm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Side>(side),
- static_cast<clblast::Triangle>(triangle),
- m, n,
- float2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- float2{beta.s[0], beta.s[1]},
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Hemm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ m, n,
+ float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ float2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZhemm(const Layout layout, const Side side, const Triangle triangle,
const size_t m, const size_t n,
@@ -2360,17 +2515,18 @@ StatusCode CLBlastZhemm(const Layout layout, const Side side, const Triangle tri
const cl_double2 beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Hemm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Side>(side),
- static_cast<clblast::Triangle>(triangle),
- m, n,
- double2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- double2{beta.s[0], beta.s[1]},
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Hemm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ m, n,
+ double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ double2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// SYRK
@@ -2381,16 +2537,17 @@ StatusCode CLBlastSsyrk(const Layout layout, const Triangle triangle, const Tran
const float beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Syrk(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- n, k,
- alpha,
- a_buffer, a_offset, a_ld,
- beta,
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Syrk(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDsyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
const size_t n, const size_t k,
@@ -2399,16 +2556,17 @@ StatusCode CLBlastDsyrk(const Layout layout, const Triangle triangle, const Tran
const double beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Syrk(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- n, k,
- alpha,
- a_buffer, a_offset, a_ld,
- beta,
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Syrk(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastCsyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
const size_t n, const size_t k,
@@ -2417,16 +2575,17 @@ StatusCode CLBlastCsyrk(const Layout layout, const Triangle triangle, const Tran
const cl_float2 beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Syrk(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- n, k,
- float2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- float2{beta.s[0], beta.s[1]},
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Syrk(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ n, k,
+ float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ float2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZsyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
const size_t n, const size_t k,
@@ -2435,16 +2594,17 @@ StatusCode CLBlastZsyrk(const Layout layout, const Triangle triangle, const Tran
const cl_double2 beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Syrk(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- n, k,
- double2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- double2{beta.s[0], beta.s[1]},
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Syrk(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ n, k,
+ double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ double2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHsyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
const size_t n, const size_t k,
@@ -2453,16 +2613,17 @@ StatusCode CLBlastHsyrk(const Layout layout, const Triangle triangle, const Tran
const cl_half beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Syrk(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- n, k,
- alpha,
- a_buffer, a_offset, a_ld,
- beta,
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Syrk(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// HERK
@@ -2473,16 +2634,17 @@ StatusCode CLBlastCherk(const Layout layout, const Triangle triangle, const Tran
const float beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Herk(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- n, k,
- alpha,
- a_buffer, a_offset, a_ld,
- beta,
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Herk(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZherk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
const size_t n, const size_t k,
@@ -2491,16 +2653,17 @@ StatusCode CLBlastZherk(const Layout layout, const Triangle triangle, const Tran
const double beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Herk(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- n, k,
- alpha,
- a_buffer, a_offset, a_ld,
- beta,
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Herk(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// SYR2K
@@ -2512,17 +2675,18 @@ StatusCode CLBlastSsyr2k(const Layout layout, const Triangle triangle, const Tra
const float beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Syr2k(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(ab_transpose),
- n, k,
- alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- beta,
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Syr2k(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(ab_transpose),
+ n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDsyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
const size_t n, const size_t k,
@@ -2532,17 +2696,18 @@ StatusCode CLBlastDsyr2k(const Layout layout, const Triangle triangle, const Tra
const double beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Syr2k(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(ab_transpose),
- n, k,
- alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- beta,
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Syr2k(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(ab_transpose),
+ n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastCsyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
const size_t n, const size_t k,
@@ -2552,17 +2717,18 @@ StatusCode CLBlastCsyr2k(const Layout layout, const Triangle triangle, const Tra
const cl_float2 beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Syr2k(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(ab_transpose),
- n, k,
- float2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- float2{beta.s[0], beta.s[1]},
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Syr2k(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(ab_transpose),
+ n, k,
+ float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ float2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZsyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
const size_t n, const size_t k,
@@ -2572,17 +2738,18 @@ StatusCode CLBlastZsyr2k(const Layout layout, const Triangle triangle, const Tra
const cl_double2 beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Syr2k(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(ab_transpose),
- n, k,
- double2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- double2{beta.s[0], beta.s[1]},
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Syr2k(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(ab_transpose),
+ n, k,
+ double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ double2{beta.s[0], beta.s[1]},
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHsyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
const size_t n, const size_t k,
@@ -2592,17 +2759,18 @@ StatusCode CLBlastHsyr2k(const Layout layout, const Triangle triangle, const Tra
const cl_half beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Syr2k(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(ab_transpose),
- n, k,
- alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- beta,
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Syr2k(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(ab_transpose),
+ n, k,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// HER2K
@@ -2614,17 +2782,18 @@ StatusCode CLBlastCher2k(const Layout layout, const Triangle triangle, const Tra
const float beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Her2k(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(ab_transpose),
- n, k,
- float2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- beta,
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Her2k(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(ab_transpose),
+ n, k,
+ float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZher2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
const size_t n, const size_t k,
@@ -2634,17 +2803,18 @@ StatusCode CLBlastZher2k(const Layout layout, const Triangle triangle, const Tra
const double beta,
cl_mem c_buffer, const size_t c_offset, const size_t c_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Her2k(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(ab_transpose),
- n, k,
- double2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- beta,
- c_buffer, c_offset, c_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Her2k(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(ab_transpose),
+ n, k,
+ double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// TRMM
@@ -2654,17 +2824,18 @@ StatusCode CLBlastStrmm(const Layout layout, const Side side, const Triangle tri
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trmm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Side>(side),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- m, n,
- alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trmm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDtrmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t m, const size_t n,
@@ -2672,17 +2843,18 @@ StatusCode CLBlastDtrmm(const Layout layout, const Side side, const Triangle tri
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trmm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Side>(side),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- m, n,
- alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trmm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastCtrmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t m, const size_t n,
@@ -2690,17 +2862,18 @@ StatusCode CLBlastCtrmm(const Layout layout, const Side side, const Triangle tri
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trmm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Side>(side),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- m, n,
- float2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trmm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ m, n,
+ float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZtrmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t m, const size_t n,
@@ -2708,17 +2881,18 @@ StatusCode CLBlastZtrmm(const Layout layout, const Side side, const Triangle tri
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trmm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Side>(side),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- m, n,
- double2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trmm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ m, n,
+ double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHtrmm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t m, const size_t n,
@@ -2726,17 +2900,18 @@ StatusCode CLBlastHtrmm(const Layout layout, const Side side, const Triangle tri
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trmm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Side>(side),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- m, n,
- alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trmm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// TRSM
@@ -2746,17 +2921,18 @@ StatusCode CLBlastStrsm(const Layout layout, const Side side, const Triangle tri
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trsm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Side>(side),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- m, n,
- alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trsm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDtrsm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t m, const size_t n,
@@ -2764,17 +2940,18 @@ StatusCode CLBlastDtrsm(const Layout layout, const Side side, const Triangle tri
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trsm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Side>(side),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- m, n,
- alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trsm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastCtrsm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t m, const size_t n,
@@ -2782,17 +2959,18 @@ StatusCode CLBlastCtrsm(const Layout layout, const Side side, const Triangle tri
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trsm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Side>(side),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- m, n,
- float2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trsm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ m, n,
+ float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZtrsm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t m, const size_t n,
@@ -2800,17 +2978,18 @@ StatusCode CLBlastZtrsm(const Layout layout, const Side side, const Triangle tri
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trsm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Side>(side),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- m, n,
- double2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trsm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ m, n,
+ double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHtrsm(const Layout layout, const Side side, const Triangle triangle, const Transpose a_transpose, const Diagonal diagonal,
const size_t m, const size_t n,
@@ -2818,17 +2997,18 @@ StatusCode CLBlastHtrsm(const Layout layout, const Side side, const Triangle tri
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Trsm(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Side>(side),
- static_cast<clblast::Triangle>(triangle),
- static_cast<clblast::Transpose>(a_transpose),
- static_cast<clblast::Diagonal>(diagonal),
- m, n,
- alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Trsm(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Side>(side),
+ static_cast<clblast::Triangle>(triangle),
+ static_cast<clblast::Transpose>(a_transpose),
+ static_cast<clblast::Diagonal>(diagonal),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// =================================================================================================
@@ -2842,14 +3022,15 @@ StatusCode CLBlastSomatcopy(const Layout layout, const Transpose a_transpose,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Omatcopy(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- m, n,
- alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Omatcopy(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastDomatcopy(const Layout layout, const Transpose a_transpose,
const size_t m, const size_t n,
@@ -2857,14 +3038,15 @@ StatusCode CLBlastDomatcopy(const Layout layout, const Transpose a_transpose,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Omatcopy(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- m, n,
- alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Omatcopy(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastComatcopy(const Layout layout, const Transpose a_transpose,
const size_t m, const size_t n,
@@ -2872,14 +3054,15 @@ StatusCode CLBlastComatcopy(const Layout layout, const Transpose a_transpose,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Omatcopy(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- m, n,
- float2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Omatcopy(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n,
+ float2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastZomatcopy(const Layout layout, const Transpose a_transpose,
const size_t m, const size_t n,
@@ -2887,14 +3070,15 @@ StatusCode CLBlastZomatcopy(const Layout layout, const Transpose a_transpose,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Omatcopy(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- m, n,
- double2{alpha.s[0], alpha.s[1]},
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Omatcopy(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n,
+ double2{alpha.s[0], alpha.s[1]},
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
StatusCode CLBlastHomatcopy(const Layout layout, const Transpose a_transpose,
const size_t m, const size_t n,
@@ -2902,26 +3086,31 @@ StatusCode CLBlastHomatcopy(const Layout layout, const Transpose a_transpose,
const cl_mem a_buffer, const size_t a_offset, const size_t a_ld,
cl_mem b_buffer, const size_t b_offset, const size_t b_ld,
cl_command_queue* queue, cl_event* event) {
- auto status = clblast::Omatcopy(static_cast<clblast::Layout>(layout),
- static_cast<clblast::Transpose>(a_transpose),
- m, n,
- alpha,
- a_buffer, a_offset, a_ld,
- b_buffer, b_offset, b_ld,
- queue, event);
- return static_cast<StatusCode>(status);
+ try {
+ return static_cast<StatusCode>(clblast::Omatcopy(static_cast<clblast::Layout>(layout),
+ static_cast<clblast::Transpose>(a_transpose),
+ m, n,
+ alpha,
+ a_buffer, a_offset, a_ld,
+ b_buffer, b_offset, b_ld,
+ queue, event));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// =================================================================================================
// Clears the cache of stored binaries
StatusCode CLBlastClearCache() {
- return static_cast<StatusCode>(clblast::ClearCache());
+ try {
+ return static_cast<StatusCode>(clblast::ClearCache());
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// Fills the cache with binaries for a specific device
StatusCode CLBlastFillCache(const cl_device_id device) {
- return static_cast<StatusCode>(clblast::FillCache(device));
+ try {
+ return static_cast<StatusCode>(clblast::FillCache(device));
+ } catch (...) { return static_cast<StatusCode>(clblast::DispatchExceptionForC()); }
}
// =================================================================================================
diff --git a/src/clblast_exceptions.cpp b/src/clblast_exceptions.cpp
new file mode 100644
index 00000000..68d31e46
--- /dev/null
+++ b/src/clblast_exceptions.cpp
@@ -0,0 +1,95 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Ivan Shapovalov <intelfx@intelfx.name>
+//
+// This file implements the exception hierarchy for CLBlast. It contains classes for exceptions
+// generated by different parts of CLBlast (e.g. OpenCL API calls, internal logic, semantic BLAS
+// errors).
+//
+// =================================================================================================
+
+#include "clblast_exceptions.hpp"
+
+namespace {
+// =================================================================================================
+
+std::string MakeReason(const std::string &reason, const std::string &subreason) {
+ std::string r = reason;
+ if (!subreason.empty()) {
+ r += " (" + subreason + ")";
+ }
+ return r;
+}
+
+} // anonymous namespace
+
+namespace clblast {
+// =================================================================================================
+
+BLASError::BLASError(StatusCode status, const std::string &subreason):
+ ErrorCode(status,
+ subreason,
+ "BLAS error: " + MakeReason(std::to_string(static_cast<int>(status)), subreason)) {
+}
+
+RuntimeErrorCode::RuntimeErrorCode(StatusCode status, const std::string &subreason):
+ ErrorCode(status,
+ subreason,
+ MakeReason(std::to_string(static_cast<int>(status)), subreason)) {
+}
+
+// =================================================================================================
+
+StatusCode DispatchException()
+{
+ const char *message = nullptr;
+ StatusCode status;
+
+ try {
+ throw;
+ } catch (BLASError &e) {
+ // no message is printed for invalid argument errors
+ status = e.status();
+ } catch (CLError &e) {
+ message = e.what();
+ status = static_cast<StatusCode>(e.status());
+ } catch (RuntimeErrorCode &e) {
+ message = e.what();
+ status = e.status();
+ } catch (Error<std::runtime_error> &e) {
+ message = e.what();
+ status = StatusCode::kUnknownError;
+ }
+
+ if (message) {
+ fprintf(stderr, "CLBlast: %s\n", message);
+ }
+ return status;
+}
+
+// =================================================================================================
+
+StatusCode DispatchExceptionForC()
+{
+ const char *message = nullptr;
+
+ try {
+ throw;
+ } catch (std::exception &e) {
+ message = e.what();
+ } catch (...) {
+ message = "unknown exception";
+ }
+
+ fprintf (stderr, "CLBlast (unexpected): %s\n", message);
+ return StatusCode::kUnexpectedError;
+}
+
+// =================================================================================================
+
+} // namespace clblast
diff --git a/src/clblast_exceptions.hpp b/src/clblast_exceptions.hpp
new file mode 100644
index 00000000..89f5e761
--- /dev/null
+++ b/src/clblast_exceptions.hpp
@@ -0,0 +1,50 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Ivan Shapovalov <intelfx@intelfx.name>
+//
+// This file implements the exception hierarchy for CLBlast. It contains classes for exceptions
+// generated by different parts of CLBlast (e.g. OpenCL API calls, internal logic, semantic BLAS
+// errors).
+//
+// =================================================================================================
+
+#ifndef CLBLAST_EXCEPTIONS_H_
+#define CLBLAST_EXCEPTIONS_H_
+
+#include "clblast.h"
+#include "clpp11.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Represents a semantic error in BLAS function arguments
+class PUBLIC_API BLASError : public ErrorCode<Error<std::invalid_argument>, StatusCode> {
+ public:
+ explicit BLASError(StatusCode status, const std::string &subreason = std::string{});
+};
+// =================================================================================================
+
+// Represents a runtime error generated by internal logic
+class PUBLIC_API RuntimeErrorCode : public ErrorCode<RuntimeError, StatusCode> {
+ public:
+ explicit RuntimeErrorCode(StatusCode status, const std::string &subreason = std::string{});
+};
+
+// =================================================================================================
+
+// Handles (most of the) runtime exceptions and converts them to StatusCode
+StatusCode DispatchException();
+
+// Handles remaining exceptions and converts them to StatusCode::kUnexpectedError
+StatusCode DispatchExceptionForC();
+
+// =================================================================================================
+
+} // namespace clblast
+
+#endif // CLBLAST_EXCEPTIONS_H_
diff --git a/src/clpp11.hpp b/src/clpp11.hpp
index aaa76cb4..d306bb87 100644
--- a/src/clpp11.hpp
+++ b/src/clpp11.hpp
@@ -41,8 +41,8 @@
#include <string> // std::string
#include <vector> // std::vector
#include <memory> // std::shared_ptr
-#include <stdexcept> // std::runtime_error
#include <numeric> // std::accumulate
+#include <cstring> // std::strlen
// OpenCL
#if defined(__APPLE__) || defined(__MACOSX)
@@ -51,20 +51,41 @@
#include <CL/opencl.h>
#endif
+// Exception classes
+#include "cxpp11_common.hpp"
+
namespace clblast {
// =================================================================================================
-// Error occurred in the C++11 OpenCL header (this file)
-inline void Error(const std::string &message) {
- throw std::runtime_error("Internal OpenCL error: "+message);
-}
+// Represents a runtime error returned by an OpenCL API function
+class CLError : public ErrorCode<DeviceError, cl_int> {
+ public:
+ explicit CLError(cl_int status, const std::string &where):
+ ErrorCode(status,
+ where,
+ "OpenCL error: " + where + ": " + std::to_string(static_cast<int>(status))) {
+ }
-// Error occurred in OpenCL
-inline void CheckError(const cl_int status) {
- if (status != CL_SUCCESS) {
- throw std::runtime_error("Internal OpenCL error: "+std::to_string(status));
+ static void Check(const cl_int status, const std::string &where) {
+ if (status != CL_SUCCESS) {
+ throw CLError(status, where);
+ }
}
-}
+
+ static void CheckDtor(const cl_int status, const std::string &where) {
+ if (status != CL_SUCCESS) {
+ fprintf(stderr, "CLBlast: %s (ignoring)\n", CLError(status, where).what());
+ }
+ }
+};
+
+// =================================================================================================
+
+// Error occurred in OpenCL
+#define CheckError(call) CLError::Check(call, CLError::TrimCallString(#call))
+
+// Error occurred in OpenCL (no-exception version for destructors)
+#define CheckErrorDtor(call) CLError::CheckDtor(call, CLError::TrimCallString(#call))
// =================================================================================================
@@ -81,7 +102,7 @@ class Event {
// Regular constructor with memory management
explicit Event():
event_(new cl_event, [](cl_event* e) {
- if (*e) { CheckError(clReleaseEvent(*e)); }
+ if (*e) { CheckErrorDtor(clReleaseEvent(*e)); }
delete e;
}) {
*event_ = nullptr;
@@ -92,16 +113,17 @@ class Event {
CheckError(clWaitForEvents(1, &(*event_)));
}
- // Retrieves the elapsed time of the last recorded event. Note that no error checking is done on
- // the 'clGetEventProfilingInfo' function, since there is a bug in Apple's OpenCL implementation:
- // http://stackoverflow.com/questions/26145603/clgeteventprofilinginfo-bug-in-macosx
+ // Retrieves the elapsed time of the last recorded event.
+ // (Note that there is a bug in Apple's OpenCL implementation of the 'clGetEventProfilingInfo' function:
+ // http://stackoverflow.com/questions/26145603/clgeteventprofilinginfo-bug-in-macosx)
+ // However, in our case the reply size is fixed to be cl_ulong, so we are not affected.
float GetElapsedTime() const {
WaitForCompletion();
const auto bytes = sizeof(cl_ulong);
auto time_start = cl_ulong{0};
- clGetEventProfilingInfo(*event_, CL_PROFILING_COMMAND_START, bytes, &time_start, nullptr);
+ CheckError(clGetEventProfilingInfo(*event_, CL_PROFILING_COMMAND_START, bytes, &time_start, nullptr));
auto time_end = cl_ulong{0};
- clGetEventProfilingInfo(*event_, CL_PROFILING_COMMAND_END, bytes, &time_end, nullptr);
+ CheckError(clGetEventProfilingInfo(*event_, CL_PROFILING_COMMAND_END, bytes, &time_end, nullptr));
return static_cast<float>(time_end - time_start) * 1.0e-6f;
}
@@ -130,10 +152,14 @@ class Platform {
explicit Platform(const size_t platform_id) {
auto num_platforms = cl_uint{0};
CheckError(clGetPlatformIDs(0, nullptr, &num_platforms));
- if (num_platforms == 0) { Error("no platforms found"); }
+ if (num_platforms == 0) {
+ throw RuntimeError("Platform: no platforms found");
+ }
+ if (platform_id >= num_platforms) {
+ throw RuntimeError("Platform: invalid platform ID "+std::to_string(platform_id));
+ }
auto platforms = std::vector<cl_platform_id>(num_platforms);
CheckError(clGetPlatformIDs(num_platforms, platforms.data(), nullptr));
- if (platform_id >= num_platforms) { Error("invalid platform ID "+std::to_string(platform_id)); }
platform_ = platforms[platform_id];
}
@@ -173,11 +199,16 @@ class Device {
// Initialize the device. Note that this constructor can throw exceptions!
explicit Device(const Platform &platform, const size_t device_id) {
auto num_devices = platform.NumDevices();
- if (num_devices == 0) { Error("no devices found"); }
+ if (num_devices == 0) {
+ throw RuntimeError("Device: no devices found");
+ }
+ if (device_id >= num_devices) {
+ throw RuntimeError("Device: invalid device ID "+std::to_string(device_id));
+ }
+
auto devices = std::vector<cl_device_id>(num_devices);
CheckError(clGetDeviceIDs(platform(), CL_DEVICE_TYPE_ALL, static_cast<cl_uint>(num_devices),
devices.data(), nullptr));
- if (device_id >= num_devices) { Error("invalid device ID "+std::to_string(device_id)); }
device_ = devices[device_id];
}
@@ -282,7 +313,8 @@ class Device {
auto result = std::string{};
result.resize(bytes);
CheckError(clGetDeviceInfo(device_, info, bytes, &result[0], nullptr));
- return std::string{result.c_str()}; // Removes any trailing '\0'-characters
+ result.resize(strlen(result.c_str())); // Removes any trailing '\0'-characters
+ return result;
}
};
@@ -300,11 +332,11 @@ class Context {
// Regular constructor with memory management
explicit Context(const Device &device):
- context_(new cl_context, [](cl_context* c) { CheckError(clReleaseContext(*c)); delete c; }) {
+ context_(new cl_context, [](cl_context* c) { CheckErrorDtor(clReleaseContext(*c)); delete c; }) {
auto status = CL_SUCCESS;
const cl_device_id dev = device();
*context_ = clCreateContext(nullptr, 1, &dev, nullptr, nullptr, &status);
- CheckError(status);
+ CLError::Check(status, "clCreateContext");
}
// Accessor to the private data-member
@@ -329,18 +361,18 @@ class Program {
// Source-based constructor with memory management
explicit Program(const Context &context, std::string source):
- program_(new cl_program, [](cl_program* p) { CheckError(clReleaseProgram(*p)); delete p; }),
+ program_(new cl_program, [](cl_program* p) { CheckErrorDtor(clReleaseProgram(*p)); delete p; }),
length_(source.length()),
source_(std::move(source)),
source_ptr_(&source_[0]) {
auto status = CL_SUCCESS;
*program_ = clCreateProgramWithSource(context(), 1, &source_ptr_, &length_, &status);
- CheckError(status);
+ CLError::Check(status, "clCreateProgramWithSource");
}
// Binary-based constructor with memory management
explicit Program(const Device &device, const Context &context, const std::string& binary):
- program_(new cl_program, [](cl_program* p) { CheckError(clReleaseProgram(*p)); delete p; }),
+ program_(new cl_program, [](cl_program* p) { CheckErrorDtor(clReleaseProgram(*p)); delete p; }),
length_(binary.length()),
source_(binary),
source_ptr_(&source_[0]) {
@@ -350,25 +382,15 @@ class Program {
*program_ = clCreateProgramWithBinary(context(), 1, &dev, &length_,
reinterpret_cast<const unsigned char**>(&source_ptr_),
&status1, &status2);
- CheckError(status1);
- CheckError(status2);
+ CLError::Check(status1, "clCreateProgramWithBinary (binary status)");
+ CLError::Check(status2, "clCreateProgramWithBinary");
}
// Compiles the device program and returns whether or not there where any warnings/errors
- BuildStatus Build(const Device &device, std::vector<std::string> &options) {
+ void Build(const Device &device, std::vector<std::string> &options) {
auto options_string = std::accumulate(options.begin(), options.end(), std::string{" "});
const cl_device_id dev = device();
- auto status = clBuildProgram(*program_, 1, &dev, options_string.c_str(), nullptr, nullptr);
- if (status == CL_BUILD_PROGRAM_FAILURE) {
- return BuildStatus::kError;
- }
- else if (status == CL_INVALID_BINARY) {
- return BuildStatus::kInvalid;
- }
- else {
- CheckError(status);
- return BuildStatus::kSuccess;
- }
+ CheckError(clBuildProgram(*program_, 1, &dev, options_string.c_str(), nullptr, nullptr));
}
// Retrieves the warning/error message from the compiler (if any)
@@ -416,7 +438,7 @@ class Queue {
// Regular constructor with memory management
explicit Queue(const Context &context, const Device &device):
- queue_(new cl_command_queue, [](cl_command_queue* s) { CheckError(clReleaseCommandQueue(*s));
+ queue_(new cl_command_queue, [](cl_command_queue* s) { CheckErrorDtor(clReleaseCommandQueue(*s));
delete s; }) {
auto status = CL_SUCCESS;
#ifdef CL_VERSION_2_0
@@ -425,15 +447,17 @@ class Queue {
{
cl_queue_properties properties[] = {CL_QUEUE_PROPERTIES, CL_QUEUE_PROFILING_ENABLE, 0};
*queue_ = clCreateCommandQueueWithProperties(context(), device(), properties, &status);
+ CLError::Check(status, "clCreateCommandQueueWithProperties");
}
else
{
*queue_ = clCreateCommandQueue(context(), device(), CL_QUEUE_PROFILING_ENABLE, &status);
+ CLError::Check(status, "clCreateCommandQueue");
}
#else
*queue_ = clCreateCommandQueue(context(), device(), CL_QUEUE_PROFILING_ENABLE, &status);
+ CLError::Check(status, "clCreateCommandQueue");
#endif
- CheckError(status);
}
// Synchronizes the queue
@@ -525,7 +549,7 @@ class Buffer {
if (access_ == BufferAccess::kWriteOnly) { flags = CL_MEM_WRITE_ONLY; }
auto status = CL_SUCCESS;
*buffer_ = clCreateBuffer(context(), flags, size*sizeof(T), nullptr, &status);
- CheckError(status);
+ CLError::Check(status, "clCreateBuffer");
}
// As above, but now with read/write access as a default
@@ -546,18 +570,24 @@ class Buffer {
// Copies from device to host: reading the device buffer a-synchronously
void ReadAsync(const Queue &queue, const size_t size, T* host, const size_t offset = 0) const {
- if (access_ == BufferAccess::kWriteOnly) { Error("reading from a write-only buffer"); }
+ if (access_ == BufferAccess::kWriteOnly) {
+ throw LogicError("Buffer: reading from a write-only buffer");
+ }
CheckError(clEnqueueReadBuffer(queue(), *buffer_, CL_FALSE, offset*sizeof(T), size*sizeof(T),
host, 0, nullptr, nullptr));
}
void ReadAsync(const Queue &queue, const size_t size, std::vector<T> &host,
const size_t offset = 0) const {
- if (host.size() < size) { Error("target host buffer is too small"); }
+ if (host.size() < size) {
+ throw LogicError("Buffer: target host buffer is too small");
+ }
ReadAsync(queue, size, host.data(), offset);
}
void ReadAsync(const Queue &queue, const size_t size, BufferHost<T> &host,
const size_t offset = 0) const {
- if (host.size() < size) { Error("target host buffer is too small"); }
+ if (host.size() < size) {
+ throw LogicError("Buffer: target host buffer is too small");
+ }
ReadAsync(queue, size, host.data(), offset);
}
@@ -577,8 +607,12 @@ class Buffer {
// Copies from host to device: writing the device buffer a-synchronously
void WriteAsync(const Queue &queue, const size_t size, const T* host, const size_t offset = 0) {
- if (access_ == BufferAccess::kReadOnly) { Error("writing to a read-only buffer"); }
- if (GetSize() < (offset+size)*sizeof(T)) { Error("target device buffer is too small"); }
+ if (access_ == BufferAccess::kReadOnly) {
+ throw LogicError("Buffer: writing to a read-only buffer");
+ }
+ if (GetSize() < (offset+size)*sizeof(T)) {
+ throw LogicError("Buffer: target device buffer is too small");
+ }
CheckError(clEnqueueWriteBuffer(queue(), *buffer_, CL_FALSE, offset*sizeof(T), size*sizeof(T),
host, 0, nullptr, nullptr));
}
@@ -644,10 +678,10 @@ class Kernel {
// Regular constructor with memory management
explicit Kernel(const Program &program, const std::string &name):
- kernel_(new cl_kernel, [](cl_kernel* k) { CheckError(clReleaseKernel(*k)); delete k; }) {
+ kernel_(new cl_kernel, [](cl_kernel* k) { CheckErrorDtor(clReleaseKernel(*k)); delete k; }) {
auto status = CL_SUCCESS;
*kernel_ = clCreateKernel(program(), name.c_str(), &status);
- CheckError(status);
+ CLError::Check(status, "clCreateKernel");
}
// Sets a kernel argument at the indicated position
diff --git a/src/cxpp11_common.hpp b/src/cxpp11_common.hpp
new file mode 100644
index 00000000..c164ec1d
--- /dev/null
+++ b/src/cxpp11_common.hpp
@@ -0,0 +1,87 @@
+#ifndef CLBLAST_CXPP11_COMMON_H_
+#define CLBLAST_CXPP11_COMMON_H_
+
+// C++
+#include <string> // std::string
+#include <stdexcept> // std::runtime_error
+
+namespace clblast {
+// =================================================================================================
+
+// Basic exception class: represents an error that happened inside our code
+// (as opposed to an error in the C++ runtime)
+template <typename Base>
+class Error : public Base {
+ public:
+ using Base::Base;
+};
+
+// =================================================================================================
+
+// Represents a generic device-specific runtime error (returned by an OpenCL or CUDA API function)
+class DeviceError : public Error<std::runtime_error> {
+ public:
+ using Error<std::runtime_error>::Error;
+
+ static std::string TrimCallString(const char *where) {
+ const char *paren = strchr(where, '(');
+ if (paren) {
+ return std::string(where, paren);
+ } else {
+ return std::string(where);
+ }
+ }
+};
+
+// =================================================================================================
+
+// Represents a generic runtime error (aka environmental problem)
+class RuntimeError : public Error<std::runtime_error> {
+ public:
+ explicit RuntimeError(const std::string &reason):
+ Error("Run-time error: " + reason) {
+ }
+};
+
+// =================================================================================================
+
+// Represents a generic logic error (aka failed assertion)
+class LogicError : public Error<std::logic_error> {
+ public:
+ explicit LogicError(const std::string &reason):
+ Error("Internal logic error: " + reason) {
+ }
+};
+
+// =================================================================================================
+
+// Internal exception base class with a status field and a subclass-specific "details" field
+// which can be used to recreate an exception
+template <typename Base, typename Status>
+class ErrorCode : public Base {
+ public:
+ ErrorCode(Status status, const std::string &details, const std::string &reason):
+ Base(reason),
+ status_(status),
+ details_(details) {
+ }
+
+ Status status() const {
+ return status_;
+ }
+
+ const std::string& details() const {
+ return details_;
+ }
+
+ private:
+ const Status status_;
+ const std::string details_;
+};
+
+// =================================================================================================
+
+} // namespace clblast
+
+// CLBLAST_CXPP11_COMMON_H_
+#endif
diff --git a/src/database/database.cpp b/src/database/database.cpp
index 2340a89c..9b8537c2 100644
--- a/src/database/database.cpp
+++ b/src/database/database.cpp
@@ -92,7 +92,7 @@ Database::Database(const Queue &queue, const std::vector<std::string> &kernels,
}
}
- if (!search_result) { throw std::runtime_error("Database error, could not find a suitable entry"); }
+ if (!search_result) { throw RuntimeErrorCode(StatusCode::kDatabaseError); }
}
}
diff --git a/src/routine.cpp b/src/routine.cpp
index 80764b74..acafb0d2 100644
--- a/src/routine.cpp
+++ b/src/routine.cpp
@@ -21,10 +21,11 @@
namespace clblast {
// =================================================================================================
-// Constructor: not much here, because no status codes can be returned
+// The constructor does all heavy work, errors are returned as exceptions
Routine::Routine(Queue &queue, EventPointer event, const std::string &name,
const std::vector<std::string> &routines, const Precision precision,
- const std::vector<const Database::DatabaseEntry*> &userDatabase):
+ const std::vector<const Database::DatabaseEntry*> &userDatabase,
+ std::initializer_list<const char *> source):
precision_(precision),
routine_name_(name),
queue_(queue),
@@ -33,15 +34,9 @@ Routine::Routine(Queue &queue, EventPointer event, const std::string &name,
device_(queue_.GetDevice()),
device_name_(device_.Name()),
db_(queue_, routines, precision_, userDatabase) {
-}
-
-// =================================================================================================
-
-// Separate set-up function to allow for status codes to be returned
-StatusCode Routine::SetUp() {
// Queries the cache to see whether or not the program (context-specific) is already there
- if (ProgramIsInCache(context_, precision_, routine_name_)) { return StatusCode::kSuccess; }
+ if (ProgramIsInCache(context_, precision_, routine_name_)) { return; }
// Sets the build options from an environmental variable (if set)
auto options = std::vector<std::string>();
@@ -53,13 +48,10 @@ StatusCode Routine::SetUp() {
// Queries the cache to see whether or not the binary (device-specific) is already there. If it
// is, a program is created and stored in the cache
if (BinaryIsInCache(device_name_, precision_, routine_name_)) {
- try {
- auto& binary = GetBinaryFromCache(device_name_, precision_, routine_name_);
- auto program = Program(device_, context_, binary);
- program.Build(device_, options);
- StoreProgramToCache(program, context_, precision_, routine_name_);
- } catch (...) { return StatusCode::kBuildProgramFailure; }
- return StatusCode::kSuccess;
+ auto& binary = GetBinaryFromCache(device_name_, precision_, routine_name_);
+ auto program = Program(device_, context_, binary);
+ program.Build(device_, options);
+ StoreProgramToCache(program, context_, precision_, routine_name_);
+ // The program built from the cached binary is now in the program cache: stop here, as the
+ // removed code did, instead of falling through and compiling the routine from source again
+ return;
}
// Otherwise, the kernel will be compiled and program will be built. Both the binary and the
@@ -69,48 +61,50 @@ StatusCode Routine::SetUp() {
const auto extensions = device_.Capabilities();
if (precision_ == Precision::kDouble || precision_ == Precision::kComplexDouble) {
if (extensions.find(kKhronosDoublePrecision) == std::string::npos) {
- return StatusCode::kNoDoublePrecision;
+ throw RuntimeErrorCode(StatusCode::kNoDoublePrecision);
}
}
// As above, but for cl_khr_fp16 (half precision)
if (precision_ == Precision::kHalf) {
if (extensions.find(kKhronosHalfPrecision) == std::string::npos) {
- return StatusCode::kNoHalfPrecision;
+ throw RuntimeErrorCode(StatusCode::kNoHalfPrecision);
}
}
- // Loads the common header (typedefs and defines and such)
- std::string common_header =
- #include "kernels/common.opencl"
- ;
-
// Collects the parameters for this device in the form of defines, and adds the precision
- auto defines = db_.GetDefines();
- defines += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n";
+ auto source_string = db_.GetDefines();
+ source_string += "#define PRECISION "+ToString(static_cast<int>(precision_))+"\n";
// Adds the name of the routine as a define
- defines += "#define ROUTINE_"+routine_name_+"\n";
+ source_string += "#define ROUTINE_"+routine_name_+"\n";
// For specific devices, use the non-IEE754 compilant OpenCL mad() instruction. This can improve
// performance, but might result in a reduced accuracy.
if (device_.IsAMD() && device_.IsGPU()) {
- defines += "#define USE_CL_MAD 1\n";
+ source_string += "#define USE_CL_MAD 1\n";
}
// For specific devices, use staggered/shuffled workgroup indices.
if (device_.IsAMD() && device_.IsGPU()) {
- defines += "#define USE_STAGGERED_INDICES 1\n";
+ source_string += "#define USE_STAGGERED_INDICES 1\n";
}
// For specific devices add a global synchronisation barrier to the GEMM kernel to optimize
// performance through better cache behaviour
if (device_.IsARM() && device_.IsGPU()) {
- defines += "#define GLOBAL_MEM_FENCE 1\n";
+ source_string += "#define GLOBAL_MEM_FENCE 1\n";
}
- // Combines everything together into a single source string
- const auto source_string = defines + common_header + source_string_;
+ // Loads the common header (typedefs and defines and such)
+ source_string +=
+ #include "kernels/common.opencl"
+ ;
+
+ // Adds routine-specific code to the constructed source string
+ for (const char *s: source) {
+ source_string += s;
+ }
// Prints details of the routine to compile in case of debugging in verbose mode
#ifdef VERBOSE
@@ -120,23 +114,21 @@ StatusCode Routine::SetUp() {
#endif
// Compiles the kernel
+ auto program = Program(context_, source_string);
try {
- auto program = Program(context_, source_string);
- const auto build_status = program.Build(device_, options);
-
- // Checks for compiler crashes/errors/warnings
- if (build_status == BuildStatus::kError) {
- const auto message = program.GetBuildInfo(device_);
- fprintf(stdout, "OpenCL compiler error/warning: %s\n", message.c_str());
- return StatusCode::kBuildProgramFailure;
+ program.Build(device_, options);
+ } catch (const CLError &e) {
+ if (e.status() == CL_BUILD_PROGRAM_FAILURE) {
+ fprintf(stdout, "OpenCL compiler error/warning: %s\n",
+ program.GetBuildInfo(device_).c_str());
}
- if (build_status == BuildStatus::kInvalid) { return StatusCode::kInvalidBinary; }
+ throw;
+ }
- // Store the compiled binary and program in the cache
- const auto binary = program.GetIR();
- StoreBinaryToCache(binary, device_name_, precision_, routine_name_);
- StoreProgramToCache(program, context_, precision_, routine_name_);
- } catch (...) { return StatusCode::kBuildProgramFailure; }
+ // Store the compiled binary and program in the cache
+ const auto binary = program.GetIR();
+ StoreBinaryToCache(binary, device_name_, precision_, routine_name_);
+ StoreProgramToCache(program, context_, precision_, routine_name_);
// Prints the elapsed compilation time in case of debugging in verbose mode
#ifdef VERBOSE
@@ -144,9 +136,6 @@ StatusCode Routine::SetUp() {
const auto timing = std::chrono::duration<double,std::milli>(elapsed_time).count();
printf("[DEBUG] Completed compilation in %.2lf ms\n", timing);
#endif
-
- // No errors, normal termination of this function
- return StatusCode::kSuccess;
}
// =================================================================================================
diff --git a/src/routine.hpp b/src/routine.hpp
index 8582a2b7..f4ad435e 100644
--- a/src/routine.hpp
+++ b/src/routine.hpp
@@ -34,21 +34,19 @@ class Routine {
// Base class constructor. The user database is an optional extra database to override the
// built-in database.
+ // All heavy preparation work is done inside this constructor.
explicit Routine(Queue &queue, EventPointer event, const std::string &name,
const std::vector<std::string> &routines, const Precision precision,
- const std::vector<const Database::DatabaseEntry*> &userDatabase = {});
-
- // Set-up phase of the kernel
- StatusCode SetUp();
+ const std::vector<const Database::DatabaseEntry*> &userDatabase,
+ std::initializer_list<const char *> source);
protected:
// Non-static variable for the precision
const Precision precision_;
- // The routine's name and its kernel-source in string form
+ // The routine's name
const std::string routine_name_;
- std::string source_string_;
// The OpenCL objects, accessible only from derived classes
Queue queue_;
diff --git a/src/routines/common.cpp b/src/routines/common.cpp
index 3969cf9f..c995dc12 100644
--- a/src/routines/common.cpp
+++ b/src/routines/common.cpp
@@ -20,22 +20,26 @@ namespace clblast {
// =================================================================================================
// Enqueues a kernel, waits for completion, and checks for errors
-StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
- std::vector<size_t> global, const std::vector<size_t> &local,
- EventPointer event, const std::vector<Event> &waitForEvents) {
+void RunKernel(Kernel &kernel, Queue &queue, const Device &device,
+ std::vector<size_t> global, const std::vector<size_t> &local,
+ EventPointer event, const std::vector<Event> &waitForEvents) {
if (!local.empty()) {
// Tests for validity of the local thread sizes
if (local.size() > device.MaxWorkItemDimensions()) {
- return StatusCode::kInvalidLocalNumDimensions;
+ throw RuntimeErrorCode(StatusCode::kInvalidLocalNumDimensions);
}
const auto max_work_item_sizes = device.MaxWorkItemSizes();
for (auto i=size_t{0}; i<local.size(); ++i) {
- if (local[i] > max_work_item_sizes[i]) { return StatusCode::kInvalidLocalThreadsDim; }
+ if (local[i] > max_work_item_sizes[i]) {
+ throw RuntimeErrorCode(StatusCode::kInvalidLocalThreadsDim);
+ }
}
auto local_size = size_t{1};
for (auto &item: local) { local_size *= item; }
- if (local_size > device.MaxWorkGroupSize()) { return StatusCode::kInvalidLocalThreadsTotal; }
+ if (local_size > device.MaxWorkGroupSize()) {
+ throw RuntimeErrorCode(StatusCode::kInvalidLocalThreadsTotal);
+ }
// Make sure the global thread sizes are at least equal to the local sizes
for (auto i=size_t{0}; i<global.size(); ++i) {
@@ -45,7 +49,9 @@ StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
// Tests for local memory usage
const auto local_mem_usage = kernel.LocalMemUsage(device);
- if (!device.IsLocalMemoryValid(local_mem_usage)) { return StatusCode::kInvalidLocalMemUsage; }
+ if (!device.IsLocalMemoryValid(local_mem_usage)) {
+ throw RuntimeErrorCode(StatusCode::kInvalidLocalMemUsage);
+ }
// Prints the name of the kernel to launch in case of debugging in verbose mode
#ifdef VERBOSE
@@ -55,9 +61,7 @@ StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
#endif
// Launches the kernel (and checks for launch errors)
- try {
- kernel.Launch(queue, global, local, event, waitForEvents);
- } catch (...) { return StatusCode::kKernelLaunchError; }
+ kernel.Launch(queue, global, local, event, waitForEvents);
// Prints the elapsed execution time in case of debugging in verbose mode
#ifdef VERBOSE
@@ -66,9 +70,6 @@ StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
const auto timing = std::chrono::duration<double,std::milli>(elapsed_time).count();
printf("[DEBUG] Completed kernel in %.2lf ms\n", timing);
#endif
-
- // No errors, normal termination of this function
- return StatusCode::kSuccess;
}
// =================================================================================================
diff --git a/src/routines/common.hpp b/src/routines/common.hpp
index 9d8849c3..802abec4 100644
--- a/src/routines/common.hpp
+++ b/src/routines/common.hpp
@@ -27,29 +27,29 @@ namespace clblast {
// =================================================================================================
// Enqueues a kernel, waits for completion, and checks for errors
-StatusCode RunKernel(Kernel &kernel, Queue &queue, const Device &device,
- std::vector<size_t> global, const std::vector<size_t> &local,
- EventPointer event, const std::vector<Event> &waitForEvents = {});
+void RunKernel(Kernel &kernel, Queue &queue, const Device &device,
+ std::vector<size_t> global, const std::vector<size_t> &local,
+ EventPointer event, const std::vector<Event> &waitForEvents = {});
// =================================================================================================
// Copies or transposes a matrix and optionally pads/unpads it with zeros. This method is also able
// to write to symmetric and triangular matrices through optional arguments.
template <typename T>
-StatusCode PadCopyTransposeMatrix(Queue &queue, const Device &device,
- const Database &db,
- EventPointer event, const std::vector<Event> &waitForEvents,
- const size_t src_one, const size_t src_two,
- const size_t src_ld, const size_t src_offset,
- const Buffer<T> &src,
- const size_t dest_one, const size_t dest_two,
- const size_t dest_ld, const size_t dest_offset,
- const Buffer<T> &dest,
- const T alpha,
- const Program &program, const bool do_pad,
- const bool do_transpose, const bool do_conjugate,
- const bool upper = false, const bool lower = false,
- const bool diagonal_imag_zero = false) {
+void PadCopyTransposeMatrix(Queue &queue, const Device &device,
+ const Database &db,
+ EventPointer event, const std::vector<Event> &waitForEvents,
+ const size_t src_one, const size_t src_two,
+ const size_t src_ld, const size_t src_offset,
+ const Buffer<T> &src,
+ const size_t dest_one, const size_t dest_two,
+ const size_t dest_ld, const size_t dest_offset,
+ const Buffer<T> &dest,
+ const T alpha,
+ const Program &program, const bool do_pad,
+ const bool do_transpose, const bool do_conjugate,
+ const bool upper = false, const bool lower = false,
+ const bool diagonal_imag_zero = false) {
// Determines whether or not the fast-version could potentially be used
auto use_fast_kernel = (src_offset == 0) && (dest_offset == 0) && (do_conjugate == false) &&
@@ -84,77 +84,75 @@ StatusCode PadCopyTransposeMatrix(Queue &queue, const Device &device,
}
// Retrieves the kernel from the compiled binary
- try {
- auto kernel = Kernel(program, kernel_name);
+ auto kernel = Kernel(program, kernel_name);
- // Sets the kernel arguments
- if (use_fast_kernel) {
- kernel.SetArgument(0, static_cast<int>(src_ld));
- kernel.SetArgument(1, src());
- kernel.SetArgument(2, dest());
- kernel.SetArgument(3, GetRealArg(alpha));
+ // Sets the kernel arguments
+ if (use_fast_kernel) {
+ kernel.SetArgument(0, static_cast<int>(src_ld));
+ kernel.SetArgument(1, src());
+ kernel.SetArgument(2, dest());
+ kernel.SetArgument(3, GetRealArg(alpha));
+ }
+ else {
+ kernel.SetArgument(0, static_cast<int>(src_one));
+ kernel.SetArgument(1, static_cast<int>(src_two));
+ kernel.SetArgument(2, static_cast<int>(src_ld));
+ kernel.SetArgument(3, static_cast<int>(src_offset));
+ kernel.SetArgument(4, src());
+ kernel.SetArgument(5, static_cast<int>(dest_one));
+ kernel.SetArgument(6, static_cast<int>(dest_two));
+ kernel.SetArgument(7, static_cast<int>(dest_ld));
+ kernel.SetArgument(8, static_cast<int>(dest_offset));
+ kernel.SetArgument(9, dest());
+ kernel.SetArgument(10, GetRealArg(alpha));
+ if (do_pad) {
+ kernel.SetArgument(11, static_cast<int>(do_conjugate));
}
else {
- kernel.SetArgument(0, static_cast<int>(src_one));
- kernel.SetArgument(1, static_cast<int>(src_two));
- kernel.SetArgument(2, static_cast<int>(src_ld));
- kernel.SetArgument(3, static_cast<int>(src_offset));
- kernel.SetArgument(4, src());
- kernel.SetArgument(5, static_cast<int>(dest_one));
- kernel.SetArgument(6, static_cast<int>(dest_two));
- kernel.SetArgument(7, static_cast<int>(dest_ld));
- kernel.SetArgument(8, static_cast<int>(dest_offset));
- kernel.SetArgument(9, dest());
- kernel.SetArgument(10, GetRealArg(alpha));
- if (do_pad) {
- kernel.SetArgument(11, static_cast<int>(do_conjugate));
- }
- else {
- kernel.SetArgument(11, static_cast<int>(upper));
- kernel.SetArgument(12, static_cast<int>(lower));
- kernel.SetArgument(13, static_cast<int>(diagonal_imag_zero));
- }
+ kernel.SetArgument(11, static_cast<int>(upper));
+ kernel.SetArgument(12, static_cast<int>(lower));
+ kernel.SetArgument(13, static_cast<int>(diagonal_imag_zero));
}
+ }
- // Launches the kernel and returns the error code. Uses global and local thread sizes based on
- // parameters in the database.
- if (do_transpose) {
- if (use_fast_kernel) {
- const auto global = std::vector<size_t>{
- dest_one / db["TRA_WPT"],
- dest_two / db["TRA_WPT"]
- };
- const auto local = std::vector<size_t>{db["TRA_DIM"], db["TRA_DIM"]};
- return RunKernel(kernel, queue, device, global, local, event, waitForEvents);
- }
- else {
- const auto global = std::vector<size_t>{
- Ceil(CeilDiv(dest_one, db["PADTRA_WPT"]), db["PADTRA_TILE"]),
- Ceil(CeilDiv(dest_two, db["PADTRA_WPT"]), db["PADTRA_TILE"])
- };
- const auto local = std::vector<size_t>{db["PADTRA_TILE"], db["PADTRA_TILE"]};
- return RunKernel(kernel, queue, device, global, local, event, waitForEvents);
- }
+ // Launches the kernel (errors are reported as exceptions). Uses global and local thread sizes
+ // based on parameters in the database.
+ if (do_transpose) {
+ if (use_fast_kernel) {
+ const auto global = std::vector<size_t>{
+ dest_one / db["TRA_WPT"],
+ dest_two / db["TRA_WPT"]
+ };
+ const auto local = std::vector<size_t>{db["TRA_DIM"], db["TRA_DIM"]};
+ RunKernel(kernel, queue, device, global, local, event, waitForEvents);
}
else {
- if (use_fast_kernel) {
- const auto global = std::vector<size_t>{
- dest_one / db["COPY_VW"],
- dest_two / db["COPY_WPT"]
- };
- const auto local = std::vector<size_t>{db["COPY_DIMX"], db["COPY_DIMY"]};
- return RunKernel(kernel, queue, device, global, local, event, waitForEvents);
- }
- else {
- const auto global = std::vector<size_t>{
- Ceil(CeilDiv(dest_one, db["PAD_WPTX"]), db["PAD_DIMX"]),
- Ceil(CeilDiv(dest_two, db["PAD_WPTY"]), db["PAD_DIMY"])
- };
- const auto local = std::vector<size_t>{db["PAD_DIMX"], db["PAD_DIMY"]};
- return RunKernel(kernel, queue, device, global, local, event, waitForEvents);
- }
+ const auto global = std::vector<size_t>{
+ Ceil(CeilDiv(dest_one, db["PADTRA_WPT"]), db["PADTRA_TILE"]),
+ Ceil(CeilDiv(dest_two, db["PADTRA_WPT"]), db["PADTRA_TILE"])
+ };
+ const auto local = std::vector<size_t>{db["PADTRA_TILE"], db["PADTRA_TILE"]};
+ RunKernel(kernel, queue, device, global, local, event, waitForEvents);
}
- } catch (...) { return StatusCode::kInvalidKernel; }
+ }
+ else {
+ if (use_fast_kernel) {
+ const auto global = std::vector<size_t>{
+ dest_one / db["COPY_VW"],
+ dest_two / db["COPY_WPT"]
+ };
+ const auto local = std::vector<size_t>{db["COPY_DIMX"], db["COPY_DIMY"]};
+ RunKernel(kernel, queue, device, global, local, event, waitForEvents);
+ }
+ else {
+ const auto global = std::vector<size_t>{
+ Ceil(CeilDiv(dest_one, db["PAD_WPTX"]), db["PAD_DIMX"]),
+ Ceil(CeilDiv(dest_two, db["PAD_WPTY"]), db["PAD_DIMY"])
+ };
+ const auto local = std::vector<size_t>{db["PAD_DIMX"], db["PAD_DIMY"]};
+ RunKernel(kernel, queue, device, global, local, event, waitForEvents);
+ }
+ }
}
// =================================================================================================
diff --git a/src/routines/level1/xamax.cpp b/src/routines/level1/xamax.cpp
index 6b6e7f9e..e9efa1a7 100644
--- a/src/routines/level1/xamax.cpp
+++ b/src/routines/level1/xamax.cpp
@@ -22,74 +22,64 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xamax<T>::Xamax(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level1/xamax.opencl"
- ;
+ }) {
}
// =================================================================================================
// The main routine
template <typename T>
-StatusCode Xamax<T>::DoAmax(const size_t n,
- const Buffer<unsigned int> &imax_buffer, const size_t imax_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
+void Xamax<T>::DoAmax(const size_t n,
+ const Buffer<unsigned int> &imax_buffer, const size_t imax_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
// Makes sure all dimensions are larger than zero
- if (n == 0) { return StatusCode::kInvalidDimension; }
+ if (n == 0) { throw BLASError(StatusCode::kInvalidDimension); }
// Tests the vectors for validity
- auto status = TestVectorX(n, x_buffer, x_offset, x_inc);
- if (ErrorIn(status)) { return status; }
- status = TestVectorIndex(1, imax_buffer, imax_offset);
- if (ErrorIn(status)) { return status; }
+ TestVectorX(n, x_buffer, x_offset, x_inc);
+ TestVectorIndex(1, imax_buffer, imax_offset);
// Retrieves the Xamax kernels from the compiled binary
- try {
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel1 = Kernel(program, "Xamax");
- auto kernel2 = Kernel(program, "XamaxEpilogue");
-
- // Creates the buffer for intermediate values
- auto temp_size = 2*db_["WGS2"];
- auto temp_buffer1 = Buffer<T>(context_, temp_size);
- auto temp_buffer2 = Buffer<unsigned int>(context_, temp_size);
-
- // Sets the kernel arguments
- kernel1.SetArgument(0, static_cast<int>(n));
- kernel1.SetArgument(1, x_buffer());
- kernel1.SetArgument(2, static_cast<int>(x_offset));
- kernel1.SetArgument(3, static_cast<int>(x_inc));
- kernel1.SetArgument(4, temp_buffer1());
- kernel1.SetArgument(5, temp_buffer2());
-
- // Event waiting list
- auto eventWaitList = std::vector<Event>();
-
- // Launches the main kernel
- auto global1 = std::vector<size_t>{db_["WGS1"]*temp_size};
- auto local1 = std::vector<size_t>{db_["WGS1"]};
- auto kernelEvent = Event();
- status = RunKernel(kernel1, queue_, device_, global1, local1, kernelEvent.pointer());
- if (ErrorIn(status)) { return status; }
- eventWaitList.push_back(kernelEvent);
-
- // Sets the arguments for the epilogue kernel
- kernel2.SetArgument(0, temp_buffer1());
- kernel2.SetArgument(1, temp_buffer2());
- kernel2.SetArgument(2, imax_buffer());
- kernel2.SetArgument(3, static_cast<int>(imax_offset));
-
- // Launches the epilogue kernel
- auto global2 = std::vector<size_t>{db_["WGS2"]};
- auto local2 = std::vector<size_t>{db_["WGS2"]};
- status = RunKernel(kernel2, queue_, device_, global2, local2, event_, eventWaitList);
- if (ErrorIn(status)) { return status; }
-
- // Succesfully finished the computation
- return StatusCode::kSuccess;
- } catch (...) { return StatusCode::kInvalidKernel; }
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+ auto kernel1 = Kernel(program, "Xamax");
+ auto kernel2 = Kernel(program, "XamaxEpilogue");
+
+ // Creates the buffer for intermediate values
+ auto temp_size = 2*db_["WGS2"];
+ auto temp_buffer1 = Buffer<T>(context_, temp_size);
+ auto temp_buffer2 = Buffer<unsigned int>(context_, temp_size);
+
+ // Sets the kernel arguments
+ kernel1.SetArgument(0, static_cast<int>(n));
+ kernel1.SetArgument(1, x_buffer());
+ kernel1.SetArgument(2, static_cast<int>(x_offset));
+ kernel1.SetArgument(3, static_cast<int>(x_inc));
+ kernel1.SetArgument(4, temp_buffer1());
+ kernel1.SetArgument(5, temp_buffer2());
+
+ // Event waiting list
+ auto eventWaitList = std::vector<Event>();
+
+ // Launches the main kernel
+ auto global1 = std::vector<size_t>{db_["WGS1"]*temp_size};
+ auto local1 = std::vector<size_t>{db_["WGS1"]};
+ auto kernelEvent = Event();
+ RunKernel(kernel1, queue_, device_, global1, local1, kernelEvent.pointer());
+ eventWaitList.push_back(kernelEvent);
+
+ // Sets the arguments for the epilogue kernel
+ kernel2.SetArgument(0, temp_buffer1());
+ kernel2.SetArgument(1, temp_buffer2());
+ kernel2.SetArgument(2, imax_buffer());
+ kernel2.SetArgument(3, static_cast<int>(imax_offset));
+
+ // Launches the epilogue kernel
+ auto global2 = std::vector<size_t>{db_["WGS2"]};
+ auto local2 = std::vector<size_t>{db_["WGS2"]};
+ RunKernel(kernel2, queue_, device_, global2, local2, event_, eventWaitList);
}
// =================================================================================================
diff --git a/src/routines/level1/xamax.hpp b/src/routines/level1/xamax.hpp
index aa45a8e4..4d1e0082 100644
--- a/src/routines/level1/xamax.hpp
+++ b/src/routines/level1/xamax.hpp
@@ -28,9 +28,9 @@ class Xamax: public Routine {
Xamax(Queue &queue, EventPointer event, const std::string &name = "AMAX");
// Templated-precision implementation of the routine
- StatusCode DoAmax(const size_t n,
- const Buffer<unsigned int> &imax_buffer, const size_t imax_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+ void DoAmax(const size_t n,
+ const Buffer<unsigned int> &imax_buffer, const size_t imax_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
};
// =================================================================================================
diff --git a/src/routines/level1/xasum.cpp b/src/routines/level1/xasum.cpp
index 0c1ce903..a242a5fa 100644
--- a/src/routines/level1/xasum.cpp
+++ b/src/routines/level1/xasum.cpp
@@ -22,71 +22,61 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xasum<T>::Xasum(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level1/xasum.opencl"
- ;
+ }) {
}
// =================================================================================================
// The main routine
template <typename T>
-StatusCode Xasum<T>::DoAsum(const size_t n,
- const Buffer<T> &asum_buffer, const size_t asum_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
+void Xasum<T>::DoAsum(const size_t n,
+ const Buffer<T> &asum_buffer, const size_t asum_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
// Makes sure all dimensions are larger than zero
- if (n == 0) { return StatusCode::kInvalidDimension; }
+ if (n == 0) { throw BLASError(StatusCode::kInvalidDimension); }
// Tests the vectors for validity
- auto status = TestVectorX(n, x_buffer, x_offset, x_inc);
- if (ErrorIn(status)) { return status; }
- status = TestVectorScalar(1, asum_buffer, asum_offset);
- if (ErrorIn(status)) { return status; }
+ TestVectorX(n, x_buffer, x_offset, x_inc);
+ TestVectorScalar(1, asum_buffer, asum_offset);
// Retrieves the Xasum kernels from the compiled binary
- try {
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel1 = Kernel(program, "Xasum");
- auto kernel2 = Kernel(program, "XasumEpilogue");
-
- // Creates the buffer for intermediate values
- auto temp_size = 2*db_["WGS2"];
- auto temp_buffer = Buffer<T>(context_, temp_size);
-
- // Sets the kernel arguments
- kernel1.SetArgument(0, static_cast<int>(n));
- kernel1.SetArgument(1, x_buffer());
- kernel1.SetArgument(2, static_cast<int>(x_offset));
- kernel1.SetArgument(3, static_cast<int>(x_inc));
- kernel1.SetArgument(4, temp_buffer());
-
- // Event waiting list
- auto eventWaitList = std::vector<Event>();
-
- // Launches the main kernel
- auto global1 = std::vector<size_t>{db_["WGS1"]*temp_size};
- auto local1 = std::vector<size_t>{db_["WGS1"]};
- auto kernelEvent = Event();
- status = RunKernel(kernel1, queue_, device_, global1, local1, kernelEvent.pointer());
- if (ErrorIn(status)) { return status; }
- eventWaitList.push_back(kernelEvent);
-
- // Sets the arguments for the epilogue kernel
- kernel2.SetArgument(0, temp_buffer());
- kernel2.SetArgument(1, asum_buffer());
- kernel2.SetArgument(2, static_cast<int>(asum_offset));
-
- // Launches the epilogue kernel
- auto global2 = std::vector<size_t>{db_["WGS2"]};
- auto local2 = std::vector<size_t>{db_["WGS2"]};
- status = RunKernel(kernel2, queue_, device_, global2, local2, event_, eventWaitList);
- if (ErrorIn(status)) { return status; }
-
- // Succesfully finished the computation
- return StatusCode::kSuccess;
- } catch (...) { return StatusCode::kInvalidKernel; }
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+ auto kernel1 = Kernel(program, "Xasum");
+ auto kernel2 = Kernel(program, "XasumEpilogue");
+
+ // Creates the buffer for intermediate values
+ auto temp_size = 2*db_["WGS2"];
+ auto temp_buffer = Buffer<T>(context_, temp_size);
+
+ // Sets the kernel arguments
+ kernel1.SetArgument(0, static_cast<int>(n));
+ kernel1.SetArgument(1, x_buffer());
+ kernel1.SetArgument(2, static_cast<int>(x_offset));
+ kernel1.SetArgument(3, static_cast<int>(x_inc));
+ kernel1.SetArgument(4, temp_buffer());
+
+ // Event waiting list
+ auto eventWaitList = std::vector<Event>();
+
+ // Launches the main kernel
+ auto global1 = std::vector<size_t>{db_["WGS1"]*temp_size};
+ auto local1 = std::vector<size_t>{db_["WGS1"]};
+ auto kernelEvent = Event();
+ RunKernel(kernel1, queue_, device_, global1, local1, kernelEvent.pointer());
+ eventWaitList.push_back(kernelEvent);
+
+ // Sets the arguments for the epilogue kernel
+ kernel2.SetArgument(0, temp_buffer());
+ kernel2.SetArgument(1, asum_buffer());
+ kernel2.SetArgument(2, static_cast<int>(asum_offset));
+
+ // Launches the epilogue kernel
+ auto global2 = std::vector<size_t>{db_["WGS2"]};
+ auto local2 = std::vector<size_t>{db_["WGS2"]};
+ RunKernel(kernel2, queue_, device_, global2, local2, event_, eventWaitList);
}
// =================================================================================================
diff --git a/src/routines/level1/xasum.hpp b/src/routines/level1/xasum.hpp
index 5a253f4d..0afcc4ff 100644
--- a/src/routines/level1/xasum.hpp
+++ b/src/routines/level1/xasum.hpp
@@ -28,9 +28,9 @@ class Xasum: public Routine {
Xasum(Queue &queue, EventPointer event, const std::string &name = "ASUM");
// Templated-precision implementation of the routine
- StatusCode DoAsum(const size_t n,
- const Buffer<T> &asum_buffer, const size_t asum_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+ void DoAsum(const size_t n,
+ const Buffer<T> &asum_buffer, const size_t asum_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
};
// =================================================================================================
diff --git a/src/routines/level1/xaxpy.cpp b/src/routines/level1/xaxpy.cpp
index 3445e2b5..5436c5b7 100644
--- a/src/routines/level1/xaxpy.cpp
+++ b/src/routines/level1/xaxpy.cpp
@@ -22,29 +22,26 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xaxpy<T>::Xaxpy(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level1/level1.opencl"
#include "../../kernels/level1/xaxpy.opencl"
- ;
+ }) {
}
// =================================================================================================
// The main routine
template <typename T>
-StatusCode Xaxpy<T>::DoAxpy(const size_t n, const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
+void Xaxpy<T>::DoAxpy(const size_t n, const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
// Makes sure all dimensions are larger than zero
- if (n == 0) { return StatusCode::kInvalidDimension; }
+ if (n == 0) { throw BLASError(StatusCode::kInvalidDimension); }
// Tests the vectors for validity
- auto status = TestVectorX(n, x_buffer, x_offset, x_inc);
- if (ErrorIn(status)) { return status; }
- status = TestVectorY(n, y_buffer, y_offset, y_inc);
- if (ErrorIn(status)) { return status; }
+ TestVectorX(n, x_buffer, x_offset, x_inc);
+ TestVectorY(n, y_buffer, y_offset, y_inc);
// Determines whether or not the fast-version can be used
bool use_fast_kernel = (x_offset == 0) && (x_inc == 1) &&
@@ -55,45 +52,39 @@ StatusCode Xaxpy<T>::DoAxpy(const size_t n, const T alpha,
auto kernel_name = (use_fast_kernel) ? "XaxpyFast" : "Xaxpy";
// Retrieves the Xaxpy kernel from the compiled binary
- try {
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel = Kernel(program, kernel_name);
-
- // Sets the kernel arguments
- if (use_fast_kernel) {
- kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, GetRealArg(alpha));
- kernel.SetArgument(2, x_buffer());
- kernel.SetArgument(3, y_buffer());
- }
- else {
- kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, GetRealArg(alpha));
- kernel.SetArgument(2, x_buffer());
- kernel.SetArgument(3, static_cast<int>(x_offset));
- kernel.SetArgument(4, static_cast<int>(x_inc));
- kernel.SetArgument(5, y_buffer());
- kernel.SetArgument(6, static_cast<int>(y_offset));
- kernel.SetArgument(7, static_cast<int>(y_inc));
- }
-
- // Launches the kernel
- if (use_fast_kernel) {
- auto global = std::vector<size_t>{CeilDiv(n, db_["WPT"]*db_["VW"])};
- auto local = std::vector<size_t>{db_["WGS"]};
- status = RunKernel(kernel, queue_, device_, global, local, event_);
- }
- else {
- auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]);
- auto global = std::vector<size_t>{n_ceiled/db_["WPT"]};
- auto local = std::vector<size_t>{db_["WGS"]};
- status = RunKernel(kernel, queue_, device_, global, local, event_);
- }
- if (ErrorIn(status)) { return status; }
-
- // Succesfully finished the computation
- return StatusCode::kSuccess;
- } catch (...) { return StatusCode::kInvalidKernel; }
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+ auto kernel = Kernel(program, kernel_name);
+
+ // Sets the kernel arguments
+ if (use_fast_kernel) {
+ kernel.SetArgument(0, static_cast<int>(n));
+ kernel.SetArgument(1, GetRealArg(alpha));
+ kernel.SetArgument(2, x_buffer());
+ kernel.SetArgument(3, y_buffer());
+ }
+ else {
+ kernel.SetArgument(0, static_cast<int>(n));
+ kernel.SetArgument(1, GetRealArg(alpha));
+ kernel.SetArgument(2, x_buffer());
+ kernel.SetArgument(3, static_cast<int>(x_offset));
+ kernel.SetArgument(4, static_cast<int>(x_inc));
+ kernel.SetArgument(5, y_buffer());
+ kernel.SetArgument(6, static_cast<int>(y_offset));
+ kernel.SetArgument(7, static_cast<int>(y_inc));
+ }
+
+ // Launches the kernel
+ if (use_fast_kernel) {
+ auto global = std::vector<size_t>{CeilDiv(n, db_["WPT"]*db_["VW"])};
+ auto local = std::vector<size_t>{db_["WGS"]};
+ RunKernel(kernel, queue_, device_, global, local, event_);
+ }
+ else {
+ auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]);
+ auto global = std::vector<size_t>{n_ceiled/db_["WPT"]};
+ auto local = std::vector<size_t>{db_["WGS"]};
+ RunKernel(kernel, queue_, device_, global, local, event_);
+ }
}
// =================================================================================================
diff --git a/src/routines/level1/xaxpy.hpp b/src/routines/level1/xaxpy.hpp
index caac871e..9b30dfaa 100644
--- a/src/routines/level1/xaxpy.hpp
+++ b/src/routines/level1/xaxpy.hpp
@@ -28,9 +28,9 @@ class Xaxpy: public Routine {
Xaxpy(Queue &queue, EventPointer event, const std::string &name = "AXPY");
// Templated-precision implementation of the routine
- StatusCode DoAxpy(const size_t n, const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+ void DoAxpy(const size_t n, const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
};
// =================================================================================================
diff --git a/src/routines/level1/xcopy.cpp b/src/routines/level1/xcopy.cpp
index 673ef349..d86200c0 100644
--- a/src/routines/level1/xcopy.cpp
+++ b/src/routines/level1/xcopy.cpp
@@ -22,29 +22,26 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xcopy<T>::Xcopy(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level1/level1.opencl"
#include "../../kernels/level1/xcopy.opencl"
- ;
+ }) {
}
// =================================================================================================
// The main routine
template <typename T>
-StatusCode Xcopy<T>::DoCopy(const size_t n,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
+void Xcopy<T>::DoCopy(const size_t n,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
// Makes sure all dimensions are larger than zero
- if (n == 0) { return StatusCode::kInvalidDimension; }
+ if (n == 0) { throw BLASError(StatusCode::kInvalidDimension); }
// Tests the vectors for validity
- auto status = TestVectorX(n, x_buffer, x_offset, x_inc);
- if (ErrorIn(status)) { return status; }
- status = TestVectorY(n, y_buffer, y_offset, y_inc);
- if (ErrorIn(status)) { return status; }
+ TestVectorX(n, x_buffer, x_offset, x_inc);
+ TestVectorY(n, y_buffer, y_offset, y_inc);
// Determines whether or not the fast-version can be used
bool use_fast_kernel = (x_offset == 0) && (x_inc == 1) &&
@@ -55,43 +52,37 @@ StatusCode Xcopy<T>::DoCopy(const size_t n,
auto kernel_name = (use_fast_kernel) ? "XcopyFast" : "Xcopy";
// Retrieves the Xcopy kernel from the compiled binary
- try {
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel = Kernel(program, kernel_name);
-
- // Sets the kernel arguments
- if (use_fast_kernel) {
- kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, x_buffer());
- kernel.SetArgument(2, y_buffer());
- }
- else {
- kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, x_buffer());
- kernel.SetArgument(2, static_cast<int>(x_offset));
- kernel.SetArgument(3, static_cast<int>(x_inc));
- kernel.SetArgument(4, y_buffer());
- kernel.SetArgument(5, static_cast<int>(y_offset));
- kernel.SetArgument(6, static_cast<int>(y_inc));
- }
-
- // Launches the kernel
- if (use_fast_kernel) {
- auto global = std::vector<size_t>{CeilDiv(n, db_["WPT"]*db_["VW"])};
- auto local = std::vector<size_t>{db_["WGS"]};
- status = RunKernel(kernel, queue_, device_, global, local, event_);
- }
- else {
- auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]);
- auto global = std::vector<size_t>{n_ceiled/db_["WPT"]};
- auto local = std::vector<size_t>{db_["WGS"]};
- status = RunKernel(kernel, queue_, device_, global, local, event_);
- }
- if (ErrorIn(status)) { return status; }
-
- // Succesfully finished the computation
- return StatusCode::kSuccess;
- } catch (...) { return StatusCode::kInvalidKernel; }
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+ auto kernel = Kernel(program, kernel_name);
+
+ // Sets the kernel arguments
+ if (use_fast_kernel) {
+ kernel.SetArgument(0, static_cast<int>(n));
+ kernel.SetArgument(1, x_buffer());
+ kernel.SetArgument(2, y_buffer());
+ }
+ else {
+ kernel.SetArgument(0, static_cast<int>(n));
+ kernel.SetArgument(1, x_buffer());
+ kernel.SetArgument(2, static_cast<int>(x_offset));
+ kernel.SetArgument(3, static_cast<int>(x_inc));
+ kernel.SetArgument(4, y_buffer());
+ kernel.SetArgument(5, static_cast<int>(y_offset));
+ kernel.SetArgument(6, static_cast<int>(y_inc));
+ }
+
+ // Launches the kernel
+ if (use_fast_kernel) {
+ auto global = std::vector<size_t>{CeilDiv(n, db_["WPT"]*db_["VW"])};
+ auto local = std::vector<size_t>{db_["WGS"]};
+ RunKernel(kernel, queue_, device_, global, local, event_);
+ }
+ else {
+ auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]);
+ auto global = std::vector<size_t>{n_ceiled/db_["WPT"]};
+ auto local = std::vector<size_t>{db_["WGS"]};
+ RunKernel(kernel, queue_, device_, global, local, event_);
+ }
}
// =================================================================================================
diff --git a/src/routines/level1/xcopy.hpp b/src/routines/level1/xcopy.hpp
index 0c424ba3..a6454fcc 100644
--- a/src/routines/level1/xcopy.hpp
+++ b/src/routines/level1/xcopy.hpp
@@ -28,9 +28,9 @@ class Xcopy: public Routine {
Xcopy(Queue &queue, EventPointer event, const std::string &name = "COPY");
// Templated-precision implementation of the routine
- StatusCode DoCopy(const size_t n,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+ void DoCopy(const size_t n,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
};
// =================================================================================================
diff --git a/src/routines/level1/xdot.cpp b/src/routines/level1/xdot.cpp
index bafea157..9d718913 100644
--- a/src/routines/level1/xdot.cpp
+++ b/src/routines/level1/xdot.cpp
@@ -22,79 +22,68 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xdot<T>::Xdot(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level1/xdot.opencl"
- ;
+ }) {
}
// =================================================================================================
// The main routine
template <typename T>
-StatusCode Xdot<T>::DoDot(const size_t n,
- const Buffer<T> &dot_buffer, const size_t dot_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
- const bool do_conjugate) {
+void Xdot<T>::DoDot(const size_t n,
+ const Buffer<T> &dot_buffer, const size_t dot_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const bool do_conjugate) {
// Makes sure all dimensions are larger than zero
- if (n == 0) { return StatusCode::kInvalidDimension; }
+ if (n == 0) { throw BLASError(StatusCode::kInvalidDimension); }
// Tests the vectors for validity
- auto status = TestVectorX(n, x_buffer, x_offset, x_inc);
- if (ErrorIn(status)) { return status; }
- status = TestVectorY(n, y_buffer, y_offset, y_inc);
- if (ErrorIn(status)) { return status; }
- status = TestVectorScalar(1, dot_buffer, dot_offset);
- if (ErrorIn(status)) { return status; }
+ TestVectorX(n, x_buffer, x_offset, x_inc);
+ TestVectorY(n, y_buffer, y_offset, y_inc);
+ TestVectorScalar(1, dot_buffer, dot_offset);
// Retrieves the Xdot kernels from the compiled binary
- try {
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel1 = Kernel(program, "Xdot");
- auto kernel2 = Kernel(program, "XdotEpilogue");
-
- // Creates the buffer for intermediate values
- auto temp_size = 2*db_["WGS2"];
- auto temp_buffer = Buffer<T>(context_, temp_size);
-
- // Sets the kernel arguments
- kernel1.SetArgument(0, static_cast<int>(n));
- kernel1.SetArgument(1, x_buffer());
- kernel1.SetArgument(2, static_cast<int>(x_offset));
- kernel1.SetArgument(3, static_cast<int>(x_inc));
- kernel1.SetArgument(4, y_buffer());
- kernel1.SetArgument(5, static_cast<int>(y_offset));
- kernel1.SetArgument(6, static_cast<int>(y_inc));
- kernel1.SetArgument(7, temp_buffer());
- kernel1.SetArgument(8, static_cast<int>(do_conjugate));
-
- // Event waiting list
- auto eventWaitList = std::vector<Event>();
-
- // Launches the main kernel
- auto global1 = std::vector<size_t>{db_["WGS1"]*temp_size};
- auto local1 = std::vector<size_t>{db_["WGS1"]};
- auto kernelEvent = Event();
- status = RunKernel(kernel1, queue_, device_, global1, local1, kernelEvent.pointer());
- if (ErrorIn(status)) { return status; }
- eventWaitList.push_back(kernelEvent);
-
- // Sets the arguments for the epilogue kernel
- kernel2.SetArgument(0, temp_buffer());
- kernel2.SetArgument(1, dot_buffer());
- kernel2.SetArgument(2, static_cast<int>(dot_offset));
-
- // Launches the epilogue kernel
- auto global2 = std::vector<size_t>{db_["WGS2"]};
- auto local2 = std::vector<size_t>{db_["WGS2"]};
- status = RunKernel(kernel2, queue_, device_, global2, local2, event_, eventWaitList);
- if (ErrorIn(status)) { return status; }
-
- // Succesfully finished the computation
- return StatusCode::kSuccess;
- } catch (...) { return StatusCode::kInvalidKernel; }
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+ auto kernel1 = Kernel(program, "Xdot");
+ auto kernel2 = Kernel(program, "XdotEpilogue");
+
+ // Creates the buffer for intermediate values
+ auto temp_size = 2*db_["WGS2"];
+ auto temp_buffer = Buffer<T>(context_, temp_size);
+
+ // Sets the kernel arguments
+ kernel1.SetArgument(0, static_cast<int>(n));
+ kernel1.SetArgument(1, x_buffer());
+ kernel1.SetArgument(2, static_cast<int>(x_offset));
+ kernel1.SetArgument(3, static_cast<int>(x_inc));
+ kernel1.SetArgument(4, y_buffer());
+ kernel1.SetArgument(5, static_cast<int>(y_offset));
+ kernel1.SetArgument(6, static_cast<int>(y_inc));
+ kernel1.SetArgument(7, temp_buffer());
+ kernel1.SetArgument(8, static_cast<int>(do_conjugate));
+
+ // Event waiting list
+ auto eventWaitList = std::vector<Event>();
+
+ // Launches the main kernel
+ auto global1 = std::vector<size_t>{db_["WGS1"]*temp_size};
+ auto local1 = std::vector<size_t>{db_["WGS1"]};
+ auto kernelEvent = Event();
+ RunKernel(kernel1, queue_, device_, global1, local1, kernelEvent.pointer());
+ eventWaitList.push_back(kernelEvent);
+
+ // Sets the arguments for the epilogue kernel
+ kernel2.SetArgument(0, temp_buffer());
+ kernel2.SetArgument(1, dot_buffer());
+ kernel2.SetArgument(2, static_cast<int>(dot_offset));
+
+ // Launches the epilogue kernel
+ auto global2 = std::vector<size_t>{db_["WGS2"]};
+ auto local2 = std::vector<size_t>{db_["WGS2"]};
+ RunKernel(kernel2, queue_, device_, global2, local2, event_, eventWaitList);
}
// =================================================================================================
diff --git a/src/routines/level1/xdot.hpp b/src/routines/level1/xdot.hpp
index 02c1efaa..a4c9dfa0 100644
--- a/src/routines/level1/xdot.hpp
+++ b/src/routines/level1/xdot.hpp
@@ -28,11 +28,11 @@ class Xdot: public Routine {
Xdot(Queue &queue, EventPointer event, const std::string &name = "DOT");
// Templated-precision implementation of the routine
- StatusCode DoDot(const size_t n,
- const Buffer<T> &dot_buffer, const size_t dot_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
- const bool do_conjugate = false);
+ void DoDot(const size_t n,
+ const Buffer<T> &dot_buffer, const size_t dot_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const bool do_conjugate = false);
};
// =================================================================================================
diff --git a/src/routines/level1/xdotc.cpp b/src/routines/level1/xdotc.cpp
index 27cf2bab..5a4e939a 100644
--- a/src/routines/level1/xdotc.cpp
+++ b/src/routines/level1/xdotc.cpp
@@ -29,14 +29,14 @@ Xdotc<T>::Xdotc(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xdotc<T>::DoDotc(const size_t n,
- const Buffer<T> &dot_buffer, const size_t dot_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
- return DoDot(n, dot_buffer, dot_offset,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- true);
+void Xdotc<T>::DoDotc(const size_t n,
+ const Buffer<T> &dot_buffer, const size_t dot_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
+ DoDot(n, dot_buffer, dot_offset,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ true);
}
// =================================================================================================
diff --git a/src/routines/level1/xdotc.hpp b/src/routines/level1/xdotc.hpp
index b8cbdaf5..ab7465f5 100644
--- a/src/routines/level1/xdotc.hpp
+++ b/src/routines/level1/xdotc.hpp
@@ -31,10 +31,10 @@ class Xdotc: public Xdot<T> {
Xdotc(Queue &queue, EventPointer event, const std::string &name = "DOTC");
// Templated-precision implementation of the routine
- StatusCode DoDotc(const size_t n,
- const Buffer<T> &dot_buffer, const size_t dot_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+ void DoDotc(const size_t n,
+ const Buffer<T> &dot_buffer, const size_t dot_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
};
// =================================================================================================
diff --git a/src/routines/level1/xdotu.cpp b/src/routines/level1/xdotu.cpp
index 0bce70b7..b9d8bcef 100644
--- a/src/routines/level1/xdotu.cpp
+++ b/src/routines/level1/xdotu.cpp
@@ -28,14 +28,14 @@ Xdotu<T>::Xdotu(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xdotu<T>::DoDotu(const size_t n,
- const Buffer<T> &dot_buffer, const size_t dot_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
- return DoDot(n, dot_buffer, dot_offset,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- false);
+void Xdotu<T>::DoDotu(const size_t n,
+ const Buffer<T> &dot_buffer, const size_t dot_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
+ DoDot(n, dot_buffer, dot_offset,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ false);
}
// =================================================================================================
diff --git a/src/routines/level1/xdotu.hpp b/src/routines/level1/xdotu.hpp
index b3f73086..cad91c58 100644
--- a/src/routines/level1/xdotu.hpp
+++ b/src/routines/level1/xdotu.hpp
@@ -31,10 +31,10 @@ class Xdotu: public Xdot<T> {
Xdotu(Queue &queue, EventPointer event, const std::string &name = "DOTU");
// Templated-precision implementation of the routine
- StatusCode DoDotu(const size_t n,
- const Buffer<T> &dot_buffer, const size_t dot_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+ void DoDotu(const size_t n,
+ const Buffer<T> &dot_buffer, const size_t dot_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
};
// =================================================================================================
diff --git a/src/routines/level1/xmax.hpp b/src/routines/level1/xmax.hpp
index 5a0236f2..2b7a5ae7 100644
--- a/src/routines/level1/xmax.hpp
+++ b/src/routines/level1/xmax.hpp
@@ -35,10 +35,10 @@ class Xmax: public Xamax<T> {
// Forwards to the regular absolute version. The implementation difference is realised in the
// kernel through a pre-processor macro based on the name of the routine.
- StatusCode DoMax(const size_t n,
- const Buffer<unsigned int> &imax_buffer, const size_t imax_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
- return DoAmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc);
+ void DoMax(const size_t n,
+ const Buffer<unsigned int> &imax_buffer, const size_t imax_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
+ DoAmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc);
}
};
diff --git a/src/routines/level1/xmin.hpp b/src/routines/level1/xmin.hpp
index 6befec64..47a195ea 100644
--- a/src/routines/level1/xmin.hpp
+++ b/src/routines/level1/xmin.hpp
@@ -35,10 +35,10 @@ class Xmin: public Xamax<T> {
// Forwards to the regular max-absolute version. The implementation difference is realised in the
// kernel through a pre-processor macro based on the name of the routine.
- StatusCode DoMin(const size_t n,
- const Buffer<unsigned int> &imin_buffer, const size_t imin_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
- return DoAmax(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc);
+ void DoMin(const size_t n,
+ const Buffer<unsigned int> &imin_buffer, const size_t imin_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
+ DoAmax(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc);
}
};
diff --git a/src/routines/level1/xnrm2.cpp b/src/routines/level1/xnrm2.cpp
index 97615d8b..373820a4 100644
--- a/src/routines/level1/xnrm2.cpp
+++ b/src/routines/level1/xnrm2.cpp
@@ -22,71 +22,61 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xnrm2<T>::Xnrm2(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xdot"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level1/xnrm2.opencl"
- ;
+ }) {
}
// =================================================================================================
// The main routine
template <typename T>
-StatusCode Xnrm2<T>::DoNrm2(const size_t n,
- const Buffer<T> &nrm2_buffer, const size_t nrm2_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
+void Xnrm2<T>::DoNrm2(const size_t n,
+ const Buffer<T> &nrm2_buffer, const size_t nrm2_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
// Makes sure all dimensions are larger than zero
- if (n == 0) { return StatusCode::kInvalidDimension; }
+ if (n == 0) { throw BLASError(StatusCode::kInvalidDimension); }
// Tests the vectors for validity
- auto status = TestVectorX(n, x_buffer, x_offset, x_inc);
- if (ErrorIn(status)) { return status; }
- status = TestVectorScalar(1, nrm2_buffer, nrm2_offset);
- if (ErrorIn(status)) { return status; }
+ TestVectorX(n, x_buffer, x_offset, x_inc);
+ TestVectorScalar(1, nrm2_buffer, nrm2_offset);
// Retrieves the Xnrm2 kernels from the compiled binary
- try {
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel1 = Kernel(program, "Xnrm2");
- auto kernel2 = Kernel(program, "Xnrm2Epilogue");
-
- // Creates the buffer for intermediate values
- auto temp_size = 2*db_["WGS2"];
- auto temp_buffer = Buffer<T>(context_, temp_size);
-
- // Sets the kernel arguments
- kernel1.SetArgument(0, static_cast<int>(n));
- kernel1.SetArgument(1, x_buffer());
- kernel1.SetArgument(2, static_cast<int>(x_offset));
- kernel1.SetArgument(3, static_cast<int>(x_inc));
- kernel1.SetArgument(4, temp_buffer());
-
- // Event waiting list
- auto eventWaitList = std::vector<Event>();
-
- // Launches the main kernel
- auto global1 = std::vector<size_t>{db_["WGS1"]*temp_size};
- auto local1 = std::vector<size_t>{db_["WGS1"]};
- auto kernelEvent = Event();
- status = RunKernel(kernel1, queue_, device_, global1, local1, kernelEvent.pointer());
- if (ErrorIn(status)) { return status; }
- eventWaitList.push_back(kernelEvent);
-
- // Sets the arguments for the epilogue kernel
- kernel2.SetArgument(0, temp_buffer());
- kernel2.SetArgument(1, nrm2_buffer());
- kernel2.SetArgument(2, static_cast<int>(nrm2_offset));
-
- // Launches the epilogue kernel
- auto global2 = std::vector<size_t>{db_["WGS2"]};
- auto local2 = std::vector<size_t>{db_["WGS2"]};
- status = RunKernel(kernel2, queue_, device_, global2, local2, event_, eventWaitList);
- if (ErrorIn(status)) { return status; }
-
- // Succesfully finished the computation
- return StatusCode::kSuccess;
- } catch (...) { return StatusCode::kInvalidKernel; }
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+ auto kernel1 = Kernel(program, "Xnrm2");
+ auto kernel2 = Kernel(program, "Xnrm2Epilogue");
+
+ // Creates the buffer for intermediate values
+ auto temp_size = 2*db_["WGS2"];
+ auto temp_buffer = Buffer<T>(context_, temp_size);
+
+ // Sets the kernel arguments
+ kernel1.SetArgument(0, static_cast<int>(n));
+ kernel1.SetArgument(1, x_buffer());
+ kernel1.SetArgument(2, static_cast<int>(x_offset));
+ kernel1.SetArgument(3, static_cast<int>(x_inc));
+ kernel1.SetArgument(4, temp_buffer());
+
+ // Event waiting list
+ auto eventWaitList = std::vector<Event>();
+
+ // Launches the main kernel
+ auto global1 = std::vector<size_t>{db_["WGS1"]*temp_size};
+ auto local1 = std::vector<size_t>{db_["WGS1"]};
+ auto kernelEvent = Event();
+ RunKernel(kernel1, queue_, device_, global1, local1, kernelEvent.pointer());
+ eventWaitList.push_back(kernelEvent);
+
+ // Sets the arguments for the epilogue kernel
+ kernel2.SetArgument(0, temp_buffer());
+ kernel2.SetArgument(1, nrm2_buffer());
+ kernel2.SetArgument(2, static_cast<int>(nrm2_offset));
+
+ // Launches the epilogue kernel
+ auto global2 = std::vector<size_t>{db_["WGS2"]};
+ auto local2 = std::vector<size_t>{db_["WGS2"]};
+ RunKernel(kernel2, queue_, device_, global2, local2, event_, eventWaitList);
}
// =================================================================================================
diff --git a/src/routines/level1/xnrm2.hpp b/src/routines/level1/xnrm2.hpp
index 7baf07f5..3183ce24 100644
--- a/src/routines/level1/xnrm2.hpp
+++ b/src/routines/level1/xnrm2.hpp
@@ -28,9 +28,9 @@ class Xnrm2: public Routine {
Xnrm2(Queue &queue, EventPointer event, const std::string &name = "NRM2");
// Templated-precision implementation of the routine
- StatusCode DoNrm2(const size_t n,
- const Buffer<T> &nrm2_buffer, const size_t nrm2_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+ void DoNrm2(const size_t n,
+ const Buffer<T> &nrm2_buffer, const size_t nrm2_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
};
// =================================================================================================
diff --git a/src/routines/level1/xscal.cpp b/src/routines/level1/xscal.cpp
index bcc43c3b..17410f01 100644
--- a/src/routines/level1/xscal.cpp
+++ b/src/routines/level1/xscal.cpp
@@ -22,26 +22,24 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xscal<T>::Xscal(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level1/level1.opencl"
#include "../../kernels/level1/xscal.opencl"
- ;
+ }) {
}
// =================================================================================================
// The main routine
template <typename T>
-StatusCode Xscal<T>::DoScal(const size_t n, const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
+void Xscal<T>::DoScal(const size_t n, const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
// Makes sure all dimensions are larger than zero
- if (n == 0) { return StatusCode::kInvalidDimension; }
+ if (n == 0) { throw BLASError(StatusCode::kInvalidDimension); }
// Tests the vector for validity
- auto status = TestVectorX(n, x_buffer, x_offset, x_inc);
- if (ErrorIn(status)) { return status; }
+ TestVectorX(n, x_buffer, x_offset, x_inc);
// Determines whether or not the fast-version can be used
bool use_fast_kernel = (x_offset == 0) && (x_inc == 1) &&
@@ -51,41 +49,35 @@ StatusCode Xscal<T>::DoScal(const size_t n, const T alpha,
auto kernel_name = (use_fast_kernel) ? "XscalFast" : "Xscal";
// Retrieves the Xscal kernel from the compiled binary
- try {
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel = Kernel(program, kernel_name);
-
- // Sets the kernel arguments
- if (use_fast_kernel) {
- kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, alpha);
- kernel.SetArgument(2, x_buffer());
- }
- else {
- kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, alpha);
- kernel.SetArgument(2, x_buffer());
- kernel.SetArgument(3, static_cast<int>(x_offset));
- kernel.SetArgument(4, static_cast<int>(x_inc));
- }
-
- // Launches the kernel
- if (use_fast_kernel) {
- auto global = std::vector<size_t>{CeilDiv(n, db_["WPT"]*db_["VW"])};
- auto local = std::vector<size_t>{db_["WGS"]};
- status = RunKernel(kernel, queue_, device_, global, local, event_);
- }
- else {
- auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]);
- auto global = std::vector<size_t>{n_ceiled/db_["WPT"]};
- auto local = std::vector<size_t>{db_["WGS"]};
- status = RunKernel(kernel, queue_, device_, global, local, event_);
- }
- if (ErrorIn(status)) { return status; }
-
- // Succesfully finished the computation
- return StatusCode::kSuccess;
- } catch (...) { return StatusCode::kInvalidKernel; }
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+ auto kernel = Kernel(program, kernel_name);
+
+ // Sets the kernel arguments
+ if (use_fast_kernel) {
+ kernel.SetArgument(0, static_cast<int>(n));
+ kernel.SetArgument(1, alpha);
+ kernel.SetArgument(2, x_buffer());
+ }
+ else {
+ kernel.SetArgument(0, static_cast<int>(n));
+ kernel.SetArgument(1, alpha);
+ kernel.SetArgument(2, x_buffer());
+ kernel.SetArgument(3, static_cast<int>(x_offset));
+ kernel.SetArgument(4, static_cast<int>(x_inc));
+ }
+
+ // Launches the kernel
+ if (use_fast_kernel) {
+ auto global = std::vector<size_t>{CeilDiv(n, db_["WPT"]*db_["VW"])};
+ auto local = std::vector<size_t>{db_["WGS"]};
+ RunKernel(kernel, queue_, device_, global, local, event_);
+ }
+ else {
+ auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]);
+ auto global = std::vector<size_t>{n_ceiled/db_["WPT"]};
+ auto local = std::vector<size_t>{db_["WGS"]};
+ RunKernel(kernel, queue_, device_, global, local, event_);
+ }
}
// =================================================================================================
diff --git a/src/routines/level1/xscal.hpp b/src/routines/level1/xscal.hpp
index 6c585cb2..02c847cc 100644
--- a/src/routines/level1/xscal.hpp
+++ b/src/routines/level1/xscal.hpp
@@ -28,8 +28,8 @@ class Xscal: public Routine {
Xscal(Queue &queue, EventPointer event, const std::string &name = "SCAL");
// Templated-precision implementation of the routine
- StatusCode DoScal(const size_t n, const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+ void DoScal(const size_t n, const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
};
// =================================================================================================
diff --git a/src/routines/level1/xsum.hpp b/src/routines/level1/xsum.hpp
index 84e20bea..a69d6511 100644
--- a/src/routines/level1/xsum.hpp
+++ b/src/routines/level1/xsum.hpp
@@ -35,10 +35,10 @@ class Xsum: public Xasum<T> {
// Forwards to the regular absolute version. The implementation difference is realised in the
// kernel through a pre-processor macro based on the name of the routine.
- StatusCode DoSum(const size_t n,
- const Buffer<T> &sum_buffer, const size_t sum_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
- return DoAsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc);
+ void DoSum(const size_t n,
+ const Buffer<T> &sum_buffer, const size_t sum_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
+ DoAsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc);
}
};
diff --git a/src/routines/level1/xswap.cpp b/src/routines/level1/xswap.cpp
index 03907cbd..c9b97dc9 100644
--- a/src/routines/level1/xswap.cpp
+++ b/src/routines/level1/xswap.cpp
@@ -22,29 +22,26 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xswap<T>::Xswap(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level1/level1.opencl"
#include "../../kernels/level1/xswap.opencl"
- ;
+ }) {
}
// =================================================================================================
// The main routine
template <typename T>
-StatusCode Xswap<T>::DoSwap(const size_t n,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
+void Xswap<T>::DoSwap(const size_t n,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
// Makes sure all dimensions are larger than zero
- if (n == 0) { return StatusCode::kInvalidDimension; }
+ if (n == 0) { throw BLASError(StatusCode::kInvalidDimension); }
// Tests the vectors for validity
- auto status = TestVectorX(n, x_buffer, x_offset, x_inc);
- if (ErrorIn(status)) { return status; }
- status = TestVectorY(n, y_buffer, y_offset, y_inc);
- if (ErrorIn(status)) { return status; }
+ TestVectorX(n, x_buffer, x_offset, x_inc);
+ TestVectorY(n, y_buffer, y_offset, y_inc);
// Determines whether or not the fast-version can be used
bool use_fast_kernel = (x_offset == 0) && (x_inc == 1) &&
@@ -55,43 +52,37 @@ StatusCode Xswap<T>::DoSwap(const size_t n,
auto kernel_name = (use_fast_kernel) ? "XswapFast" : "Xswap";
// Retrieves the Xswap kernel from the compiled binary
- try {
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel = Kernel(program, kernel_name);
-
- // Sets the kernel arguments
- if (use_fast_kernel) {
- kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, x_buffer());
- kernel.SetArgument(2, y_buffer());
- }
- else {
- kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, x_buffer());
- kernel.SetArgument(2, static_cast<int>(x_offset));
- kernel.SetArgument(3, static_cast<int>(x_inc));
- kernel.SetArgument(4, y_buffer());
- kernel.SetArgument(5, static_cast<int>(y_offset));
- kernel.SetArgument(6, static_cast<int>(y_inc));
- }
-
- // Launches the kernel
- if (use_fast_kernel) {
- auto global = std::vector<size_t>{CeilDiv(n, db_["WPT"]*db_["VW"])};
- auto local = std::vector<size_t>{db_["WGS"]};
- status = RunKernel(kernel, queue_, device_, global, local, event_);
- }
- else {
- auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]);
- auto global = std::vector<size_t>{n_ceiled/db_["WPT"]};
- auto local = std::vector<size_t>{db_["WGS"]};
- status = RunKernel(kernel, queue_, device_, global, local, event_);
- }
- if (ErrorIn(status)) { return status; }
-
- // Succesfully finished the computation
- return StatusCode::kSuccess;
- } catch (...) { return StatusCode::kInvalidKernel; }
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+ auto kernel = Kernel(program, kernel_name);
+
+ // Sets the kernel arguments
+ if (use_fast_kernel) {
+ kernel.SetArgument(0, static_cast<int>(n));
+ kernel.SetArgument(1, x_buffer());
+ kernel.SetArgument(2, y_buffer());
+ }
+ else {
+ kernel.SetArgument(0, static_cast<int>(n));
+ kernel.SetArgument(1, x_buffer());
+ kernel.SetArgument(2, static_cast<int>(x_offset));
+ kernel.SetArgument(3, static_cast<int>(x_inc));
+ kernel.SetArgument(4, y_buffer());
+ kernel.SetArgument(5, static_cast<int>(y_offset));
+ kernel.SetArgument(6, static_cast<int>(y_inc));
+ }
+
+ // Launches the kernel
+ if (use_fast_kernel) {
+ auto global = std::vector<size_t>{CeilDiv(n, db_["WPT"]*db_["VW"])};
+ auto local = std::vector<size_t>{db_["WGS"]};
+ RunKernel(kernel, queue_, device_, global, local, event_);
+ }
+ else {
+ auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]);
+ auto global = std::vector<size_t>{n_ceiled/db_["WPT"]};
+ auto local = std::vector<size_t>{db_["WGS"]};
+ RunKernel(kernel, queue_, device_, global, local, event_);
+ }
}
// =================================================================================================
diff --git a/src/routines/level1/xswap.hpp b/src/routines/level1/xswap.hpp
index 4f9ea36d..eadf58e5 100644
--- a/src/routines/level1/xswap.hpp
+++ b/src/routines/level1/xswap.hpp
@@ -28,9 +28,9 @@ class Xswap: public Routine {
Xswap(Queue &queue, EventPointer event, const std::string &name = "SWAP");
// Templated-precision implementation of the routine
- StatusCode DoSwap(const size_t n,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+ void DoSwap(const size_t n,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
};
// =================================================================================================
diff --git a/src/routines/level2/xgbmv.cpp b/src/routines/level2/xgbmv.cpp
index ea4f001c..e80b9a96 100644
--- a/src/routines/level2/xgbmv.cpp
+++ b/src/routines/level2/xgbmv.cpp
@@ -29,13 +29,13 @@ Xgbmv<T>::Xgbmv(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xgbmv<T>::DoGbmv(const Layout layout, const Transpose a_transpose,
- const size_t m, const size_t n, const size_t kl, const size_t ku,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const T beta,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
+void Xgbmv<T>::DoGbmv(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n, const size_t kl, const size_t ku,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
// Reverses the upper and lower band count
auto rotated = (layout == Layout::kRowMajor);
@@ -46,13 +46,13 @@ StatusCode Xgbmv<T>::DoGbmv(const Layout layout, const Transpose a_transpose,
// The specific hermitian matrix-accesses are implemented in the kernel guarded by the
// ROUTINE_GBMV define.
bool fast_kernels = false;
- return MatVec(layout, a_transpose,
- m, n, alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc, beta,
- y_buffer, y_offset, y_inc,
- fast_kernels, fast_kernels,
- 0, false, kl_real, ku_real);
+ MatVec(layout, a_transpose,
+ m, n, alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc, beta,
+ y_buffer, y_offset, y_inc,
+ fast_kernels, fast_kernels,
+ 0, false, kl_real, ku_real);
}
// =================================================================================================
diff --git a/src/routines/level2/xgbmv.hpp b/src/routines/level2/xgbmv.hpp
index 686ab642..e5f670ec 100644
--- a/src/routines/level2/xgbmv.hpp
+++ b/src/routines/level2/xgbmv.hpp
@@ -33,13 +33,13 @@ class Xgbmv: public Xgemv<T> {
Xgbmv(Queue &queue, EventPointer event, const std::string &name = "GBMV");
// Templated-precision implementation of the routine
- StatusCode DoGbmv(const Layout layout, const Transpose a_transpose,
- const size_t m, const size_t n, const size_t kl, const size_t ku,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const T beta,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+ void DoGbmv(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n, const size_t kl, const size_t ku,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
};
// =================================================================================================
diff --git a/src/routines/level2/xgemv.cpp b/src/routines/level2/xgemv.cpp
index 4e32ba41..7b4c2e8f 100644
--- a/src/routines/level2/xgemv.cpp
+++ b/src/routines/level2/xgemv.cpp
@@ -22,52 +22,51 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xgemv<T>::Xgemv(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Pad", "Xgemv", "XgemvFast", "XgemvFastRot"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Pad", "Xgemv", "XgemvFast", "XgemvFastRot"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level2/xgemv.opencl"
#include "../../kernels/level2/xgemv_fast.opencl"
- ;
+ }) {
}
// =================================================================================================
// The main routine
template <typename T>
-StatusCode Xgemv<T>::DoGemv(const Layout layout, const Transpose a_transpose,
- const size_t m, const size_t n,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const T beta,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
+void Xgemv<T>::DoGemv(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
// Performs the matrix-vector multiplication
- return MatVec(layout, a_transpose,
- m, n, alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc, beta,
- y_buffer, y_offset, y_inc,
- true, true,
- 0, false, 0, 0); // N/A for this routine
+ MatVec(layout, a_transpose,
+ m, n, alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc, beta,
+ y_buffer, y_offset, y_inc,
+ true, true,
+ 0, false, 0, 0); // N/A for this routine
}
// =================================================================================================
// The generic implementation, also suited for other (non general) matrix-vector multiplications
template <typename T>
-StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
- const size_t m, const size_t n,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const T beta,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
- bool fast_kernel, bool fast_kernel_rot,
- const size_t parameter, const bool packed,
- const size_t kl, const size_t ku) {
+void Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ bool fast_kernel, bool fast_kernel_rot,
+ const size_t parameter, const bool packed,
+ const size_t kl, const size_t ku) {
// Makes sure all dimensions are larger than zero
- if (m == 0 || n == 0) { return StatusCode::kInvalidDimension; }
+ if (m == 0 || n == 0) { throw BLASError(StatusCode::kInvalidDimension); }
// Computes whether or not the matrix has an alternative layout (row or column-major).
auto a_altlayout = (layout == Layout::kRowMajor);
@@ -91,14 +90,10 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
auto a_conjugate = (a_transpose == Transpose::kConjugate);
// Tests the matrix and the vectors for validity
- auto status = StatusCode::kSuccess;
- if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); }
- else { status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); }
- if (ErrorIn(status)) { return status; }
- status = TestVectorX(n_real, x_buffer, x_offset, x_inc);
- if (ErrorIn(status)) { return status; }
- status = TestVectorY(m_real, y_buffer, y_offset, y_inc);
- if (ErrorIn(status)) { return status; }
+ if (packed) { TestMatrixAP(n, a_buffer, a_offset); }
+ else { TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld); }
+ TestVectorX(n_real, x_buffer, x_offset, x_inc);
+ TestVectorY(m_real, y_buffer, y_offset, y_inc);
// Determines whether or not the fast-version can be used
fast_kernel = fast_kernel && (a_offset == 0) && (a_rotated == 0) && (a_conjugate == 0) &&
@@ -127,39 +122,33 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
}
// Retrieves the Xgemv kernel from the compiled binary
- try {
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel = Kernel(program, kernel_name);
-
- // Sets the kernel arguments
- kernel.SetArgument(0, static_cast<int>(m_real));
- kernel.SetArgument(1, static_cast<int>(n_real));
- kernel.SetArgument(2, GetRealArg(alpha));
- kernel.SetArgument(3, GetRealArg(beta));
- kernel.SetArgument(4, static_cast<int>(a_rotated));
- kernel.SetArgument(5, a_buffer());
- kernel.SetArgument(6, static_cast<int>(a_offset));
- kernel.SetArgument(7, static_cast<int>(a_ld));
- kernel.SetArgument(8, x_buffer());
- kernel.SetArgument(9, static_cast<int>(x_offset));
- kernel.SetArgument(10, static_cast<int>(x_inc));
- kernel.SetArgument(11, y_buffer());
- kernel.SetArgument(12, static_cast<int>(y_offset));
- kernel.SetArgument(13, static_cast<int>(y_inc));
- kernel.SetArgument(14, static_cast<int>(a_conjugate));
- kernel.SetArgument(15, static_cast<int>(parameter)); // extra parameter used for symm/herm
- kernel.SetArgument(16, static_cast<int>(kl)); // only used for banded matrices
- kernel.SetArgument(17, static_cast<int>(ku)); // only used for banded matrices
-
- // Launches the kernel
- auto global = std::vector<size_t>{global_size};
- auto local = std::vector<size_t>{local_size};
- status = RunKernel(kernel, queue_, device_, global, local, event_);
- if (ErrorIn(status)) { return status; }
-
- // Succesfully finished the computation
- return StatusCode::kSuccess;
- } catch (...) { return StatusCode::kInvalidKernel; }
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+ auto kernel = Kernel(program, kernel_name);
+
+ // Sets the kernel arguments
+ kernel.SetArgument(0, static_cast<int>(m_real));
+ kernel.SetArgument(1, static_cast<int>(n_real));
+ kernel.SetArgument(2, GetRealArg(alpha));
+ kernel.SetArgument(3, GetRealArg(beta));
+ kernel.SetArgument(4, static_cast<int>(a_rotated));
+ kernel.SetArgument(5, a_buffer());
+ kernel.SetArgument(6, static_cast<int>(a_offset));
+ kernel.SetArgument(7, static_cast<int>(a_ld));
+ kernel.SetArgument(8, x_buffer());
+ kernel.SetArgument(9, static_cast<int>(x_offset));
+ kernel.SetArgument(10, static_cast<int>(x_inc));
+ kernel.SetArgument(11, y_buffer());
+ kernel.SetArgument(12, static_cast<int>(y_offset));
+ kernel.SetArgument(13, static_cast<int>(y_inc));
+ kernel.SetArgument(14, static_cast<int>(a_conjugate));
+ kernel.SetArgument(15, static_cast<int>(parameter)); // extra parameter used for symm/herm
+ kernel.SetArgument(16, static_cast<int>(kl)); // only used for banded matrices
+ kernel.SetArgument(17, static_cast<int>(ku)); // only used for banded matrices
+
+ // Launches the kernel
+ auto global = std::vector<size_t>{global_size};
+ auto local = std::vector<size_t>{local_size};
+ RunKernel(kernel, queue_, device_, global, local, event_);
}
// =================================================================================================
diff --git a/src/routines/level2/xgemv.hpp b/src/routines/level2/xgemv.hpp
index e9afec8d..1e1fa726 100644
--- a/src/routines/level2/xgemv.hpp
+++ b/src/routines/level2/xgemv.hpp
@@ -28,25 +28,25 @@ class Xgemv: public Routine {
Xgemv(Queue &queue, EventPointer event, const std::string &name = "GEMV");
// Templated-precision implementation of the routine
- StatusCode DoGemv(const Layout layout, const Transpose a_transpose,
- const size_t m, const size_t n,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const T beta,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+ void DoGemv(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
// Generic version used also for other matrix-vector multiplications
- StatusCode MatVec(const Layout layout, const Transpose a_transpose,
- const size_t m, const size_t n,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const T beta,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
- bool fast_kernel, bool fast_kernel_rot,
- const size_t parameter, const bool packed,
- const size_t kl, const size_t ku);
+ void MatVec(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ bool fast_kernel, bool fast_kernel_rot,
+ const size_t parameter, const bool packed,
+ const size_t kl, const size_t ku);
};
// =================================================================================================
diff --git a/src/routines/level2/xger.cpp b/src/routines/level2/xger.cpp
index 29cffe0c..d16ebd11 100644
--- a/src/routines/level2/xger.cpp
+++ b/src/routines/level2/xger.cpp
@@ -22,26 +22,25 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xger<T>::Xger(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xger"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xger"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level2/level2.opencl"
#include "../../kernels/level2/xger.opencl"
- ;
+ }) {
}
// =================================================================================================
// The main routine
template <typename T>
-StatusCode Xger<T>::DoGer(const Layout layout,
- const size_t m, const size_t n,
- const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld) {
+void Xger<T>::DoGer(const Layout layout,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld) {
// Makes sure all dimensions are larger than zero
- if (m == 0 || n == 0) { return StatusCode::kInvalidDimension; }
+ if (m == 0 || n == 0) { throw BLASError(StatusCode::kInvalidDimension); }
// Computes whether or not the matrix has an alternative layout (row or column-major).
const auto a_is_rowmajor = (layout == Layout::kRowMajor);
@@ -49,44 +48,35 @@ StatusCode Xger<T>::DoGer(const Layout layout,
const auto a_two = (a_is_rowmajor) ? m : n;
// Tests the matrix and the vectors for validity
- auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld);
- if (ErrorIn(status)) { return status; }
- status = TestVectorX(m, x_buffer, x_offset, x_inc);
- if (ErrorIn(status)) { return status; }
- status = TestVectorY(n, y_buffer, y_offset, y_inc);
- if (ErrorIn(status)) { return status; }
+ TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld);
+ TestVectorX(m, x_buffer, x_offset, x_inc);
+ TestVectorY(n, y_buffer, y_offset, y_inc);
// Retrieves the kernel from the compiled binary
- try {
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel = Kernel(program, "Xger");
-
- // Sets the kernel arguments
- kernel.SetArgument(0, static_cast<int>(a_one));
- kernel.SetArgument(1, static_cast<int>(a_two));
- kernel.SetArgument(2, GetRealArg(alpha));
- kernel.SetArgument(3, x_buffer());
- kernel.SetArgument(4, static_cast<int>(x_offset));
- kernel.SetArgument(5, static_cast<int>(x_inc));
- kernel.SetArgument(6, y_buffer());
- kernel.SetArgument(7, static_cast<int>(y_offset));
- kernel.SetArgument(8, static_cast<int>(y_inc));
- kernel.SetArgument(9, a_buffer());
- kernel.SetArgument(10, static_cast<int>(a_offset));
- kernel.SetArgument(11, static_cast<int>(a_ld));
- kernel.SetArgument(12, static_cast<int>(a_is_rowmajor));
-
- // Launches the kernel
- auto a_one_ceiled = Ceil(CeilDiv(a_one, db_["WPT"]), db_["WGS1"]);
- auto a_two_ceiled = Ceil(CeilDiv(a_two, db_["WPT"]), db_["WGS2"]);
- auto global = std::vector<size_t>{a_one_ceiled, a_two_ceiled};
- auto local = std::vector<size_t>{db_["WGS1"], db_["WGS2"]};
- status = RunKernel(kernel, queue_, device_, global, local, event_);
- if (ErrorIn(status)) { return status; }
-
- // Succesfully finished the computation
- return StatusCode::kSuccess;
- } catch (...) { return StatusCode::kInvalidKernel; }
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+ auto kernel = Kernel(program, "Xger");
+
+ // Sets the kernel arguments
+ kernel.SetArgument(0, static_cast<int>(a_one));
+ kernel.SetArgument(1, static_cast<int>(a_two));
+ kernel.SetArgument(2, GetRealArg(alpha));
+ kernel.SetArgument(3, x_buffer());
+ kernel.SetArgument(4, static_cast<int>(x_offset));
+ kernel.SetArgument(5, static_cast<int>(x_inc));
+ kernel.SetArgument(6, y_buffer());
+ kernel.SetArgument(7, static_cast<int>(y_offset));
+ kernel.SetArgument(8, static_cast<int>(y_inc));
+ kernel.SetArgument(9, a_buffer());
+ kernel.SetArgument(10, static_cast<int>(a_offset));
+ kernel.SetArgument(11, static_cast<int>(a_ld));
+ kernel.SetArgument(12, static_cast<int>(a_is_rowmajor));
+
+ // Launches the kernel
+ auto a_one_ceiled = Ceil(CeilDiv(a_one, db_["WPT"]), db_["WGS1"]);
+ auto a_two_ceiled = Ceil(CeilDiv(a_two, db_["WPT"]), db_["WGS2"]);
+ auto global = std::vector<size_t>{a_one_ceiled, a_two_ceiled};
+ auto local = std::vector<size_t>{db_["WGS1"], db_["WGS2"]};
+ RunKernel(kernel, queue_, device_, global, local, event_);
}
// =================================================================================================
diff --git a/src/routines/level2/xger.hpp b/src/routines/level2/xger.hpp
index 3c6abe44..fbbb07a1 100644
--- a/src/routines/level2/xger.hpp
+++ b/src/routines/level2/xger.hpp
@@ -28,12 +28,12 @@ class Xger: public Routine {
Xger(Queue &queue, EventPointer event, const std::string &name = "GER");
// Templated-precision implementation of the routine
- StatusCode DoGer(const Layout layout,
- const size_t m, const size_t n,
- const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
+ void DoGer(const Layout layout,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
};
// =================================================================================================
diff --git a/src/routines/level2/xgerc.cpp b/src/routines/level2/xgerc.cpp
index d9feda97..4fa2e2a8 100644
--- a/src/routines/level2/xgerc.cpp
+++ b/src/routines/level2/xgerc.cpp
@@ -28,19 +28,19 @@ Xgerc<T>::Xgerc(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xgerc<T>::DoGerc(const Layout layout,
- const size_t m, const size_t n,
- const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld) {
+void Xgerc<T>::DoGerc(const Layout layout,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld) {
// Regular Ger operation on complex data, plus conjugation in the kernel guarded by the
// ROUTINE_GERC guard.
- return DoGer(layout, m, n, alpha,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- a_buffer, a_offset, a_ld);
+ DoGer(layout, m, n, alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld);
}
// =================================================================================================
diff --git a/src/routines/level2/xgerc.hpp b/src/routines/level2/xgerc.hpp
index f1d04dfd..2d61f2b7 100644
--- a/src/routines/level2/xgerc.hpp
+++ b/src/routines/level2/xgerc.hpp
@@ -31,12 +31,12 @@ class Xgerc: public Xger<T> {
Xgerc(Queue &queue, EventPointer event, const std::string &name = "GERC");
// Templated-precision implementation of the routine
- StatusCode DoGerc(const Layout layout,
- const size_t m, const size_t n,
- const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
+ void DoGerc(const Layout layout,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
};
// =================================================================================================
diff --git a/src/routines/level2/xgeru.cpp b/src/routines/level2/xgeru.cpp
index da9e91c2..c77e69c5 100644
--- a/src/routines/level2/xgeru.cpp
+++ b/src/routines/level2/xgeru.cpp
@@ -28,18 +28,18 @@ Xgeru<T>::Xgeru(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xgeru<T>::DoGeru(const Layout layout,
- const size_t m, const size_t n,
- const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld) {
+void Xgeru<T>::DoGeru(const Layout layout,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld) {
// Regular Ger operation on complex data
- return DoGer(layout, m, n, alpha,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- a_buffer, a_offset, a_ld);
+ DoGer(layout, m, n, alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld);
}
// =================================================================================================
diff --git a/src/routines/level2/xgeru.hpp b/src/routines/level2/xgeru.hpp
index fb50e917..4cae6b58 100644
--- a/src/routines/level2/xgeru.hpp
+++ b/src/routines/level2/xgeru.hpp
@@ -31,12 +31,12 @@ class Xgeru: public Xger<T> {
Xgeru(Queue &queue, EventPointer event, const std::string &name = "GERU");
// Templated-precision implementation of the routine
- StatusCode DoGeru(const Layout layout,
- const size_t m, const size_t n,
- const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
+ void DoGeru(const Layout layout,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
};
// =================================================================================================
diff --git a/src/routines/level2/xhbmv.cpp b/src/routines/level2/xhbmv.cpp
index f6c0e3c4..c7c9ed9d 100644
--- a/src/routines/level2/xhbmv.cpp
+++ b/src/routines/level2/xhbmv.cpp
@@ -29,13 +29,13 @@ Xhbmv<T>::Xhbmv(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xhbmv<T>::DoHbmv(const Layout layout, const Triangle triangle,
- const size_t n, const size_t k,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const T beta,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
+void Xhbmv<T>::DoHbmv(const Layout layout, const Triangle triangle,
+ const size_t n, const size_t k,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
// The data is either in the upper or lower triangle
size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) ||
@@ -45,13 +45,13 @@ StatusCode Xhbmv<T>::DoHbmv(const Layout layout, const Triangle triangle,
// The specific hermitian banded matrix-accesses are implemented in the kernel guarded by the
// ROUTINE_HBMV define.
bool fast_kernels = false;
- return MatVec(layout, Transpose::kNo,
- n, n, alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc, beta,
- y_buffer, y_offset, y_inc,
- fast_kernels, fast_kernels,
- is_upper, false, k, 0);
+ MatVec(layout, Transpose::kNo,
+ n, n, alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc, beta,
+ y_buffer, y_offset, y_inc,
+ fast_kernels, fast_kernels,
+ is_upper, false, k, 0);
}
// =================================================================================================
diff --git a/src/routines/level2/xhbmv.hpp b/src/routines/level2/xhbmv.hpp
index d668eb88..76d3c91e 100644
--- a/src/routines/level2/xhbmv.hpp
+++ b/src/routines/level2/xhbmv.hpp
@@ -33,13 +33,13 @@ class Xhbmv: public Xgemv<T> {
Xhbmv(Queue &queue, EventPointer event, const std::string &name = "HBMV");
// Templated-precision implementation of the routine
- StatusCode DoHbmv(const Layout layout, const Triangle triangle,
- const size_t n, const size_t k,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const T beta,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+ void DoHbmv(const Layout layout, const Triangle triangle,
+ const size_t n, const size_t k,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
};
// =================================================================================================
diff --git a/src/routines/level2/xhemv.cpp b/src/routines/level2/xhemv.cpp
index 2cbcf7b4..209ff654 100644
--- a/src/routines/level2/xhemv.cpp
+++ b/src/routines/level2/xhemv.cpp
@@ -29,13 +29,13 @@ Xhemv<T>::Xhemv(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xhemv<T>::DoHemv(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const T beta,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
+void Xhemv<T>::DoHemv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
// The data is either in the upper or lower triangle
size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) ||
@@ -45,13 +45,13 @@ StatusCode Xhemv<T>::DoHemv(const Layout layout, const Triangle triangle,
// The specific hermitian matrix-accesses are implemented in the kernel guarded by the
// ROUTINE_HEMV define.
bool fast_kernels = false;
- return MatVec(layout, Transpose::kNo,
- n, n, alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc, beta,
- y_buffer, y_offset, y_inc,
- fast_kernels, fast_kernels,
- is_upper, false, 0, 0);
+ MatVec(layout, Transpose::kNo,
+ n, n, alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc, beta,
+ y_buffer, y_offset, y_inc,
+ fast_kernels, fast_kernels,
+ is_upper, false, 0, 0);
}
// =================================================================================================
diff --git a/src/routines/level2/xhemv.hpp b/src/routines/level2/xhemv.hpp
index 8e062fd3..20d2df22 100644
--- a/src/routines/level2/xhemv.hpp
+++ b/src/routines/level2/xhemv.hpp
@@ -33,13 +33,13 @@ class Xhemv: public Xgemv<T> {
Xhemv(Queue &queue, EventPointer event, const std::string &name = "HEMV");
// Templated-precision implementation of the routine
- StatusCode DoHemv(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const T beta,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+ void DoHemv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
};
// =================================================================================================
diff --git a/src/routines/level2/xher.cpp b/src/routines/level2/xher.cpp
index 6dd95938..6c334e63 100644
--- a/src/routines/level2/xher.cpp
+++ b/src/routines/level2/xher.cpp
@@ -21,11 +21,10 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T, typename U>
Xher<T,U>::Xher(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xger"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xger"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level2/level2.opencl"
#include "../../kernels/level2/xher.opencl"
- ;
+ }) {
}
// =================================================================================================
@@ -41,15 +40,15 @@ template <> half Xher<half,half>::GetAlpha(const half alpha) { return alpha; }
// The main routine
template <typename T, typename U>
-StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle,
- const size_t n,
- const U alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const bool packed) {
+void Xher<T,U>::DoHer(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const U alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const bool packed) {
// Makes sure the dimensions are larger than zero
- if (n == 0) { return StatusCode::kInvalidDimension; }
+ if (n == 0) { throw BLASError(StatusCode::kInvalidDimension); }
// The data is either in the upper or lower triangle
const auto is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) ||
@@ -57,47 +56,38 @@ StatusCode Xher<T,U>::DoHer(const Layout layout, const Triangle triangle,
const auto is_rowmajor = (layout == Layout::kRowMajor);
// Tests the matrix and the vectors for validity
- auto status = StatusCode::kSuccess;
- if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); }
- else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld); }
- if (ErrorIn(status)) { return status; }
- status = TestVectorX(n, x_buffer, x_offset, x_inc);
- if (ErrorIn(status)) { return status; }
+ if (packed) { TestMatrixAP(n, a_buffer, a_offset); }
+ else { TestMatrixA(n, n, a_buffer, a_offset, a_ld); }
+ TestVectorX(n, x_buffer, x_offset, x_inc);
// If alpha is zero an update is not required
- if (alpha == U{0}) { return StatusCode::kSuccess; }
+ if (alpha == U{0}) { return; }
// Creates a matching version of alpha
const auto matching_alpha = GetAlpha(alpha);
// Retrieves the kernel from the compiled binary
- try {
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel = Kernel(program, "Xher");
-
- // Sets the kernel arguments
- kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, GetRealArg(matching_alpha));
- kernel.SetArgument(2, x_buffer());
- kernel.SetArgument(3, static_cast<int>(x_offset));
- kernel.SetArgument(4, static_cast<int>(x_inc));
- kernel.SetArgument(5, a_buffer());
- kernel.SetArgument(6, static_cast<int>(a_offset));
- kernel.SetArgument(7, static_cast<int>(a_ld));
- kernel.SetArgument(8, static_cast<int>(is_upper));
- kernel.SetArgument(9, static_cast<int>(is_rowmajor));
-
- // Launches the kernel
- auto global_one = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS1"]);
- auto global_two = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS2"]);
- auto global = std::vector<size_t>{global_one, global_two};
- auto local = std::vector<size_t>{db_["WGS1"], db_["WGS2"]};
- status = RunKernel(kernel, queue_, device_, global, local, event_);
- if (ErrorIn(status)) { return status; }
-
- // Succesfully finished the computation
- return StatusCode::kSuccess;
- } catch (...) { return StatusCode::kInvalidKernel; }
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+ auto kernel = Kernel(program, "Xher");
+
+ // Sets the kernel arguments
+ kernel.SetArgument(0, static_cast<int>(n));
+ kernel.SetArgument(1, GetRealArg(matching_alpha));
+ kernel.SetArgument(2, x_buffer());
+ kernel.SetArgument(3, static_cast<int>(x_offset));
+ kernel.SetArgument(4, static_cast<int>(x_inc));
+ kernel.SetArgument(5, a_buffer());
+ kernel.SetArgument(6, static_cast<int>(a_offset));
+ kernel.SetArgument(7, static_cast<int>(a_ld));
+ kernel.SetArgument(8, static_cast<int>(is_upper));
+ kernel.SetArgument(9, static_cast<int>(is_rowmajor));
+
+ // Launches the kernel
+ auto global_one = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS1"]);
+ auto global_two = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS2"]);
+ auto global = std::vector<size_t>{global_one, global_two};
+ auto local = std::vector<size_t>{db_["WGS1"], db_["WGS2"]};
+ RunKernel(kernel, queue_, device_, global, local, event_);
}
// =================================================================================================
diff --git a/src/routines/level2/xher.hpp b/src/routines/level2/xher.hpp
index 9ff6bf3f..70a30bda 100644
--- a/src/routines/level2/xher.hpp
+++ b/src/routines/level2/xher.hpp
@@ -31,12 +31,12 @@ class Xher: public Routine {
T GetAlpha(const U alpha);
// Templated-precision implementation of the routine
- StatusCode DoHer(const Layout layout, const Triangle triangle,
- const size_t n,
- const U alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const bool packed = false);
+ void DoHer(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const U alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const bool packed = false);
};
// =================================================================================================
diff --git a/src/routines/level2/xher2.cpp b/src/routines/level2/xher2.cpp
index 3d57a9b9..11e2c871 100644
--- a/src/routines/level2/xher2.cpp
+++ b/src/routines/level2/xher2.cpp
@@ -21,27 +21,26 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xher2<T>::Xher2(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Xger"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Xger"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level2/level2.opencl"
#include "../../kernels/level2/xher2.opencl"
- ;
+ }) {
}
// =================================================================================================
// The main routine
template <typename T>
-StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const bool packed) {
+void Xher2<T>::DoHer2(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const bool packed) {
// Makes sure the dimensions are larger than zero
- if (n == 0) { return StatusCode::kInvalidDimension; }
+ if (n == 0) { throw BLASError(StatusCode::kInvalidDimension); }
// The data is either in the upper or lower triangle
const auto is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) ||
@@ -49,46 +48,36 @@ StatusCode Xher2<T>::DoHer2(const Layout layout, const Triangle triangle,
const auto is_rowmajor = (layout == Layout::kRowMajor);
// Tests the matrix and the vectors for validity
- auto status = StatusCode::kSuccess;
- if (packed) { status = TestMatrixAP(n, a_buffer, a_offset); }
- else { status = TestMatrixA(n, n, a_buffer, a_offset, a_ld); }
- if (ErrorIn(status)) { return status; }
- status = TestVectorX(n, x_buffer, x_offset, x_inc);
- if (ErrorIn(status)) { return status; }
- status = TestVectorY(n, y_buffer, y_offset, y_inc);
- if (ErrorIn(status)) { return status; }
+ if (packed) { TestMatrixAP(n, a_buffer, a_offset); }
+ else { TestMatrixA(n, n, a_buffer, a_offset, a_ld); }
+ TestVectorX(n, x_buffer, x_offset, x_inc);
+ TestVectorY(n, y_buffer, y_offset, y_inc);
// Retrieves the kernel from the compiled binary
- try {
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel = Kernel(program, "Xher2");
-
- // Sets the kernel arguments
- kernel.SetArgument(0, static_cast<int>(n));
- kernel.SetArgument(1, GetRealArg(alpha));
- kernel.SetArgument(2, x_buffer());
- kernel.SetArgument(3, static_cast<int>(x_offset));
- kernel.SetArgument(4, static_cast<int>(x_inc));
- kernel.SetArgument(5, y_buffer());
- kernel.SetArgument(6, static_cast<int>(y_offset));
- kernel.SetArgument(7, static_cast<int>(y_inc));
- kernel.SetArgument(8, a_buffer());
- kernel.SetArgument(9, static_cast<int>(a_offset));
- kernel.SetArgument(10, static_cast<int>(a_ld));
- kernel.SetArgument(11, static_cast<int>(is_upper));
- kernel.SetArgument(12, static_cast<int>(is_rowmajor));
-
- // Launches the kernel
- auto global_one = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS1"]);
- auto global_two = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS2"]);
- auto global = std::vector<size_t>{global_one, global_two};
- auto local = std::vector<size_t>{db_["WGS1"], db_["WGS2"]};
- status = RunKernel(kernel, queue_, device_, global, local, event_);
- if (ErrorIn(status)) { return status; }
-
- // Succesfully finished the computation
- return StatusCode::kSuccess;
- } catch (...) { return StatusCode::kInvalidKernel; }
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+ auto kernel = Kernel(program, "Xher2");
+
+ // Sets the kernel arguments
+ kernel.SetArgument(0, static_cast<int>(n));
+ kernel.SetArgument(1, GetRealArg(alpha));
+ kernel.SetArgument(2, x_buffer());
+ kernel.SetArgument(3, static_cast<int>(x_offset));
+ kernel.SetArgument(4, static_cast<int>(x_inc));
+ kernel.SetArgument(5, y_buffer());
+ kernel.SetArgument(6, static_cast<int>(y_offset));
+ kernel.SetArgument(7, static_cast<int>(y_inc));
+ kernel.SetArgument(8, a_buffer());
+ kernel.SetArgument(9, static_cast<int>(a_offset));
+ kernel.SetArgument(10, static_cast<int>(a_ld));
+ kernel.SetArgument(11, static_cast<int>(is_upper));
+ kernel.SetArgument(12, static_cast<int>(is_rowmajor));
+
+ // Launches the kernel
+ auto global_one = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS1"]);
+ auto global_two = Ceil(CeilDiv(n, db_["WPT"]), db_["WGS2"]);
+ auto global = std::vector<size_t>{global_one, global_two};
+ auto local = std::vector<size_t>{db_["WGS1"], db_["WGS2"]};
+ RunKernel(kernel, queue_, device_, global, local, event_);
}
// =================================================================================================
diff --git a/src/routines/level2/xher2.hpp b/src/routines/level2/xher2.hpp
index 8c53c047..dcb2ecb7 100644
--- a/src/routines/level2/xher2.hpp
+++ b/src/routines/level2/xher2.hpp
@@ -28,13 +28,13 @@ class Xher2: public Routine {
Xher2(Queue &queue, EventPointer event, const std::string &name = "HER2");
// Templated-precision implementation of the routine
- StatusCode DoHer2(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const bool packed = false);
+ void DoHer2(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const bool packed = false);
};
// =================================================================================================
diff --git a/src/routines/level2/xhpmv.cpp b/src/routines/level2/xhpmv.cpp
index e6f82b34..70a0ab0d 100644
--- a/src/routines/level2/xhpmv.cpp
+++ b/src/routines/level2/xhpmv.cpp
@@ -29,13 +29,13 @@ Xhpmv<T>::Xhpmv(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xhpmv<T>::DoHpmv(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &ap_buffer, const size_t ap_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const T beta,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
+void Xhpmv<T>::DoHpmv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &ap_buffer, const size_t ap_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
// The data is either in the upper or lower triangle
size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) ||
@@ -45,13 +45,13 @@ StatusCode Xhpmv<T>::DoHpmv(const Layout layout, const Triangle triangle,
// The specific hermitian packed matrix-accesses are implemented in the kernel guarded by the
// ROUTINE_HPMV define.
bool fast_kernels = false;
- return MatVec(layout, Transpose::kNo,
- n, n, alpha,
- ap_buffer, ap_offset, n,
- x_buffer, x_offset, x_inc, beta,
- y_buffer, y_offset, y_inc,
- fast_kernels, fast_kernels,
- is_upper, true, 0, 0);
+ MatVec(layout, Transpose::kNo,
+ n, n, alpha,
+ ap_buffer, ap_offset, n,
+ x_buffer, x_offset, x_inc, beta,
+ y_buffer, y_offset, y_inc,
+ fast_kernels, fast_kernels,
+ is_upper, true, 0, 0);
}
// =================================================================================================
diff --git a/src/routines/level2/xhpmv.hpp b/src/routines/level2/xhpmv.hpp
index b11192f9..13a6277c 100644
--- a/src/routines/level2/xhpmv.hpp
+++ b/src/routines/level2/xhpmv.hpp
@@ -33,13 +33,13 @@ class Xhpmv: public Xgemv<T> {
Xhpmv(Queue &queue, EventPointer event, const std::string &name = "HPMV");
// Templated-precision implementation of the routine
- StatusCode DoHpmv(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &ap_buffer, const size_t ap_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const T beta,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+ void DoHpmv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &ap_buffer, const size_t ap_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
};
// =================================================================================================
diff --git a/src/routines/level2/xhpr.cpp b/src/routines/level2/xhpr.cpp
index 225ebfe5..7e517c59 100644
--- a/src/routines/level2/xhpr.cpp
+++ b/src/routines/level2/xhpr.cpp
@@ -28,17 +28,17 @@ Xhpr<T,U>::Xhpr(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T, typename U>
-StatusCode Xhpr<T,U>::DoHpr(const Layout layout, const Triangle triangle,
- const size_t n,
- const U alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &ap_buffer, const size_t ap_offset) {
+void Xhpr<T,U>::DoHpr(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const U alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &ap_buffer, const size_t ap_offset) {
// Specific Xhpr functionality is implemented in the kernel using defines
- return DoHer(layout, triangle, n, alpha,
- x_buffer, x_offset, x_inc,
- ap_buffer, ap_offset, n,
- true); // packed matrix
+ DoHer(layout, triangle, n, alpha,
+ x_buffer, x_offset, x_inc,
+ ap_buffer, ap_offset, n,
+ true); // packed matrix
}
// =================================================================================================
diff --git a/src/routines/level2/xhpr.hpp b/src/routines/level2/xhpr.hpp
index 37801c68..6ebc220e 100644
--- a/src/routines/level2/xhpr.hpp
+++ b/src/routines/level2/xhpr.hpp
@@ -31,11 +31,11 @@ class Xhpr: public Xher<T,U> {
Xhpr(Queue &queue, EventPointer event, const std::string &name = "HPR");
// Templated-precision implementation of the routine
- StatusCode DoHpr(const Layout layout, const Triangle triangle,
- const size_t n,
- const U alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &ap_buffer, const size_t ap_offset);
+ void DoHpr(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const U alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &ap_buffer, const size_t ap_offset);
};
// =================================================================================================
diff --git a/src/routines/level2/xhpr2.cpp b/src/routines/level2/xhpr2.cpp
index 85f9d3f9..35daa365 100644
--- a/src/routines/level2/xhpr2.cpp
+++ b/src/routines/level2/xhpr2.cpp
@@ -28,19 +28,19 @@ Xhpr2<T>::Xhpr2(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xhpr2<T>::DoHpr2(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
- const Buffer<T> &ap_buffer, const size_t ap_offset) {
+void Xhpr2<T>::DoHpr2(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &ap_buffer, const size_t ap_offset) {
// Specific Xhpr2 functionality is implemented in the kernel using defines
- return DoHer2(layout, triangle, n, alpha,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- ap_buffer, ap_offset, n,
- true); // packed matrix
+ DoHer2(layout, triangle, n, alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ ap_buffer, ap_offset, n,
+ true); // packed matrix
}
// =================================================================================================
diff --git a/src/routines/level2/xhpr2.hpp b/src/routines/level2/xhpr2.hpp
index d66dce55..f344fd48 100644
--- a/src/routines/level2/xhpr2.hpp
+++ b/src/routines/level2/xhpr2.hpp
@@ -31,12 +31,12 @@ class Xhpr2: public Xher2<T> {
Xhpr2(Queue &queue, EventPointer event, const std::string &name = "HPR2");
// Templated-precision implementation of the routine
- StatusCode DoHpr2(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
- const Buffer<T> &ap_buffer, const size_t ap_offset);
+ void DoHpr2(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &ap_buffer, const size_t ap_offset);
};
// =================================================================================================
diff --git a/src/routines/level2/xsbmv.cpp b/src/routines/level2/xsbmv.cpp
index 28730899..e47430d1 100644
--- a/src/routines/level2/xsbmv.cpp
+++ b/src/routines/level2/xsbmv.cpp
@@ -29,13 +29,13 @@ Xsbmv<T>::Xsbmv(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xsbmv<T>::DoSbmv(const Layout layout, const Triangle triangle,
- const size_t n, const size_t k,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const T beta,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
+void Xsbmv<T>::DoSbmv(const Layout layout, const Triangle triangle,
+ const size_t n, const size_t k,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
// The data is either in the upper or lower triangle
size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) ||
@@ -45,13 +45,13 @@ StatusCode Xsbmv<T>::DoSbmv(const Layout layout, const Triangle triangle,
// The specific symmetric banded matrix-accesses are implemented in the kernel guarded by the
// ROUTINE_SBMV define.
bool fast_kernels = false;
- return MatVec(layout, Transpose::kNo,
- n, n, alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc, beta,
- y_buffer, y_offset, y_inc,
- fast_kernels, fast_kernels,
- is_upper, false, k, 0);
+ MatVec(layout, Transpose::kNo,
+ n, n, alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc, beta,
+ y_buffer, y_offset, y_inc,
+ fast_kernels, fast_kernels,
+ is_upper, false, k, 0);
}
// =================================================================================================
diff --git a/src/routines/level2/xsbmv.hpp b/src/routines/level2/xsbmv.hpp
index 16c5e9a8..a4542f49 100644
--- a/src/routines/level2/xsbmv.hpp
+++ b/src/routines/level2/xsbmv.hpp
@@ -33,13 +33,13 @@ class Xsbmv: public Xgemv<T> {
Xsbmv(Queue &queue, EventPointer event, const std::string &name = "SBMV");
// Templated-precision implementation of the routine
- StatusCode DoSbmv(const Layout layout, const Triangle triangle,
- const size_t n, const size_t k,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const T beta,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+ void DoSbmv(const Layout layout, const Triangle triangle,
+ const size_t n, const size_t k,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
};
// =================================================================================================
diff --git a/src/routines/level2/xspmv.cpp b/src/routines/level2/xspmv.cpp
index f6651012..bf1a49e1 100644
--- a/src/routines/level2/xspmv.cpp
+++ b/src/routines/level2/xspmv.cpp
@@ -29,13 +29,13 @@ Xspmv<T>::Xspmv(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xspmv<T>::DoSpmv(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &ap_buffer, const size_t ap_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const T beta,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
+void Xspmv<T>::DoSpmv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &ap_buffer, const size_t ap_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
// The data is either in the upper or lower triangle
size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) ||
@@ -45,13 +45,13 @@ StatusCode Xspmv<T>::DoSpmv(const Layout layout, const Triangle triangle,
// The specific symmetric packed matrix-accesses are implemented in the kernel guarded by the
// ROUTINE_SPMV define.
bool fast_kernels = false;
- return MatVec(layout, Transpose::kNo,
- n, n, alpha,
- ap_buffer, ap_offset, n,
- x_buffer, x_offset, x_inc, beta,
- y_buffer, y_offset, y_inc,
- fast_kernels, fast_kernels,
- is_upper, true, 0, 0);
+ MatVec(layout, Transpose::kNo,
+ n, n, alpha,
+ ap_buffer, ap_offset, n,
+ x_buffer, x_offset, x_inc, beta,
+ y_buffer, y_offset, y_inc,
+ fast_kernels, fast_kernels,
+ is_upper, true, 0, 0);
}
// =================================================================================================
diff --git a/src/routines/level2/xspmv.hpp b/src/routines/level2/xspmv.hpp
index a0c69b85..94caa4ac 100644
--- a/src/routines/level2/xspmv.hpp
+++ b/src/routines/level2/xspmv.hpp
@@ -33,13 +33,13 @@ class Xspmv: public Xgemv<T> {
Xspmv(Queue &queue, EventPointer event, const std::string &name = "SPMV");
// Templated-precision implementation of the routine
- StatusCode DoSpmv(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &ap_buffer, const size_t ap_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const T beta,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+ void DoSpmv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &ap_buffer, const size_t ap_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
};
// =================================================================================================
diff --git a/src/routines/level2/xspr.cpp b/src/routines/level2/xspr.cpp
index a75fe9c3..56791a7b 100644
--- a/src/routines/level2/xspr.cpp
+++ b/src/routines/level2/xspr.cpp
@@ -28,17 +28,17 @@ Xspr<T>::Xspr(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xspr<T>::DoSpr(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &ap_buffer, const size_t ap_offset) {
+void Xspr<T>::DoSpr(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &ap_buffer, const size_t ap_offset) {
// Specific Xspr functionality is implemented in the kernel using defines
- return DoHer(layout, triangle, n, alpha,
- x_buffer, x_offset, x_inc,
- ap_buffer, ap_offset, n,
- true); // packed matrix
+ DoHer(layout, triangle, n, alpha,
+ x_buffer, x_offset, x_inc,
+ ap_buffer, ap_offset, n,
+ true); // packed matrix
}
// =================================================================================================
diff --git a/src/routines/level2/xspr.hpp b/src/routines/level2/xspr.hpp
index 6468c736..760a2ddb 100644
--- a/src/routines/level2/xspr.hpp
+++ b/src/routines/level2/xspr.hpp
@@ -31,11 +31,11 @@ class Xspr: public Xher<T,T> {
Xspr(Queue &queue, EventPointer event, const std::string &name = "SPR");
// Templated-precision implementation of the routine
- StatusCode DoSpr(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &ap_buffer, const size_t ap_offset);
+ void DoSpr(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &ap_buffer, const size_t ap_offset);
};
// =================================================================================================
diff --git a/src/routines/level2/xspr2.cpp b/src/routines/level2/xspr2.cpp
index c39a2eb4..8d0432c2 100644
--- a/src/routines/level2/xspr2.cpp
+++ b/src/routines/level2/xspr2.cpp
@@ -28,19 +28,19 @@ Xspr2<T>::Xspr2(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xspr2<T>::DoSpr2(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
- const Buffer<T> &ap_buffer, const size_t ap_offset) {
+void Xspr2<T>::DoSpr2(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &ap_buffer, const size_t ap_offset) {
// Specific Xspr2 functionality is implemented in the kernel using defines
- return DoHer2(layout, triangle, n, alpha,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- ap_buffer, ap_offset, n,
- true); // packed matrix
+ DoHer2(layout, triangle, n, alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ ap_buffer, ap_offset, n,
+ true); // packed matrix
}
// =================================================================================================
diff --git a/src/routines/level2/xspr2.hpp b/src/routines/level2/xspr2.hpp
index 693c56a1..9f03f768 100644
--- a/src/routines/level2/xspr2.hpp
+++ b/src/routines/level2/xspr2.hpp
@@ -31,12 +31,12 @@ class Xspr2: public Xher2<T> {
Xspr2(Queue &queue, EventPointer event, const std::string &name = "SPR2");
// Templated-precision implementation of the routine
- StatusCode DoSpr2(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
- const Buffer<T> &ap_buffer, const size_t ap_offset);
+ void DoSpr2(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &ap_buffer, const size_t ap_offset);
};
// =================================================================================================
diff --git a/src/routines/level2/xsymv.cpp b/src/routines/level2/xsymv.cpp
index 648d2a3e..86bb66b8 100644
--- a/src/routines/level2/xsymv.cpp
+++ b/src/routines/level2/xsymv.cpp
@@ -29,13 +29,13 @@ Xsymv<T>::Xsymv(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xsymv<T>::DoSymv(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const T beta,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
+void Xsymv<T>::DoSymv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc) {
// The data is either in the upper or lower triangle
size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) ||
@@ -45,13 +45,13 @@ StatusCode Xsymv<T>::DoSymv(const Layout layout, const Triangle triangle,
// The specific symmetric matrix-accesses are implemented in the kernel guarded by the
// ROUTINE_SYMV define.
bool fast_kernels = false;
- return MatVec(layout, Transpose::kNo,
- n, n, alpha,
- a_buffer, a_offset, a_ld,
- x_buffer, x_offset, x_inc, beta,
- y_buffer, y_offset, y_inc,
- fast_kernels, fast_kernels,
- is_upper, false, 0, 0);
+ MatVec(layout, Transpose::kNo,
+ n, n, alpha,
+ a_buffer, a_offset, a_ld,
+ x_buffer, x_offset, x_inc, beta,
+ y_buffer, y_offset, y_inc,
+ fast_kernels, fast_kernels,
+ is_upper, false, 0, 0);
}
// =================================================================================================
diff --git a/src/routines/level2/xsymv.hpp b/src/routines/level2/xsymv.hpp
index 67815f2f..3945802f 100644
--- a/src/routines/level2/xsymv.hpp
+++ b/src/routines/level2/xsymv.hpp
@@ -33,13 +33,13 @@ class Xsymv: public Xgemv<T> {
Xsymv(Queue &queue, EventPointer event, const std::string &name = "SYMV");
// Templated-precision implementation of the routine
- StatusCode DoSymv(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const T beta,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+ void DoSymv(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const T beta,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
};
// =================================================================================================
diff --git a/src/routines/level2/xsyr.cpp b/src/routines/level2/xsyr.cpp
index 758d8f8f..64c2dc74 100644
--- a/src/routines/level2/xsyr.cpp
+++ b/src/routines/level2/xsyr.cpp
@@ -28,16 +28,16 @@ Xsyr<T>::Xsyr(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xsyr<T>::DoSyr(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld) {
+void Xsyr<T>::DoSyr(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld) {
// Specific Xsyr functionality is implemented in the kernel using defines
- return DoHer(layout, triangle, n, alpha,
- x_buffer, x_offset, x_inc,
- a_buffer, a_offset, a_ld);
+ DoHer(layout, triangle, n, alpha,
+ x_buffer, x_offset, x_inc,
+ a_buffer, a_offset, a_ld);
}
// =================================================================================================
diff --git a/src/routines/level2/xsyr.hpp b/src/routines/level2/xsyr.hpp
index 20393454..a23ff80f 100644
--- a/src/routines/level2/xsyr.hpp
+++ b/src/routines/level2/xsyr.hpp
@@ -31,11 +31,11 @@ class Xsyr: public Xher<T,T> {
Xsyr(Queue &queue, EventPointer event, const std::string &name = "SYR");
// Templated-precision implementation of the routine
- StatusCode DoSyr(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
+ void DoSyr(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
};
// =================================================================================================
diff --git a/src/routines/level2/xsyr2.cpp b/src/routines/level2/xsyr2.cpp
index 6f43b219..38ca9d69 100644
--- a/src/routines/level2/xsyr2.cpp
+++ b/src/routines/level2/xsyr2.cpp
@@ -28,18 +28,18 @@ Xsyr2<T>::Xsyr2(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xsyr2<T>::DoSyr2(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld) {
+void Xsyr2<T>::DoSyr2(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld) {
// Specific Xsyr2 functionality is implemented in the kernel using defines
- return DoHer2(layout, triangle, n, alpha,
- x_buffer, x_offset, x_inc,
- y_buffer, y_offset, y_inc,
- a_buffer, a_offset, a_ld);
+ DoHer2(layout, triangle, n, alpha,
+ x_buffer, x_offset, x_inc,
+ y_buffer, y_offset, y_inc,
+ a_buffer, a_offset, a_ld);
}
// =================================================================================================
diff --git a/src/routines/level2/xsyr2.hpp b/src/routines/level2/xsyr2.hpp
index 1a8dcbe8..5a8d8eb4 100644
--- a/src/routines/level2/xsyr2.hpp
+++ b/src/routines/level2/xsyr2.hpp
@@ -31,12 +31,12 @@ class Xsyr2: public Xher2<T> {
Xsyr2(Queue &queue, EventPointer event, const std::string &name = "SYR2");
// Templated-precision implementation of the routine
- StatusCode DoSyr2(const Layout layout, const Triangle triangle,
- const size_t n,
- const T alpha,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
- const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
+ void DoSyr2(const Layout layout, const Triangle triangle,
+ const size_t n,
+ const T alpha,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+ const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld);
};
// =================================================================================================
diff --git a/src/routines/level2/xtbmv.cpp b/src/routines/level2/xtbmv.cpp
index e315c544..f4a58ed2 100644
--- a/src/routines/level2/xtbmv.cpp
+++ b/src/routines/level2/xtbmv.cpp
@@ -29,17 +29,15 @@ Xtbmv<T>::Xtbmv(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xtbmv<T>::DoTbmv(const Layout layout, const Triangle triangle,
- const Transpose a_transpose, const Diagonal diagonal,
- const size_t n, const size_t k,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
+void Xtbmv<T>::DoTbmv(const Layout layout, const Triangle triangle,
+ const Transpose a_transpose, const Diagonal diagonal,
+ const size_t n, const size_t k,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
// Creates a copy of X: a temporary scratch buffer
auto scratch_buffer = Buffer<T>(context_, n*x_inc + x_offset);
- try {
- x_buffer.CopyTo(queue_, n*x_inc + x_offset, scratch_buffer);
- } catch (...) { } // Continues: error-code is returned in MatVec
+ x_buffer.CopyTo(queue_, n*x_inc + x_offset, scratch_buffer);
// The data is either in the upper or lower triangle
size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) ||
@@ -52,20 +50,22 @@ StatusCode Xtbmv<T>::DoTbmv(const Layout layout, const Triangle triangle,
// The specific triangular banded matrix-accesses are implemented in the kernel guarded by the
// ROUTINE_TBMV define.
auto fast_kernels = false;
- auto status = MatVec(layout, a_transpose,
- n, n, static_cast<T>(1),
- a_buffer, a_offset, a_ld,
- scratch_buffer, x_offset, x_inc, static_cast<T>(0),
- x_buffer, x_offset, x_inc,
- fast_kernels, fast_kernels,
- parameter, false, k, 0);
-
- // Returns the proper error code (renames vector Y to X)
- switch(status) {
- case StatusCode::kInvalidVectorY: return StatusCode::kInvalidVectorX;
- case StatusCode::kInvalidIncrementY: return StatusCode::kInvalidIncrementX;
- case StatusCode::kInsufficientMemoryY: return StatusCode::kInsufficientMemoryX;
- default: return status;
+ try {
+ MatVec(layout, a_transpose,
+ n, n, static_cast<T>(1),
+ a_buffer, a_offset, a_ld,
+ scratch_buffer, x_offset, x_inc, static_cast<T>(0),
+ x_buffer, x_offset, x_inc,
+ fast_kernels, fast_kernels,
+ parameter, false, k, 0);
+ } catch (BLASError &e) {
+ // Re-throws the error with the proper status code (renames vector Y to X)
+ switch (e.status()) {
+ case StatusCode::kInvalidVectorY: throw BLASError(StatusCode::kInvalidVectorX, e.details());
+ case StatusCode::kInvalidIncrementY: throw BLASError(StatusCode::kInvalidIncrementX, e.details());
+ case StatusCode::kInsufficientMemoryY: throw BLASError(StatusCode::kInsufficientMemoryX, e.details());
+ default: throw;
+ }
}
}
diff --git a/src/routines/level2/xtbmv.hpp b/src/routines/level2/xtbmv.hpp
index 389e9705..abd12db6 100644
--- a/src/routines/level2/xtbmv.hpp
+++ b/src/routines/level2/xtbmv.hpp
@@ -35,11 +35,11 @@ class Xtbmv: public Xgemv<T> {
Xtbmv(Queue &queue, EventPointer event, const std::string &name = "TBMV");
// Templated-precision implementation of the routine
- StatusCode DoTbmv(const Layout layout, const Triangle triangle,
- const Transpose a_transpose, const Diagonal diagonal,
- const size_t n, const size_t k,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+ void DoTbmv(const Layout layout, const Triangle triangle,
+ const Transpose a_transpose, const Diagonal diagonal,
+ const size_t n, const size_t k,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
};
// =================================================================================================
diff --git a/src/routines/level2/xtpmv.cpp b/src/routines/level2/xtpmv.cpp
index 46811089..c0d26699 100644
--- a/src/routines/level2/xtpmv.cpp
+++ b/src/routines/level2/xtpmv.cpp
@@ -29,17 +29,15 @@ Xtpmv<T>::Xtpmv(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xtpmv<T>::DoTpmv(const Layout layout, const Triangle triangle,
- const Transpose a_transpose, const Diagonal diagonal,
- const size_t n,
- const Buffer<T> &ap_buffer, const size_t ap_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
+void Xtpmv<T>::DoTpmv(const Layout layout, const Triangle triangle,
+ const Transpose a_transpose, const Diagonal diagonal,
+ const size_t n,
+ const Buffer<T> &ap_buffer, const size_t ap_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
// Creates a copy of X: a temporary scratch buffer
auto scratch_buffer = Buffer<T>(context_, n*x_inc + x_offset);
- try {
- x_buffer.CopyTo(queue_, n*x_inc + x_offset, scratch_buffer);
- } catch (...) { } // Continues: error-code is returned in MatVec
+ x_buffer.CopyTo(queue_, n*x_inc + x_offset, scratch_buffer);
// The data is either in the upper or lower triangle
size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) ||
@@ -52,20 +50,22 @@ StatusCode Xtpmv<T>::DoTpmv(const Layout layout, const Triangle triangle,
// The specific triangular packed matrix-accesses are implemented in the kernel guarded by the
// ROUTINE_TPMV define.
auto fast_kernels = false;
- auto status = MatVec(layout, a_transpose,
- n, n, static_cast<T>(1),
- ap_buffer, ap_offset, n,
- scratch_buffer, x_offset, x_inc, static_cast<T>(0),
- x_buffer, x_offset, x_inc,
- fast_kernels, fast_kernels,
- parameter, true, 0, 0);
-
- // Returns the proper error code (renames vector Y to X)
- switch(status) {
- case StatusCode::kInvalidVectorY: return StatusCode::kInvalidVectorX;
- case StatusCode::kInvalidIncrementY: return StatusCode::kInvalidIncrementX;
- case StatusCode::kInsufficientMemoryY: return StatusCode::kInsufficientMemoryX;
- default: return status;
+ try {
+ MatVec(layout, a_transpose,
+ n, n, static_cast<T>(1),
+ ap_buffer, ap_offset, n,
+ scratch_buffer, x_offset, x_inc, static_cast<T>(0),
+ x_buffer, x_offset, x_inc,
+ fast_kernels, fast_kernels,
+ parameter, true, 0, 0);
+ } catch (BLASError &e) {
+ // Re-throws the error with the proper status code (renames vector Y to X)
+ switch (e.status()) {
+ case StatusCode::kInvalidVectorY: throw BLASError(StatusCode::kInvalidVectorX, e.details());
+ case StatusCode::kInvalidIncrementY: throw BLASError(StatusCode::kInvalidIncrementX, e.details());
+ case StatusCode::kInsufficientMemoryY: throw BLASError(StatusCode::kInsufficientMemoryX, e.details());
+ default: throw;
+ }
}
}
diff --git a/src/routines/level2/xtpmv.hpp b/src/routines/level2/xtpmv.hpp
index 0e8cf1d2..5b3954e8 100644
--- a/src/routines/level2/xtpmv.hpp
+++ b/src/routines/level2/xtpmv.hpp
@@ -35,11 +35,11 @@ class Xtpmv: public Xgemv<T> {
Xtpmv(Queue &queue, EventPointer event, const std::string &name = "TPMV");
// Templated-precision implementation of the routine
- StatusCode DoTpmv(const Layout layout, const Triangle triangle,
- const Transpose a_transpose, const Diagonal diagonal,
- const size_t n,
- const Buffer<T> &ap_buffer, const size_t ap_offset,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+ void DoTpmv(const Layout layout, const Triangle triangle,
+ const Transpose a_transpose, const Diagonal diagonal,
+ const size_t n,
+ const Buffer<T> &ap_buffer, const size_t ap_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
};
// =================================================================================================
diff --git a/src/routines/level2/xtrmv.cpp b/src/routines/level2/xtrmv.cpp
index d2f24252..5fff9b31 100644
--- a/src/routines/level2/xtrmv.cpp
+++ b/src/routines/level2/xtrmv.cpp
@@ -29,17 +29,15 @@ Xtrmv<T>::Xtrmv(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xtrmv<T>::DoTrmv(const Layout layout, const Triangle triangle,
- const Transpose a_transpose, const Diagonal diagonal,
- const size_t n,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
+void Xtrmv<T>::DoTrmv(const Layout layout, const Triangle triangle,
+ const Transpose a_transpose, const Diagonal diagonal,
+ const size_t n,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
// Creates a copy of X: a temporary scratch buffer
auto scratch_buffer = Buffer<T>(context_, n*x_inc + x_offset);
- try {
- x_buffer.CopyTo(queue_, n*x_inc + x_offset, scratch_buffer);
- } catch (...) { } // Continues: error-code is returned in MatVec
+ x_buffer.CopyTo(queue_, n*x_inc + x_offset, scratch_buffer);
// The data is either in the upper or lower triangle
size_t is_upper = ((triangle == Triangle::kUpper && layout != Layout::kRowMajor) ||
@@ -52,20 +50,22 @@ StatusCode Xtrmv<T>::DoTrmv(const Layout layout, const Triangle triangle,
// The specific triangular matrix-accesses are implemented in the kernel guarded by the
// ROUTINE_TRMV define.
auto fast_kernels = false;
- auto status = MatVec(layout, a_transpose,
- n, n, static_cast<T>(1),
- a_buffer, a_offset, a_ld,
- scratch_buffer, x_offset, x_inc, static_cast<T>(0),
- x_buffer, x_offset, x_inc,
- fast_kernels, fast_kernels,
- parameter, false, 0, 0);
-
- // Returns the proper error code (renames vector Y to X)
- switch(status) {
- case StatusCode::kInvalidVectorY: return StatusCode::kInvalidVectorX;
- case StatusCode::kInvalidIncrementY: return StatusCode::kInvalidIncrementX;
- case StatusCode::kInsufficientMemoryY: return StatusCode::kInsufficientMemoryX;
- default: return status;
+ try {
+ MatVec(layout, a_transpose,
+ n, n, static_cast<T>(1),
+ a_buffer, a_offset, a_ld,
+ scratch_buffer, x_offset, x_inc, static_cast<T>(0),
+ x_buffer, x_offset, x_inc,
+ fast_kernels, fast_kernels,
+ parameter, false, 0, 0);
+ } catch (BLASError &e) {
+ // Re-throws the error with the proper status code (renames vector Y to X)
+ switch (e.status()) {
+ case StatusCode::kInvalidVectorY: throw BLASError(StatusCode::kInvalidVectorX, e.details());
+ case StatusCode::kInvalidIncrementY: throw BLASError(StatusCode::kInvalidIncrementX, e.details());
+ case StatusCode::kInsufficientMemoryY: throw BLASError(StatusCode::kInsufficientMemoryX, e.details());
+ default: throw;
+ }
}
}
diff --git a/src/routines/level2/xtrmv.hpp b/src/routines/level2/xtrmv.hpp
index 07dd7841..b028ee68 100644
--- a/src/routines/level2/xtrmv.hpp
+++ b/src/routines/level2/xtrmv.hpp
@@ -35,11 +35,11 @@ class Xtrmv: public Xgemv<T> {
Xtrmv(Queue &queue, EventPointer event, const std::string &name = "TRMV");
// Templated-precision implementation of the routine
- StatusCode DoTrmv(const Layout layout, const Triangle triangle,
- const Transpose a_transpose, const Diagonal diagonal,
- const size_t n,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+ void DoTrmv(const Layout layout, const Triangle triangle,
+ const Transpose a_transpose, const Diagonal diagonal,
+ const size_t n,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
};
// =================================================================================================
diff --git a/src/routines/level3/xgemm.cpp b/src/routines/level3/xgemm.cpp
index 1602c69f..4f70dc7a 100644
--- a/src/routines/level3/xgemm.cpp
+++ b/src/routines/level3/xgemm.cpp
@@ -24,8 +24,7 @@ template <typename T>
Xgemm<T>::Xgemm(Queue &queue, EventPointer event, const std::string &name):
Routine(queue, event, name,
{"Copy","Pad","Transpose","Padtranspose","Xgemm","XgemmDirect","KernelSelection"},
- PrecisionValue<T>()) {
- source_string_ =
+ PrecisionValue<T>(), {}, {
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
#include "../../kernels/level3/copy_pad.opencl"
@@ -37,30 +36,28 @@ Xgemm<T>::Xgemm(Queue &queue, EventPointer event, const std::string &name):
#include "../../kernels/level3/xgemm_direct_part1.opencl"
#include "../../kernels/level3/xgemm_direct_part2.opencl"
#include "../../kernels/level3/xgemm_direct_part3.opencl"
- ;
- auto source_string_part_2 = // separated in two parts to prevent C1091 in MSVC 2013
+ , // separated in two parts to prevent C1091 in MSVC 2013
#include "../../kernels/level3/xgemm_part1.opencl"
#include "../../kernels/level3/xgemm_part2.opencl"
#include "../../kernels/level3/xgemm_part3.opencl"
- ;
- source_string_ += source_string_part_2;
+ }) {
}
// =================================================================================================
// The main routine
template <typename T>
-StatusCode Xgemm<T>::DoGemm(const Layout layout,
- const Transpose a_transpose, const Transpose b_transpose,
- const size_t m, const size_t n, const size_t k,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
- const T beta,
- const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) {
+void Xgemm<T>::DoGemm(const Layout layout,
+ const Transpose a_transpose, const Transpose b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
+ const T beta,
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) {
// Makes sure all dimensions are larger than zero
- if ((m == 0) || (n == 0) || (k == 0)) { return StatusCode::kInvalidDimension; }
+ if ((m == 0) || (n == 0) || (k == 0)) { throw BLASError(StatusCode::kInvalidDimension); }
// Computes whether or not the matrices are transposed in memory. This is based on their layout
// (row or column-major) and whether or not they are requested to be pre-transposed. Note
@@ -99,12 +96,9 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
// matrix A cannot be less than K when rotated, or less than M when not-rotated
// matrix B cannot be less than N when rotated, or less than K when not-rotated
// matrix C cannot be less than N when rotated, or less than M when not-rotated
- auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld);
- if (ErrorIn(status)) { return status; }
- status = TestMatrixB(b_one, b_two, b_buffer, b_offset, b_ld);
- if (ErrorIn(status)) { return status; }
- status = TestMatrixC(c_one, c_two, c_buffer, c_offset, c_ld);
- if (ErrorIn(status)) { return status; }
+ TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld);
+ TestMatrixB(b_one, b_two, b_buffer, b_offset, b_ld);
+ TestMatrixC(c_one, c_two, c_buffer, c_offset, c_ld);
// Selects which version of GEMM to run
const auto do_gemm_direct = (m * n * k < db_["XGEMM_MIN_INDIRECT_SIZE"]);
@@ -131,7 +125,7 @@ StatusCode Xgemm<T>::DoGemm(const Layout layout,
// requirements, but several pre and post-processing kernels take care of those. However, the
// overhead of these extra kernels might not be ideal for certain devices/arguments.
template <typename T>
-StatusCode Xgemm<T>::GemmIndirect(const size_t m, const size_t n, const size_t k,
+void Xgemm<T>::GemmIndirect(const size_t m, const size_t n, const size_t k,
const T alpha,
const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
@@ -142,8 +136,6 @@ StatusCode Xgemm<T>::GemmIndirect(const size_t m, const size_t n, const size_t k
const size_t a_one, const size_t a_two, const bool a_want_rotated,
const size_t b_one, const size_t b_two, const bool b_want_rotated,
const size_t c_one, const size_t c_two, const bool c_want_rotated) {
- auto status = StatusCode::kSuccess;
-
// Calculates the ceiled versions of m, n, and k
const auto m_ceiled = Ceil(m, db_["MWG"]);
const auto n_ceiled = Ceil(n, db_["NWG"]);
@@ -158,109 +150,95 @@ StatusCode Xgemm<T>::GemmIndirect(const size_t m, const size_t n, const size_t k
const auto c_one_i = (c_want_rotated) ? n_ceiled : m_ceiled;
const auto c_two_i = (c_want_rotated) ? m_ceiled : n_ceiled;
- // The padded/transposed input/output matrices: if memory allocation fails, throw an exception
- try {
-
- // Loads the program from the database
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
-
- // Determines whether or not temporary matrices are needed
- auto a_no_temp = a_one == a_one_i && a_two == a_two_i && a_ld == a_one && a_offset == 0 &&
- a_do_transpose == false && a_conjugate == false;
- auto b_no_temp = b_one == b_one_i && b_two == b_two_i && b_ld == b_one && b_offset == 0 &&
- b_do_transpose == false && b_conjugate == false;
- auto c_no_temp = c_one == c_one_i && c_two == c_two_i && c_ld == c_one && c_offset == 0 &&
- c_do_transpose == false;
-
- // Creates the temporary matrices
- const auto a_temp = (a_no_temp) ? a_buffer : Buffer<T>(context_, a_one_i*a_two_i);
- const auto b_temp = (b_no_temp) ? b_buffer : Buffer<T>(context_, b_one_i*b_two_i);
- const auto c_temp = (c_no_temp) ? c_buffer : Buffer<T>(context_, c_one_i*c_two_i);
-
- // Events of all kernels (including pre/post processing kernels)
- auto eventWaitList = std::vector<Event>();
- auto emptyEventList = std::vector<Event>();
-
- // Runs the pre-processing kernel for matrix A. This transposes the matrix, but also pads zeros
- // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In
- // case nothing has to be done, these kernels can be skipped.
- if (!a_no_temp) {
- auto eventProcessA = Event();
- status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA.pointer(), emptyEventList,
- a_one, a_two, a_ld, a_offset, a_buffer,
- a_one_i, a_two_i, a_one_i, 0, a_temp,
- ConstantOne<T>(), program,
- true, a_do_transpose, a_conjugate);
- if (ErrorIn(status)) { return status; }
- eventWaitList.push_back(eventProcessA);
- }
-
- // As above, but now for matrix B
- if (!b_no_temp) {
- auto eventProcessB = Event();
- status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessB.pointer(), emptyEventList,
- b_one, b_two, b_ld, b_offset, b_buffer,
- b_one_i, b_two_i, b_one_i, 0, b_temp,
- ConstantOne<T>(), program,
- true, b_do_transpose, b_conjugate);
- if (ErrorIn(status)) { return status; }
- eventWaitList.push_back(eventProcessB);
- }
-
- // As above, but now for matrix C. This is only necessary if C is used both as input and output.
- if (!c_no_temp && beta != static_cast<T>(0)) {
- auto eventProcessC = Event();
- status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessC.pointer(), emptyEventList,
- c_one, c_two, c_ld, c_offset, c_buffer,
- c_one_i, c_two_i, c_one_i, 0, c_temp,
- ConstantOne<T>(), program,
- true, c_do_transpose, false);
- if (ErrorIn(status)) { return status; }
- eventWaitList.push_back(eventProcessC);
- }
-
- // Retrieves the Xgemm kernel from the compiled binary
- try {
- auto kernel = Kernel(program, "Xgemm");
-
- // Sets the kernel arguments
- kernel.SetArgument(0, static_cast<int>(m_ceiled));
- kernel.SetArgument(1, static_cast<int>(n_ceiled));
- kernel.SetArgument(2, static_cast<int>(k_ceiled));
- kernel.SetArgument(3, GetRealArg(alpha));
- kernel.SetArgument(4, GetRealArg(beta));
- kernel.SetArgument(5, a_temp());
- kernel.SetArgument(6, b_temp());
- kernel.SetArgument(7, c_temp());
-
- // Computes the global and local thread sizes
- const auto global = std::vector<size_t>{
- (c_one_i * db_["MDIMC"]) / db_["MWG"],
- (c_two_i * db_["NDIMC"]) / db_["NWG"]
- };
- const auto local = std::vector<size_t>{db_["MDIMC"], db_["NDIMC"]};
-
- // Launches the kernel
- auto eventKernel = Event();
- auto eventPointer = (!c_no_temp) ? eventKernel.pointer() : event_;
- status = RunKernel(kernel, queue_, device_, global, local, eventPointer, eventWaitList);
- if (ErrorIn(status)) { return status; }
-
- // Runs the post-processing kernel if needed
- if (!c_no_temp) {
- eventWaitList.push_back(eventKernel);
- status = PadCopyTransposeMatrix(queue_, device_, db_, event_, eventWaitList,
- c_one_i, c_two_i, c_one_i, 0, c_temp,
- c_one, c_two, c_ld, c_offset, c_buffer,
- ConstantOne<T>(), program,
- false, c_do_transpose, false);
- if (ErrorIn(status)) { return status; }
- }
-
- // Successfully finished the computation
- return StatusCode::kSuccess;
- } catch (...) { return StatusCode::kInvalidKernel; }
- } catch (...) { return StatusCode::kTempBufferAllocFailure; }
+ // Loads the program from the database
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+
+ // Determines whether or not temporary matrices are needed
+ auto a_no_temp = a_one == a_one_i && a_two == a_two_i && a_ld == a_one && a_offset == 0 &&
+ a_do_transpose == false && a_conjugate == false;
+ auto b_no_temp = b_one == b_one_i && b_two == b_two_i && b_ld == b_one && b_offset == 0 &&
+ b_do_transpose == false && b_conjugate == false;
+ auto c_no_temp = c_one == c_one_i && c_two == c_two_i && c_ld == c_one && c_offset == 0 &&
+ c_do_transpose == false;
+
+ // Creates the temporary matrices
+ const auto a_temp = (a_no_temp) ? a_buffer : Buffer<T>(context_, a_one_i*a_two_i);
+ const auto b_temp = (b_no_temp) ? b_buffer : Buffer<T>(context_, b_one_i*b_two_i);
+ const auto c_temp = (c_no_temp) ? c_buffer : Buffer<T>(context_, c_one_i*c_two_i);
+
+ // Events of all kernels (including pre/post processing kernels)
+ auto eventWaitList = std::vector<Event>();
+ auto emptyEventList = std::vector<Event>();
+
+ // Runs the pre-processing kernel for matrix A. This transposes the matrix, but also pads zeros
+ // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In
+ // case nothing has to be done, these kernels can be skipped.
+ if (!a_no_temp) {
+ auto eventProcessA = Event();
+ PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA.pointer(), emptyEventList,
+ a_one, a_two, a_ld, a_offset, a_buffer,
+ a_one_i, a_two_i, a_one_i, 0, a_temp,
+ ConstantOne<T>(), program,
+ true, a_do_transpose, a_conjugate);
+ eventWaitList.push_back(eventProcessA);
+ }
+
+ // As above, but now for matrix B
+ if (!b_no_temp) {
+ auto eventProcessB = Event();
+ PadCopyTransposeMatrix(queue_, device_, db_, eventProcessB.pointer(), emptyEventList,
+ b_one, b_two, b_ld, b_offset, b_buffer,
+ b_one_i, b_two_i, b_one_i, 0, b_temp,
+ ConstantOne<T>(), program,
+ true, b_do_transpose, b_conjugate);
+ eventWaitList.push_back(eventProcessB);
+ }
+
+ // As above, but now for matrix C. This is only necessary if C is used both as input and output.
+ if (!c_no_temp && beta != static_cast<T>(0)) {
+ auto eventProcessC = Event();
+ PadCopyTransposeMatrix(queue_, device_, db_, eventProcessC.pointer(), emptyEventList,
+ c_one, c_two, c_ld, c_offset, c_buffer,
+ c_one_i, c_two_i, c_one_i, 0, c_temp,
+ ConstantOne<T>(), program,
+ true, c_do_transpose, false);
+ eventWaitList.push_back(eventProcessC);
+ }
+
+ // Retrieves the Xgemm kernel from the compiled binary
+ auto kernel = Kernel(program, "Xgemm");
+
+ // Sets the kernel arguments
+ kernel.SetArgument(0, static_cast<int>(m_ceiled));
+ kernel.SetArgument(1, static_cast<int>(n_ceiled));
+ kernel.SetArgument(2, static_cast<int>(k_ceiled));
+ kernel.SetArgument(3, GetRealArg(alpha));
+ kernel.SetArgument(4, GetRealArg(beta));
+ kernel.SetArgument(5, a_temp());
+ kernel.SetArgument(6, b_temp());
+ kernel.SetArgument(7, c_temp());
+
+ // Computes the global and local thread sizes
+ const auto global = std::vector<size_t>{
+ (c_one_i * db_["MDIMC"]) / db_["MWG"],
+ (c_two_i * db_["NDIMC"]) / db_["NWG"]
+ };
+ const auto local = std::vector<size_t>{db_["MDIMC"], db_["NDIMC"]};
+
+ // Launches the kernel
+ auto eventKernel = Event();
+ auto eventPointer = (!c_no_temp) ? eventKernel.pointer() : event_;
+ RunKernel(kernel, queue_, device_, global, local, eventPointer, eventWaitList);
+
+ // Runs the post-processing kernel if needed
+ if (!c_no_temp) {
+ eventWaitList.push_back(eventKernel);
+ PadCopyTransposeMatrix(queue_, device_, db_, event_, eventWaitList,
+ c_one_i, c_two_i, c_one_i, 0, c_temp,
+ c_one, c_two, c_ld, c_offset, c_buffer,
+ ConstantOne<T>(), program,
+ false, c_do_transpose, false);
+ }
}
@@ -268,7 +246,7 @@ StatusCode Xgemm<T>::GemmIndirect(const size_t m, const size_t n, const size_t k
// The direct version of GEMM, requiring just one kernel, no pre or post-processing kernels.
template <typename T>
-StatusCode Xgemm<T>::GemmDirect(const size_t m, const size_t n, const size_t k,
+void Xgemm<T>::GemmDirect(const size_t m, const size_t n, const size_t k,
const T alpha,
const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
@@ -281,46 +259,40 @@ StatusCode Xgemm<T>::GemmDirect(const size_t m, const size_t n, const size_t k,
const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
// Retrieves the proper XgemmDirect kernel from the compiled binary
- try {
- const auto name = (a_do_transpose) ? (b_do_transpose ? "XgemmDirectTT" : "XgemmDirectTN") :
- (b_do_transpose ? "XgemmDirectNT" : "XgemmDirectNN");
- auto kernel = Kernel(program, name);
-
- // Sets the kernel arguments
- kernel.SetArgument(0, static_cast<int>(m));
- kernel.SetArgument(1, static_cast<int>(n));
- kernel.SetArgument(2, static_cast<int>(k));
- kernel.SetArgument(3, GetRealArg(alpha));
- kernel.SetArgument(4, GetRealArg(beta));
- kernel.SetArgument(5, a_buffer());
- kernel.SetArgument(6, static_cast<int>(a_offset));
- kernel.SetArgument(7, static_cast<int>(a_ld));
- kernel.SetArgument(8, b_buffer());
- kernel.SetArgument(9, static_cast<int>(b_offset));
- kernel.SetArgument(10, static_cast<int>(b_ld));
- kernel.SetArgument(11, c_buffer());
- kernel.SetArgument(12, static_cast<int>(c_offset));
- kernel.SetArgument(13, static_cast<int>(c_ld));
- kernel.SetArgument(14, static_cast<int>(c_do_transpose));
- kernel.SetArgument(15, static_cast<int>(a_conjugate));
- kernel.SetArgument(16, static_cast<int>(b_conjugate));
-
- // Computes the global and local thread sizes
- const auto m_ceiled = Ceil(m, db_["WGD"]);
- const auto n_ceiled = Ceil(n, db_["WGD"]);
- const auto global = std::vector<size_t>{
- (m_ceiled * db_["MDIMCD"]) / db_["WGD"],
- (n_ceiled * db_["NDIMCD"]) / db_["WGD"]
- };
- const auto local = std::vector<size_t>{db_["MDIMCD"], db_["NDIMCD"]};
-
- // Launches the kernel
- auto status = RunKernel(kernel, queue_, device_, global, local, event_);
- if (ErrorIn(status)) { return status; }
-
- // Successfully finished the computation
- return StatusCode::kSuccess;
- } catch (...) { return StatusCode::kInvalidKernel; }
+ const auto name = (a_do_transpose) ? (b_do_transpose ? "XgemmDirectTT" : "XgemmDirectTN") :
+ (b_do_transpose ? "XgemmDirectNT" : "XgemmDirectNN");
+ auto kernel = Kernel(program, name);
+
+ // Sets the kernel arguments
+ kernel.SetArgument(0, static_cast<int>(m));
+ kernel.SetArgument(1, static_cast<int>(n));
+ kernel.SetArgument(2, static_cast<int>(k));
+ kernel.SetArgument(3, GetRealArg(alpha));
+ kernel.SetArgument(4, GetRealArg(beta));
+ kernel.SetArgument(5, a_buffer());
+ kernel.SetArgument(6, static_cast<int>(a_offset));
+ kernel.SetArgument(7, static_cast<int>(a_ld));
+ kernel.SetArgument(8, b_buffer());
+ kernel.SetArgument(9, static_cast<int>(b_offset));
+ kernel.SetArgument(10, static_cast<int>(b_ld));
+ kernel.SetArgument(11, c_buffer());
+ kernel.SetArgument(12, static_cast<int>(c_offset));
+ kernel.SetArgument(13, static_cast<int>(c_ld));
+ kernel.SetArgument(14, static_cast<int>(c_do_transpose));
+ kernel.SetArgument(15, static_cast<int>(a_conjugate));
+ kernel.SetArgument(16, static_cast<int>(b_conjugate));
+
+ // Computes the global and local thread sizes
+ const auto m_ceiled = Ceil(m, db_["WGD"]);
+ const auto n_ceiled = Ceil(n, db_["WGD"]);
+ const auto global = std::vector<size_t>{
+ (m_ceiled * db_["MDIMCD"]) / db_["WGD"],
+ (n_ceiled * db_["NDIMCD"]) / db_["WGD"]
+ };
+ const auto local = std::vector<size_t>{db_["MDIMCD"], db_["NDIMCD"]};
+
+ // Launches the kernel
+ RunKernel(kernel, queue_, device_, global, local, event_);
}
// =================================================================================================
diff --git a/src/routines/level3/xgemm.hpp b/src/routines/level3/xgemm.hpp
index 46e12453..c61611b6 100644
--- a/src/routines/level3/xgemm.hpp
+++ b/src/routines/level3/xgemm.hpp
@@ -28,36 +28,36 @@ class Xgemm: public Routine {
Xgemm(Queue &queue, EventPointer event, const std::string &name = "GEMM");
// Templated-precision implementation of the routine
- StatusCode DoGemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
- const size_t m, const size_t n, const size_t k,
+ void DoGemm(const Layout layout, const Transpose a_transpose, const Transpose b_transpose,
+ const size_t m, const size_t n, const size_t k,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
+ const T beta,
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
+
+ // Indirect version of GEMM (with pre and post-processing kernels)
+ void GemmIndirect(const size_t m, const size_t n, const size_t k,
const T alpha,
const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
const T beta,
- const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
-
- // Indirect version of GEMM (with pre and post-processing kernels)
- StatusCode GemmIndirect(const size_t m, const size_t n, const size_t k,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
- const T beta,
- const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld,
- const bool a_do_transpose, const bool b_do_transpose, const bool c_do_transpose,
- const bool a_conjugate, const bool b_conjugate,
- const size_t a_one, const size_t a_two, const bool a_want_rotated,
- const size_t b_one, const size_t b_two, const bool b_want_rotated,
- const size_t c_one, const size_t c_two, const bool c_want_rotated);
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld,
+ const bool a_do_transpose, const bool b_do_transpose, const bool c_do_transpose,
+ const bool a_conjugate, const bool b_conjugate,
+ const size_t a_one, const size_t a_two, const bool a_want_rotated,
+ const size_t b_one, const size_t b_two, const bool b_want_rotated,
+ const size_t c_one, const size_t c_two, const bool c_want_rotated);
// Direct version of GEMM (no pre and post-processing kernels)
- StatusCode GemmDirect(const size_t m, const size_t n, const size_t k,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
- const T beta,
- const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld,
- const bool a_do_transpose, const bool b_do_transpose, const bool c_do_transpose,
- const bool a_conjugate, const bool b_conjugate);
+ void GemmDirect(const size_t m, const size_t n, const size_t k,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
+ const T beta,
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld,
+ const bool a_do_transpose, const bool b_do_transpose, const bool c_do_transpose,
+ const bool a_conjugate, const bool b_conjugate);
};
// =================================================================================================
diff --git a/src/routines/level3/xhemm.cpp b/src/routines/level3/xhemm.cpp
index 9813503e..e5b1502a 100644
--- a/src/routines/level3/xhemm.cpp
+++ b/src/routines/level3/xhemm.cpp
@@ -29,7 +29,7 @@ Xhemm<T>::Xhemm(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xhemm<T>::DoHemm(const Layout layout, const Side side, const Triangle triangle,
+void Xhemm<T>::DoHemm(const Layout layout, const Side side, const Triangle triangle,
const size_t m, const size_t n,
const T alpha,
const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
@@ -38,15 +38,14 @@ StatusCode Xhemm<T>::DoHemm(const Layout layout, const Side side, const Triangle
const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) {
// Makes sure all dimensions are larger than zero
- if ((m == 0) || (n == 0) ) { return StatusCode::kInvalidDimension; }
+ if ((m == 0) || (n == 0) ) { throw BLASError(StatusCode::kInvalidDimension); }
// Computes the k dimension. This is based on whether or not the hermitian matrix is A (on the
// left) or B (on the right) in the Xgemm routine.
auto k = (side == Side::kLeft) ? m : n;
// Checks for validity of the squared A matrix
- auto status = TestMatrixA(k, k, a_buffer, a_offset, a_ld);
- if (ErrorIn(status)) { return status; }
+ TestMatrixA(k, k, a_buffer, a_offset, a_ld);
// Determines which kernel to run based on the layout (the Xgemm kernel assumes column-major as
// default) and on whether we are dealing with an upper or lower triangle of the hermitian matrix
@@ -55,73 +54,68 @@ StatusCode Xhemm<T>::DoHemm(const Layout layout, const Side side, const Triangle
auto kernel_name = (is_upper) ? "HermUpperToSquared" : "HermLowerToSquared";
// Temporary buffer for a copy of the hermitian matrix
- try {
- auto temp_herm = Buffer<T>(context_, k*k);
-
- // Creates a general matrix from the hermitian matrix to be able to run the regular Xgemm
- // routine afterwards
+ auto temp_herm = Buffer<T>(context_, k*k);
+
+ // Creates a general matrix from the hermitian matrix to be able to run the regular Xgemm
+ // routine afterwards
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+ auto kernel = Kernel(program, kernel_name);
+
+ // Sets the arguments for the hermitian-to-squared kernel
+ kernel.SetArgument(0, static_cast<int>(k));
+ kernel.SetArgument(1, static_cast<int>(a_ld));
+ kernel.SetArgument(2, static_cast<int>(a_offset));
+ kernel.SetArgument(3, a_buffer());
+ kernel.SetArgument(4, static_cast<int>(k));
+ kernel.SetArgument(5, static_cast<int>(k));
+ kernel.SetArgument(6, static_cast<int>(0));
+ kernel.SetArgument(7, temp_herm());
+
+ // Uses the common padding kernel's thread configuration. This is allowed, since the
+ // hermitian-to-squared kernel uses the same parameters.
+ auto global = std::vector<size_t>{Ceil(CeilDiv(k, db_["PAD_WPTX"]), db_["PAD_DIMX"]),
+ Ceil(CeilDiv(k, db_["PAD_WPTY"]), db_["PAD_DIMY"])};
+ auto local = std::vector<size_t>{db_["PAD_DIMX"], db_["PAD_DIMY"]};
+ auto kernelEvent = Event();
+ RunKernel(kernel, queue_, device_, global, local, kernelEvent.pointer());
+
+ // Synchronize now: 'DoGemm' does not accept a list of events to wait for
+ kernelEvent.WaitForCompletion();
+
+ // Runs the regular Xgemm code with either "C := AB+C" or ...
+ if (side == Side::kLeft) {
+ DoGemm(layout, Transpose::kNo, Transpose::kNo,
+ m, n, k,
+ alpha,
+ temp_herm, 0, k,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld);
+ }
+
+ // ... with "C := BA+C". Note that A and B are now reversed.
+ else {
try {
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel = Kernel(program, kernel_name);
-
- // Sets the arguments for the hermitian-to-squared kernel
- kernel.SetArgument(0, static_cast<int>(k));
- kernel.SetArgument(1, static_cast<int>(a_ld));
- kernel.SetArgument(2, static_cast<int>(a_offset));
- kernel.SetArgument(3, a_buffer());
- kernel.SetArgument(4, static_cast<int>(k));
- kernel.SetArgument(5, static_cast<int>(k));
- kernel.SetArgument(6, static_cast<int>(0));
- kernel.SetArgument(7, temp_herm());
-
- // Uses the common padding kernel's thread configuration. This is allowed, since the
- // hermitian-to-squared kernel uses the same parameters.
- auto global = std::vector<size_t>{Ceil(CeilDiv(k, db_["PAD_WPTX"]), db_["PAD_DIMX"]),
- Ceil(CeilDiv(k, db_["PAD_WPTY"]), db_["PAD_DIMY"])};
- auto local = std::vector<size_t>{db_["PAD_DIMX"], db_["PAD_DIMY"]};
- auto kernelEvent = Event();
- status = RunKernel(kernel, queue_, device_, global, local, kernelEvent.pointer());
- if (ErrorIn(status)) { return status; }
-
- // Synchronize now: 'DoGemm' does not accept a list of events to wait for
- kernelEvent.WaitForCompletion();
-
- // Runs the regular Xgemm code with either "C := AB+C" or ...
- if (side == Side::kLeft) {
- status = DoGemm(layout, Transpose::kNo, Transpose::kNo,
- m, n, k,
- alpha,
- temp_herm, 0, k,
- b_buffer, b_offset, b_ld,
- beta,
- c_buffer, c_offset, c_ld);
- }
-
- // ... with "C := BA+C". Note that A and B are now reversed.
- else {
- status = DoGemm(layout, Transpose::kNo, Transpose::kNo,
- m, n, k,
- alpha,
- b_buffer, b_offset, b_ld,
- temp_herm, 0, k,
- beta,
- c_buffer, c_offset, c_ld);
-
- // A and B are now reversed, so also reverse the error codes returned from the Xgemm routine
- switch(status) {
- case StatusCode::kInvalidMatrixA: status = StatusCode::kInvalidMatrixB; break;
- case StatusCode::kInvalidMatrixB: status = StatusCode::kInvalidMatrixA; break;
- case StatusCode::kInvalidLeadDimA: status = StatusCode::kInvalidLeadDimB; break;
- case StatusCode::kInvalidLeadDimB: status = StatusCode::kInvalidLeadDimA; break;
- case StatusCode::kInsufficientMemoryA: status = StatusCode::kInsufficientMemoryB; break;
- case StatusCode::kInsufficientMemoryB: status = StatusCode::kInsufficientMemoryA; break;
- }
+ DoGemm(layout, Transpose::kNo, Transpose::kNo,
+ m, n, k,
+ alpha,
+ b_buffer, b_offset, b_ld,
+ temp_herm, 0, k,
+ beta,
+ c_buffer, c_offset, c_ld);
+ } catch (BLASError &e) {
+ // A and B are now reversed, so also reverse the error codes returned from the Xgemm routine
+ switch(e.status()) {
+ case StatusCode::kInvalidMatrixA: throw BLASError(StatusCode::kInvalidMatrixB, e.details());
+ case StatusCode::kInvalidMatrixB: throw BLASError(StatusCode::kInvalidMatrixA, e.details());
+ case StatusCode::kInvalidLeadDimA: throw BLASError(StatusCode::kInvalidLeadDimB, e.details());
+ case StatusCode::kInvalidLeadDimB: throw BLASError(StatusCode::kInvalidLeadDimA, e.details());
+ case StatusCode::kInsufficientMemoryA: throw BLASError(StatusCode::kInsufficientMemoryB, e.details());
+ case StatusCode::kInsufficientMemoryB: throw BLASError(StatusCode::kInsufficientMemoryA, e.details());
+ default: throw;
}
-
- // Return the status of the Xgemm routine
- return status;
- } catch (...) { return StatusCode::kInvalidKernel; }
- } catch (...) { return StatusCode::kTempBufferAllocFailure; }
+ }
+ }
}
// =================================================================================================
diff --git a/src/routines/level3/xhemm.hpp b/src/routines/level3/xhemm.hpp
index 272bd2ec..2385706e 100644
--- a/src/routines/level3/xhemm.hpp
+++ b/src/routines/level3/xhemm.hpp
@@ -37,13 +37,13 @@ class Xhemm: public Xgemm<T> {
Xhemm(Queue &queue, EventPointer event, const std::string &name = "HEMM");
// Templated-precision implementation of the routine
- StatusCode DoHemm(const Layout layout, const Side side, const Triangle triangle,
- const size_t m, const size_t n,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
- const T beta,
- const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
+ void DoHemm(const Layout layout, const Side side, const Triangle triangle,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
+ const T beta,
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
};
// =================================================================================================
diff --git a/src/routines/level3/xher2k.cpp b/src/routines/level3/xher2k.cpp
index bf328729..ee3bb8b8 100644
--- a/src/routines/level3/xher2k.cpp
+++ b/src/routines/level3/xher2k.cpp
@@ -22,8 +22,7 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T, typename U>
Xher2k<T,U>::Xher2k(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
#include "../../kernels/level3/copy_pad.opencl"
@@ -32,23 +31,23 @@ Xher2k<T,U>::Xher2k(Queue &queue, EventPointer event, const std::string &name):
#include "../../kernels/level3/xgemm_part1.opencl"
#include "../../kernels/level3/xgemm_part2.opencl"
#include "../../kernels/level3/xgemm_part3.opencl"
- ;
+ }) {
}
// =================================================================================================
// The main routine
template <typename T, typename U>
-StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
- const size_t n, const size_t k,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
- const U beta,
- const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) {
+void Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
+ const size_t n, const size_t k,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
+ const U beta,
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) {
// Makes sure all dimensions are larger than zero
- if ((n == 0) || (k == 0) ) { return StatusCode::kInvalidDimension; }
+ if ((n == 0) || (k == 0) ) { throw BLASError(StatusCode::kInvalidDimension); }
// Determines whether to apply the conjugate transpose to matrix B (argument: no transpose) or
// to matrix A (argument: conjugate transpose)
@@ -71,12 +70,9 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co
// matrix A cannot be less than N when rotated, or less than K when not-rotated
// matrix B cannot be less than N when rotated, or less than K when not-rotated
// matrix C cannot be less than N
- auto status = TestMatrixA(ab_one, ab_two, a_buffer, a_offset, a_ld);
- if (ErrorIn(status)) { return status; }
- status = TestMatrixB(ab_one, ab_two, b_buffer, b_offset, b_ld);
- if (ErrorIn(status)) { return status; }
- status = TestMatrixC(n, n, c_buffer, c_offset, c_ld);
- if (ErrorIn(status)) { return status; }
+ TestMatrixA(ab_one, ab_two, a_buffer, a_offset, a_ld);
+ TestMatrixB(ab_one, ab_two, b_buffer, b_offset, b_ld);
+ TestMatrixC(n, n, c_buffer, c_offset, c_ld);
// Calculates the ceiled versions of n and k
auto n_ceiled = Ceil(Ceil(n, db_["MWG"]), db_["NWG"]);
@@ -85,145 +81,128 @@ StatusCode Xher2k<T,U>::DoHer2k(const Layout layout, const Triangle triangle, co
// Decides which kernel to run: the upper-triangular or lower-triangular version
auto kernel_name = (triangle == Triangle::kUpper) ? "XgemmUpper" : "XgemmLower";
- // The padded/transposed input/output matrices: if memory allocation fails, throw an exception
- try {
-
- // Loads the program from the database
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
-
- // Determines whether or not temporary matrices are needed
- auto a1_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
- ab_rotated == false && ab_conjugate == false;
- auto a2_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
- ab_rotated == false && ab_conjugate == true;
- auto b1_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && b_ld == n_ceiled && b_offset == 0 &&
- ab_rotated == false && ab_conjugate == false;
- auto b2_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && b_ld == n_ceiled && b_offset == 0 &&
- ab_rotated == false && ab_conjugate == true;
-
- // Creates the temporary matrices
- auto a1_temp = (a1_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*n_ceiled);
- auto a2_temp = (a2_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*n_ceiled);
- auto b1_temp = (b1_no_temp) ? b_buffer : Buffer<T>(context_, k_ceiled*n_ceiled);
- auto b2_temp = (b2_no_temp) ? b_buffer : Buffer<T>(context_, k_ceiled*n_ceiled);
- auto c_temp = Buffer<T>(context_, n_ceiled*n_ceiled);
-
- // Convert the arguments to complex versions
- auto complex_beta = T{beta, static_cast<U>(0.0)};
-
- // Events of all kernels (including pre/post processing kernels)
- auto eventWaitList = std::vector<Event>();
- auto emptyEventList = std::vector<Event>();
-
- // Runs the pre-processing kernels. This transposes the matrices A and B, but also pads zeros to
- // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In
- // case nothing has to be done, these kernels can be skipped.
- if (!a1_no_temp) {
- auto eventProcessA1 = Event();
- status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA1.pointer(), emptyEventList,
- ab_one, ab_two, a_ld, a_offset, a_buffer,
- n_ceiled, k_ceiled, n_ceiled, 0, a1_temp,
- ConstantOne<T>(), program,
- true, ab_rotated, ab_conjugate);
- eventWaitList.push_back(eventProcessA1);
- if (ErrorIn(status)) { return status; }
- }
- if (!a2_no_temp) {
- auto eventProcessA2 = Event();
- status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA2.pointer(), emptyEventList,
- ab_one, ab_two, a_ld, a_offset, a_buffer,
- n_ceiled, k_ceiled, n_ceiled, 0, a2_temp,
- ConstantOne<T>(), program,
- true, ab_rotated, !ab_conjugate);
- eventWaitList.push_back(eventProcessA2);
- if (ErrorIn(status)) { return status; }
- }
- if (!b1_no_temp) {
- auto eventProcessB1 = Event();
- status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessB1.pointer(), emptyEventList,
- ab_one, ab_two, b_ld, b_offset, b_buffer,
- n_ceiled, k_ceiled, n_ceiled, 0, b1_temp,
- ConstantOne<T>(), program,
- true, ab_rotated, ab_conjugate);
- eventWaitList.push_back(eventProcessB1);
- if (ErrorIn(status)) { return status; }
- }
- if (!b2_no_temp) {
- auto eventProcessB2 = Event();
- status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessB2.pointer(), emptyEventList,
- ab_one, ab_two, b_ld, b_offset, b_buffer,
- n_ceiled, k_ceiled, n_ceiled, 0, b2_temp,
- ConstantOne<T>(), program,
- true, ab_rotated, !ab_conjugate);
- eventWaitList.push_back(eventProcessB2);
- if (ErrorIn(status)) { return status; }
- }
-
- // Furthermore, also creates a (possibly padded) copy of matrix C, since it is not allowed to
- // modify the other triangle.
- auto eventProcessC = Event();
- status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessC.pointer(), emptyEventList,
- n, n, c_ld, c_offset, c_buffer,
- n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
- ConstantOne<T>(), program,
- true, c_rotated, false);
- eventWaitList.push_back(eventProcessC);
- if (ErrorIn(status)) { return status; }
-
- // Retrieves the XgemmUpper or XgemmLower kernel from the compiled binary
- try {
- auto kernel = Kernel(program, kernel_name);
-
- // Sets the kernel arguments
- kernel.SetArgument(0, static_cast<int>(n_ceiled));
- kernel.SetArgument(1, static_cast<int>(k_ceiled));
- kernel.SetArgument(2, GetRealArg(alpha));
- kernel.SetArgument(3, GetRealArg(complex_beta));
- kernel.SetArgument(4, a1_temp());
- kernel.SetArgument(5, b2_temp());
- kernel.SetArgument(6, c_temp());
-
- // Computes the global and local thread sizes
- auto global = std::vector<size_t>{
- (n_ceiled * db_["MDIMC"]) / db_["MWG"],
- (n_ceiled * db_["NDIMC"]) / db_["NWG"]
- };
- auto local = std::vector<size_t>{db_["MDIMC"], db_["NDIMC"]};
-
- // Launches the kernel
- auto eventKernel1 = Event();
- status = RunKernel(kernel, queue_, device_, global, local, eventKernel1.pointer(), eventWaitList);
- if (ErrorIn(status)) { return status; }
- eventWaitList.push_back(eventKernel1);
-
- // Swaps the arguments for matrices A and B, sets 'beta' to 1, and conjugate alpha
- auto conjugate_alpha = T{alpha.real(), -alpha.imag()};
- auto complex_one = T{static_cast<U>(1.0), static_cast<U>(0.0)};
- kernel.SetArgument(2, GetRealArg(conjugate_alpha));
- kernel.SetArgument(3, GetRealArg(complex_one));
- kernel.SetArgument(4, b1_temp());
- kernel.SetArgument(5, a2_temp());
-
- // Runs the kernel again
- auto eventKernel2 = Event();
- status = RunKernel(kernel, queue_, device_, global, local, eventKernel2.pointer(), eventWaitList);
- if (ErrorIn(status)) { return status; }
- eventWaitList.push_back(eventKernel2);
-
- // Runs the post-processing kernel
- auto upper = (triangle == Triangle::kUpper);
- auto lower = (triangle == Triangle::kLower);
- status = PadCopyTransposeMatrix(queue_, device_, db_, event_, eventWaitList,
- n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
- n, n, c_ld, c_offset, c_buffer,
- ConstantOne<T>(), program,
- false, c_rotated, false, upper, lower, true);
- if (ErrorIn(status)) { return status; }
-
- // Successfully finished the computation
- return StatusCode::kSuccess;
- } catch (...) { return StatusCode::kInvalidKernel; }
- } catch (...) { return StatusCode::kTempBufferAllocFailure; }
+ // Loads the program from the database
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+
+ // Determines whether or not temporary matrices are needed
+ auto a1_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
+ ab_rotated == false && ab_conjugate == false;
+ auto a2_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
+ ab_rotated == false && ab_conjugate == true;
+ auto b1_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && b_ld == n_ceiled && b_offset == 0 &&
+ ab_rotated == false && ab_conjugate == false;
+ auto b2_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && b_ld == n_ceiled && b_offset == 0 &&
+ ab_rotated == false && ab_conjugate == true;
+
+ // Creates the temporary matrices
+ auto a1_temp = (a1_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*n_ceiled);
+ auto a2_temp = (a2_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*n_ceiled);
+ auto b1_temp = (b1_no_temp) ? b_buffer : Buffer<T>(context_, k_ceiled*n_ceiled);
+ auto b2_temp = (b2_no_temp) ? b_buffer : Buffer<T>(context_, k_ceiled*n_ceiled);
+ auto c_temp = Buffer<T>(context_, n_ceiled*n_ceiled);
+
+ // Convert the arguments to complex versions
+ auto complex_beta = T{beta, static_cast<U>(0.0)};
+
+ // Events of all kernels (including pre/post processing kernels)
+ auto eventWaitList = std::vector<Event>();
+ auto emptyEventList = std::vector<Event>();
+
+ // Runs the pre-processing kernels. This transposes the matrices A and B, but also pads them with
+ // zeros until they reach a certain multiple of size (kernel parameter dependent). In
+ // case nothing has to be done, these kernels can be skipped.
+ if (!a1_no_temp) {
+ auto eventProcessA1 = Event();
+ PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA1.pointer(), emptyEventList,
+ ab_one, ab_two, a_ld, a_offset, a_buffer,
+ n_ceiled, k_ceiled, n_ceiled, 0, a1_temp,
+ ConstantOne<T>(), program,
+ true, ab_rotated, ab_conjugate);
+ eventWaitList.push_back(eventProcessA1);
+ }
+ if (!a2_no_temp) {
+ auto eventProcessA2 = Event();
+ PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA2.pointer(), emptyEventList,
+ ab_one, ab_two, a_ld, a_offset, a_buffer,
+ n_ceiled, k_ceiled, n_ceiled, 0, a2_temp,
+ ConstantOne<T>(), program,
+ true, ab_rotated, !ab_conjugate);
+ eventWaitList.push_back(eventProcessA2);
+ }
+ if (!b1_no_temp) {
+ auto eventProcessB1 = Event();
+ PadCopyTransposeMatrix(queue_, device_, db_, eventProcessB1.pointer(), emptyEventList,
+ ab_one, ab_two, b_ld, b_offset, b_buffer,
+ n_ceiled, k_ceiled, n_ceiled, 0, b1_temp,
+ ConstantOne<T>(), program,
+ true, ab_rotated, ab_conjugate);
+ eventWaitList.push_back(eventProcessB1);
+ }
+ if (!b2_no_temp) {
+ auto eventProcessB2 = Event();
+ PadCopyTransposeMatrix(queue_, device_, db_, eventProcessB2.pointer(), emptyEventList,
+ ab_one, ab_two, b_ld, b_offset, b_buffer,
+ n_ceiled, k_ceiled, n_ceiled, 0, b2_temp,
+ ConstantOne<T>(), program,
+ true, ab_rotated, !ab_conjugate);
+ eventWaitList.push_back(eventProcessB2);
+ }
+
+ // Furthermore, also creates a (possibly padded) copy of matrix C, since it is not allowed to
+ // modify the other triangle.
+ auto eventProcessC = Event();
+ PadCopyTransposeMatrix(queue_, device_, db_, eventProcessC.pointer(), emptyEventList,
+ n, n, c_ld, c_offset, c_buffer,
+ n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
+ ConstantOne<T>(), program,
+ true, c_rotated, false);
+ eventWaitList.push_back(eventProcessC);
+
+ // Retrieves the XgemmUpper or XgemmLower kernel from the compiled binary
+ auto kernel = Kernel(program, kernel_name);
+
+ // Sets the kernel arguments
+ kernel.SetArgument(0, static_cast<int>(n_ceiled));
+ kernel.SetArgument(1, static_cast<int>(k_ceiled));
+ kernel.SetArgument(2, GetRealArg(alpha));
+ kernel.SetArgument(3, GetRealArg(complex_beta));
+ kernel.SetArgument(4, a1_temp());
+ kernel.SetArgument(5, b2_temp());
+ kernel.SetArgument(6, c_temp());
+
+ // Computes the global and local thread sizes
+ auto global = std::vector<size_t>{
+ (n_ceiled * db_["MDIMC"]) / db_["MWG"],
+ (n_ceiled * db_["NDIMC"]) / db_["NWG"]
+ };
+ auto local = std::vector<size_t>{db_["MDIMC"], db_["NDIMC"]};
+
+ // Launches the kernel
+ auto eventKernel1 = Event();
+ RunKernel(kernel, queue_, device_, global, local, eventKernel1.pointer(), eventWaitList);
+ eventWaitList.push_back(eventKernel1);
+
+ // Swaps the arguments for matrices A and B, sets 'beta' to 1, and conjugate alpha
+ auto conjugate_alpha = T{alpha.real(), -alpha.imag()};
+ auto complex_one = T{static_cast<U>(1.0), static_cast<U>(0.0)};
+ kernel.SetArgument(2, GetRealArg(conjugate_alpha));
+ kernel.SetArgument(3, GetRealArg(complex_one));
+ kernel.SetArgument(4, b1_temp());
+ kernel.SetArgument(5, a2_temp());
+
+ // Runs the kernel again
+ auto eventKernel2 = Event();
+ RunKernel(kernel, queue_, device_, global, local, eventKernel2.pointer(), eventWaitList);
+ eventWaitList.push_back(eventKernel2);
+
+ // Runs the post-processing kernel
+ auto upper = (triangle == Triangle::kUpper);
+ auto lower = (triangle == Triangle::kLower);
+ PadCopyTransposeMatrix(queue_, device_, db_, event_, eventWaitList,
+ n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
+ n, n, c_ld, c_offset, c_buffer,
+ ConstantOne<T>(), program,
+ false, c_rotated, false, upper, lower, true);
}
// =================================================================================================
diff --git a/src/routines/level3/xher2k.hpp b/src/routines/level3/xher2k.hpp
index 23996219..acc346e4 100644
--- a/src/routines/level3/xher2k.hpp
+++ b/src/routines/level3/xher2k.hpp
@@ -30,13 +30,13 @@ class Xher2k: public Routine {
Xher2k(Queue &queue, EventPointer event, const std::string &name = "HER2K");
// Templated-precision implementation of the routine
- StatusCode DoHer2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
- const size_t n, const size_t k,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
- const U beta,
- const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
+ void DoHer2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
+ const size_t n, const size_t k,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
+ const U beta,
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
};
// =================================================================================================
diff --git a/src/routines/level3/xherk.cpp b/src/routines/level3/xherk.cpp
index 77422526..ae8e9324 100644
--- a/src/routines/level3/xherk.cpp
+++ b/src/routines/level3/xherk.cpp
@@ -22,8 +22,7 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T, typename U>
Xherk<T,U>::Xherk(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
#include "../../kernels/level3/copy_pad.opencl"
@@ -32,14 +31,14 @@ Xherk<T,U>::Xherk(Queue &queue, EventPointer event, const std::string &name):
#include "../../kernels/level3/xgemm_part1.opencl"
#include "../../kernels/level3/xgemm_part2.opencl"
#include "../../kernels/level3/xgemm_part3.opencl"
- ;
+ }) {
}
// =================================================================================================
// The main routine
template <typename T, typename U>
-StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
+void Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
const size_t n, const size_t k,
const U alpha,
const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
@@ -47,7 +46,7 @@ StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, cons
const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) {
// Makes sure all dimensions are larger than zero
- if ((n == 0) || (k == 0) ) { return StatusCode::kInvalidDimension; }
+ if ((n == 0) || (k == 0) ) { throw BLASError(StatusCode::kInvalidDimension); }
// Determines whether to apply the conjugate transpose to matrix B (argument: no transpose) or
// to matrix A (argument: conjugate transpose)
@@ -70,10 +69,8 @@ StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, cons
// space. Also tests that the leading dimensions of:
// matrix A cannot be less than N when rotated, or less than K when not-rotated
// matrix C cannot be less than N
- auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld);
- if (ErrorIn(status)) { return status; }
- status = TestMatrixC(n, n, c_buffer, c_offset, c_ld);
- if (ErrorIn(status)) { return status; }
+ TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld);
+ TestMatrixC(n, n, c_buffer, c_offset, c_ld);
// Calculates the ceiled versions of n and k
auto n_ceiled = Ceil(Ceil(n, db_["MWG"]), db_["NWG"]);
@@ -82,106 +79,92 @@ StatusCode Xherk<T,U>::DoHerk(const Layout layout, const Triangle triangle, cons
// Decides which kernel to run: the upper-triangular or lower-triangular version
auto kernel_name = (triangle == Triangle::kUpper) ? "XgemmUpper" : "XgemmLower";
- // The padded/transposed input/output matrices: if memory allocation fails, throw an exception
- try {
-
- // Loads the program from the database
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
-
- // Determines whether or not temporary matrices are needed
- auto a_no_temp = a_one == n_ceiled && a_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
- a_rotated == false && a_conjugate == false;
- auto b_no_temp = a_one == n_ceiled && a_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
- a_rotated == false && b_conjugate == false;
-
- // Creates the temporary matrices
- auto a_temp = (a_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*n_ceiled);
- auto b_temp = (b_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*n_ceiled);
- auto c_temp = Buffer<T>(context_, n_ceiled*n_ceiled);
-
- // Convert the arguments to complex versions
- auto complex_alpha = T{alpha, static_cast<U>(0.0)};
- auto complex_beta = T{beta, static_cast<U>(0.0)};
-
- // Events of all kernels (including pre/post processing kernels)
- auto eventWaitList = std::vector<Event>();
- auto emptyEventList = std::vector<Event>();
-
- // Runs the pre-processing kernel for matrix A. This transposes the matrix, but also pads zeros
- // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In
- // case nothing has to be done, these kernels can be skipped. Two copies are created.
- if (!a_no_temp) {
- auto eventProcessA = Event();
- status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA.pointer(), emptyEventList,
- a_one, a_two, a_ld, a_offset, a_buffer,
- n_ceiled, k_ceiled, n_ceiled, 0, a_temp,
- ConstantOne<T>(), program,
- true, a_rotated, a_conjugate);
- eventWaitList.push_back(eventProcessA);
- if (ErrorIn(status)) { return status; }
- }
- if (!b_no_temp) {
- auto eventProcessB = Event();
- status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessB.pointer(), emptyEventList,
- a_one, a_two, a_ld, a_offset, a_buffer,
- n_ceiled, k_ceiled, n_ceiled, 0, b_temp,
- ConstantOne<T>(), program,
- true, a_rotated, b_conjugate);
- eventWaitList.push_back(eventProcessB);
- if (ErrorIn(status)) { return status; }
- }
-
- // Furthermore, also creates a (possibly padded) copy of matrix C, since it is not allowed to
- // modify the other triangle.
- auto eventProcessC = Event();
- status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessC.pointer(), emptyEventList,
- n, n, c_ld, c_offset, c_buffer,
- n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
- ConstantOne<T>(), program,
- true, c_rotated, false);
- eventWaitList.push_back(eventProcessC);
- if (ErrorIn(status)) { return status; }
-
- // Retrieves the XgemmUpper or XgemmLower kernel from the compiled binary
- try {
- auto kernel = Kernel(program, kernel_name);
-
- // Sets the kernel arguments
- kernel.SetArgument(0, static_cast<int>(n_ceiled));
- kernel.SetArgument(1, static_cast<int>(k_ceiled));
- kernel.SetArgument(2, GetRealArg(complex_alpha));
- kernel.SetArgument(3, GetRealArg(complex_beta));
- kernel.SetArgument(4, a_temp());
- kernel.SetArgument(5, b_temp());
- kernel.SetArgument(6, c_temp());
-
- // Computes the global and local thread sizes
- auto global = std::vector<size_t>{
- (n_ceiled * db_["MDIMC"]) / db_["MWG"],
- (n_ceiled * db_["NDIMC"]) / db_["NWG"]
- };
- auto local = std::vector<size_t>{db_["MDIMC"], db_["NDIMC"]};
-
- // Launches the kernel
- auto eventKernel = Event();
- status = RunKernel(kernel, queue_, device_, global, local, eventKernel.pointer(), eventWaitList);
- if (ErrorIn(status)) { return status; }
- eventWaitList.push_back(eventKernel);
-
- // Runs the post-processing kernel
- auto upper = (triangle == Triangle::kUpper);
- auto lower = (triangle == Triangle::kLower);
- status = PadCopyTransposeMatrix(queue_, device_, db_, event_, eventWaitList,
- n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
- n, n, c_ld, c_offset, c_buffer,
- ConstantOne<T>(), program,
- false, c_rotated, false, upper, lower, true);
- if (ErrorIn(status)) { return status; }
-
- // Successfully finished the computation
- return StatusCode::kSuccess;
- } catch (...) { return StatusCode::kInvalidKernel; }
- } catch (...) { return StatusCode::kTempBufferAllocFailure; }
+ // Loads the program from the database
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+
+ // Determines whether or not temporary matrices are needed
+ auto a_no_temp = a_one == n_ceiled && a_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
+ a_rotated == false && a_conjugate == false;
+ auto b_no_temp = a_one == n_ceiled && a_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
+ a_rotated == false && b_conjugate == false;
+
+ // Creates the temporary matrices
+ auto a_temp = (a_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*n_ceiled);
+ auto b_temp = (b_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*n_ceiled);
+ auto c_temp = Buffer<T>(context_, n_ceiled*n_ceiled);
+
+ // Convert the arguments to complex versions
+ auto complex_alpha = T{alpha, static_cast<U>(0.0)};
+ auto complex_beta = T{beta, static_cast<U>(0.0)};
+
+ // Events of all kernels (including pre/post processing kernels)
+ auto eventWaitList = std::vector<Event>();
+ auto emptyEventList = std::vector<Event>();
+
+ // Runs the pre-processing kernel for matrix A. This transposes the matrix, but also pads zeros
+ // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In
+ // case nothing has to be done, these kernels can be skipped. Two copies are created.
+ if (!a_no_temp) {
+ auto eventProcessA = Event();
+ PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA.pointer(), emptyEventList,
+ a_one, a_two, a_ld, a_offset, a_buffer,
+ n_ceiled, k_ceiled, n_ceiled, 0, a_temp,
+ ConstantOne<T>(), program,
+ true, a_rotated, a_conjugate);
+ eventWaitList.push_back(eventProcessA);
+ }
+ if (!b_no_temp) {
+ auto eventProcessB = Event();
+ PadCopyTransposeMatrix(queue_, device_, db_, eventProcessB.pointer(), emptyEventList,
+ a_one, a_two, a_ld, a_offset, a_buffer,
+ n_ceiled, k_ceiled, n_ceiled, 0, b_temp,
+ ConstantOne<T>(), program,
+ true, a_rotated, b_conjugate);
+ eventWaitList.push_back(eventProcessB);
+ }
+
+ // Furthermore, also creates a (possibly padded) copy of matrix C, since it is not allowed to
+ // modify the other triangle.
+ auto eventProcessC = Event();
+ PadCopyTransposeMatrix(queue_, device_, db_, eventProcessC.pointer(), emptyEventList,
+ n, n, c_ld, c_offset, c_buffer,
+ n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
+ ConstantOne<T>(), program,
+ true, c_rotated, false);
+ eventWaitList.push_back(eventProcessC);
+
+ // Retrieves the XgemmUpper or XgemmLower kernel from the compiled binary
+ auto kernel = Kernel(program, kernel_name);
+
+ // Sets the kernel arguments
+ kernel.SetArgument(0, static_cast<int>(n_ceiled));
+ kernel.SetArgument(1, static_cast<int>(k_ceiled));
+ kernel.SetArgument(2, GetRealArg(complex_alpha));
+ kernel.SetArgument(3, GetRealArg(complex_beta));
+ kernel.SetArgument(4, a_temp());
+ kernel.SetArgument(5, b_temp());
+ kernel.SetArgument(6, c_temp());
+
+ // Computes the global and local thread sizes
+ auto global = std::vector<size_t>{
+ (n_ceiled * db_["MDIMC"]) / db_["MWG"],
+ (n_ceiled * db_["NDIMC"]) / db_["NWG"]
+ };
+ auto local = std::vector<size_t>{db_["MDIMC"], db_["NDIMC"]};
+
+ // Launches the kernel
+ auto eventKernel = Event();
+ RunKernel(kernel, queue_, device_, global, local, eventKernel.pointer(), eventWaitList);
+ eventWaitList.push_back(eventKernel);
+
+ // Runs the post-processing kernel
+ auto upper = (triangle == Triangle::kUpper);
+ auto lower = (triangle == Triangle::kLower);
+ PadCopyTransposeMatrix(queue_, device_, db_, event_, eventWaitList,
+ n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
+ n, n, c_ld, c_offset, c_buffer,
+ ConstantOne<T>(), program,
+ false, c_rotated, false, upper, lower, true);
}
// =================================================================================================
diff --git a/src/routines/level3/xherk.hpp b/src/routines/level3/xherk.hpp
index 3f156a1b..51f29d7e 100644
--- a/src/routines/level3/xherk.hpp
+++ b/src/routines/level3/xherk.hpp
@@ -30,12 +30,12 @@ class Xherk: public Routine {
Xherk(Queue &queue, EventPointer event, const std::string &name = "HERK");
// Templated-precision implementation of the routine
- StatusCode DoHerk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
- const size_t n, const size_t k,
- const U alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const U beta,
- const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
+ void DoHerk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
+ const size_t n, const size_t k,
+ const U alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const U beta,
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
};
// =================================================================================================
diff --git a/src/routines/level3/xsymm.cpp b/src/routines/level3/xsymm.cpp
index 04e4b718..d7f771d1 100644
--- a/src/routines/level3/xsymm.cpp
+++ b/src/routines/level3/xsymm.cpp
@@ -29,7 +29,7 @@ Xsymm<T>::Xsymm(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xsymm<T>::DoSymm(const Layout layout, const Side side, const Triangle triangle,
+void Xsymm<T>::DoSymm(const Layout layout, const Side side, const Triangle triangle,
const size_t m, const size_t n,
const T alpha,
const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
@@ -38,15 +38,14 @@ StatusCode Xsymm<T>::DoSymm(const Layout layout, const Side side, const Triangle
const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) {
// Makes sure all dimensions are larger than zero
- if ((m == 0) || (n == 0) ) { return StatusCode::kInvalidDimension; }
+ if ((m == 0) || (n == 0) ) { throw BLASError(StatusCode::kInvalidDimension); }
// Computes the k dimension. This is based on whether or not the symmetric matrix is A (on the
// left) or B (on the right) in the Xgemm routine.
auto k = (side == Side::kLeft) ? m : n;
// Checks for validity of the squared A matrix
- auto status = TestMatrixA(k, k, a_buffer, a_offset, a_ld);
- if (ErrorIn(status)) { return status; }
+ TestMatrixA(k, k, a_buffer, a_offset, a_ld);
// Determines which kernel to run based on the layout (the Xgemm kernel assumes column-major as
// default) and on whether we are dealing with an upper or lower triangle of the symmetric matrix
@@ -55,73 +54,68 @@ StatusCode Xsymm<T>::DoSymm(const Layout layout, const Side side, const Triangle
auto kernel_name = (is_upper) ? "SymmUpperToSquared" : "SymmLowerToSquared";
// Temporary buffer for a copy of the symmetric matrix
- try {
- auto temp_symm = Buffer<T>(context_, k*k);
-
- // Creates a general matrix from the symmetric matrix to be able to run the regular Xgemm
- // routine afterwards
+ auto temp_symm = Buffer<T>(context_, k*k);
+
+ // Creates a general matrix from the symmetric matrix to be able to run the regular Xgemm
+ // routine afterwards
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+ auto kernel = Kernel(program, kernel_name);
+
+ // Sets the arguments for the symmetric-to-squared kernel
+ kernel.SetArgument(0, static_cast<int>(k));
+ kernel.SetArgument(1, static_cast<int>(a_ld));
+ kernel.SetArgument(2, static_cast<int>(a_offset));
+ kernel.SetArgument(3, a_buffer());
+ kernel.SetArgument(4, static_cast<int>(k));
+ kernel.SetArgument(5, static_cast<int>(k));
+ kernel.SetArgument(6, static_cast<int>(0));
+ kernel.SetArgument(7, temp_symm());
+
+ // Uses the common padding kernel's thread configuration. This is allowed, since the
+ // symmetric-to-squared kernel uses the same parameters.
+ auto global = std::vector<size_t>{Ceil(CeilDiv(k, db_["PAD_WPTX"]), db_["PAD_DIMX"]),
+ Ceil(CeilDiv(k, db_["PAD_WPTY"]), db_["PAD_DIMY"])};
+ auto local = std::vector<size_t>{db_["PAD_DIMX"], db_["PAD_DIMY"]};
+ auto kernelEvent = Event();
+ RunKernel(kernel, queue_, device_, global, local, kernelEvent.pointer());
+
+ // Synchronize now: 'DoGemm' does not accept a list of events to wait for
+ kernelEvent.WaitForCompletion();
+
+ // Runs the regular Xgemm code with either "C := AB+C" or ...
+ if (side == Side::kLeft) {
+ DoGemm(layout, Transpose::kNo, Transpose::kNo,
+ m, n, k,
+ alpha,
+ temp_symm, 0, k,
+ b_buffer, b_offset, b_ld,
+ beta,
+ c_buffer, c_offset, c_ld);
+ }
+
+ // ... with "C := BA+C". Note that A and B are now reversed.
+ else {
try {
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel = Kernel(program, kernel_name);
-
- // Sets the arguments for the symmetric-to-squared kernel
- kernel.SetArgument(0, static_cast<int>(k));
- kernel.SetArgument(1, static_cast<int>(a_ld));
- kernel.SetArgument(2, static_cast<int>(a_offset));
- kernel.SetArgument(3, a_buffer());
- kernel.SetArgument(4, static_cast<int>(k));
- kernel.SetArgument(5, static_cast<int>(k));
- kernel.SetArgument(6, static_cast<int>(0));
- kernel.SetArgument(7, temp_symm());
-
- // Uses the common padding kernel's thread configuration. This is allowed, since the
- // symmetric-to-squared kernel uses the same parameters.
- auto global = std::vector<size_t>{Ceil(CeilDiv(k, db_["PAD_WPTX"]), db_["PAD_DIMX"]),
- Ceil(CeilDiv(k, db_["PAD_WPTY"]), db_["PAD_DIMY"])};
- auto local = std::vector<size_t>{db_["PAD_DIMX"], db_["PAD_DIMY"]};
- auto kernelEvent = Event();
- status = RunKernel(kernel, queue_, device_, global, local, kernelEvent.pointer());
- if (ErrorIn(status)) { return status; }
-
- // Synchronize now: 'DoGemm' does not accept a list of events to wait for
- kernelEvent.WaitForCompletion();
-
- // Runs the regular Xgemm code with either "C := AB+C" or ...
- if (side == Side::kLeft) {
- status = DoGemm(layout, Transpose::kNo, Transpose::kNo,
- m, n, k,
- alpha,
- temp_symm, 0, k,
- b_buffer, b_offset, b_ld,
- beta,
- c_buffer, c_offset, c_ld);
- }
-
- // ... with "C := BA+C". Note that A and B are now reversed.
- else {
- status = DoGemm(layout, Transpose::kNo, Transpose::kNo,
- m, n, k,
- alpha,
- b_buffer, b_offset, b_ld,
- temp_symm, 0, k,
- beta,
- c_buffer, c_offset, c_ld);
-
- // A and B are now reversed, so also reverse the error codes returned from the Xgemm routine
- switch(status) {
- case StatusCode::kInvalidMatrixA: status = StatusCode::kInvalidMatrixB; break;
- case StatusCode::kInvalidMatrixB: status = StatusCode::kInvalidMatrixA; break;
- case StatusCode::kInvalidLeadDimA: status = StatusCode::kInvalidLeadDimB; break;
- case StatusCode::kInvalidLeadDimB: status = StatusCode::kInvalidLeadDimA; break;
- case StatusCode::kInsufficientMemoryA: status = StatusCode::kInsufficientMemoryB; break;
- case StatusCode::kInsufficientMemoryB: status = StatusCode::kInsufficientMemoryA; break;
- }
+ DoGemm(layout, Transpose::kNo, Transpose::kNo,
+ m, n, k,
+ alpha,
+ b_buffer, b_offset, b_ld,
+ temp_symm, 0, k,
+ beta,
+ c_buffer, c_offset, c_ld);
+ } catch (BLASError &e) {
+ // A and B are now reversed, so also reverse the error codes thrown by the Xgemm routine
+ switch(e.status()) {
+ case StatusCode::kInvalidMatrixA: throw BLASError(StatusCode::kInvalidMatrixB, e.details());
+ case StatusCode::kInvalidMatrixB: throw BLASError(StatusCode::kInvalidMatrixA, e.details());
+ case StatusCode::kInvalidLeadDimA: throw BLASError(StatusCode::kInvalidLeadDimB, e.details());
+ case StatusCode::kInvalidLeadDimB: throw BLASError(StatusCode::kInvalidLeadDimA, e.details());
+ case StatusCode::kInsufficientMemoryA: throw BLASError(StatusCode::kInsufficientMemoryB, e.details());
+ case StatusCode::kInsufficientMemoryB: throw BLASError(StatusCode::kInsufficientMemoryA, e.details());
+ default: throw;
}
-
- // Return the status of the Xgemm routine
- return status;
- } catch (...) { return StatusCode::kInvalidKernel; }
- } catch (...) { return StatusCode::kTempBufferAllocFailure; }
+ }
+ }
}
// =================================================================================================
diff --git a/src/routines/level3/xsymm.hpp b/src/routines/level3/xsymm.hpp
index 428f78ef..ee965364 100644
--- a/src/routines/level3/xsymm.hpp
+++ b/src/routines/level3/xsymm.hpp
@@ -39,13 +39,13 @@ class Xsymm: public Xgemm<T> {
Xsymm(Queue &queue, EventPointer event, const std::string &name = "SYMM");
// Templated-precision implementation of the routine
- StatusCode DoSymm(const Layout layout, const Side side, const Triangle triangle,
- const size_t m, const size_t n,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
- const T beta,
- const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
+ void DoSymm(const Layout layout, const Side side, const Triangle triangle,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
+ const T beta,
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
};
// =================================================================================================
diff --git a/src/routines/level3/xsyr2k.cpp b/src/routines/level3/xsyr2k.cpp
index badf3100..cb0e0461 100644
--- a/src/routines/level3/xsyr2k.cpp
+++ b/src/routines/level3/xsyr2k.cpp
@@ -22,8 +22,7 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xsyr2k<T>::Xsyr2k(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
#include "../../kernels/level3/copy_pad.opencl"
@@ -32,14 +31,14 @@ Xsyr2k<T>::Xsyr2k(Queue &queue, EventPointer event, const std::string &name):
#include "../../kernels/level3/xgemm_part1.opencl"
#include "../../kernels/level3/xgemm_part2.opencl"
#include "../../kernels/level3/xgemm_part3.opencl"
- ;
+ }) {
}
// =================================================================================================
// The main routine
template <typename T>
-StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
+void Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
const size_t n, const size_t k,
const T alpha,
const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
@@ -48,7 +47,7 @@ StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, cons
const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) {
// Makes sure all dimensions are larger than zero
- if ((n == 0) || (k == 0) ) { return StatusCode::kInvalidDimension; }
+ if ((n == 0) || (k == 0) ) { throw BLASError(StatusCode::kInvalidDimension); }
// Computes whether or not the matrices are transposed in memory. This is based on their layout
// (row or column-major) and whether or not they are requested to be pre-transposed.
@@ -67,12 +66,9 @@ StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, cons
// matrix A cannot be less than N when rotated, or less than K when not-rotated
// matrix B cannot be less than N when rotated, or less than K when not-rotated
// matrix C cannot be less than N
- auto status = TestMatrixA(ab_one, ab_two, a_buffer, a_offset, a_ld);
- if (ErrorIn(status)) { return status; }
- status = TestMatrixB(ab_one, ab_two, b_buffer, b_offset, b_ld);
- if (ErrorIn(status)) { return status; }
- status = TestMatrixC(n, n, c_buffer, c_offset, c_ld);
- if (ErrorIn(status)) { return status; }
+ TestMatrixA(ab_one, ab_two, a_buffer, a_offset, a_ld);
+ TestMatrixB(ab_one, ab_two, b_buffer, b_offset, b_ld);
+ TestMatrixC(n, n, c_buffer, c_offset, c_ld);
// Calculates the ceiled versions of n and k
auto n_ceiled = Ceil(Ceil(n, db_["MWG"]), db_["NWG"]);
@@ -81,114 +77,99 @@ StatusCode Xsyr2k<T>::DoSyr2k(const Layout layout, const Triangle triangle, cons
// Decides which kernel to run: the upper-triangular or lower-triangular version
auto kernel_name = (triangle == Triangle::kUpper) ? "XgemmUpper" : "XgemmLower";
- // The padded/transposed input/output matrices: if memory allocation fails, throw an exception
- try {
-
- // Loads the program from the database
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
-
- // Determines whether or not temporary matrices are needed
- auto a_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
- ab_rotated == false;
- auto b_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && b_ld == n_ceiled && b_offset == 0 &&
- ab_rotated == false;
-
- // Creates the temporary matrices
- auto a_temp = (a_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*n_ceiled);
- auto b_temp = (b_no_temp) ? b_buffer : Buffer<T>(context_, k_ceiled*n_ceiled);
- auto c_temp = Buffer<T>(context_, n_ceiled*n_ceiled);
-
- // Events of all kernels (including pre/post processing kernels)
- auto eventWaitList = std::vector<Event>();
- auto emptyEventList = std::vector<Event>();
-
- // Runs the pre-processing kernels. This transposes the matrices A and B, but also pads zeros to
- // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In
- // case nothing has to be done, these kernels can be skipped.
- if (!a_no_temp) {
- auto eventProcessA = Event();
- status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA.pointer(), emptyEventList,
- ab_one, ab_two, a_ld, a_offset, a_buffer,
- n_ceiled, k_ceiled, n_ceiled, 0, a_temp,
- ConstantOne<T>(), program,
- true, ab_rotated, false);
- if (ErrorIn(status)) { return status; }
- eventWaitList.push_back(eventProcessA);
- }
- if (!b_no_temp) {
- auto eventProcessB = Event();
- status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessB.pointer(), emptyEventList,
- ab_one, ab_two, b_ld, b_offset, b_buffer,
- n_ceiled, k_ceiled, n_ceiled, 0, b_temp,
- ConstantOne<T>(), program,
- true, ab_rotated, false);
- if (ErrorIn(status)) { return status; }
- eventWaitList.push_back(eventProcessB);
- }
-
- // Furthermore, also creates a (possibly padded) copy of matrix C, since it is not allowed to
- // modify the other triangle.
- auto eventProcessC = Event();
- status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessC.pointer(), emptyEventList,
- n, n, c_ld, c_offset, c_buffer,
- n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
- ConstantOne<T>(), program,
- true, c_rotated, false);
- if (ErrorIn(status)) { return status; }
- eventWaitList.push_back(eventProcessC);
-
- // Retrieves the XgemmUpper or XgemmLower kernel from the compiled binary
- try {
- auto kernel = Kernel(program, kernel_name);
-
- // Sets the kernel arguments
- kernel.SetArgument(0, static_cast<int>(n_ceiled));
- kernel.SetArgument(1, static_cast<int>(k_ceiled));
- kernel.SetArgument(2, GetRealArg(alpha));
- kernel.SetArgument(3, GetRealArg(beta));
- kernel.SetArgument(4, a_temp());
- kernel.SetArgument(5, b_temp());
- kernel.SetArgument(6, c_temp());
-
- // Computes the global and local thread sizes
- auto global = std::vector<size_t>{
- (n_ceiled * db_["MDIMC"]) / db_["MWG"],
- (n_ceiled * db_["NDIMC"]) / db_["NWG"]
- };
- auto local = std::vector<size_t>{db_["MDIMC"], db_["NDIMC"]};
-
- // Launches the kernel
- auto eventKernel1 = Event();
- status = RunKernel(kernel, queue_, device_, global, local, eventKernel1.pointer(), eventWaitList);
- if (ErrorIn(status)) { return status; }
- eventWaitList.push_back(eventKernel1);
-
- // Swaps the arguments for matrices A and B, and sets 'beta' to 1
- auto one = static_cast<T>(1);
- kernel.SetArgument(3, GetRealArg(one));
- kernel.SetArgument(4, b_temp());
- kernel.SetArgument(5, a_temp());
-
- // Runs the kernel again
- auto eventKernel2 = Event();
- status = RunKernel(kernel, queue_, device_, global, local, eventKernel2.pointer(), eventWaitList);
- if (ErrorIn(status)) { return status; }
- eventWaitList.push_back(eventKernel2);
-
- // Runs the post-processing kernel
- auto upper = (triangle == Triangle::kUpper);
- auto lower = (triangle == Triangle::kLower);
- status = PadCopyTransposeMatrix(queue_, device_, db_, event_, eventWaitList,
- n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
- n, n, c_ld, c_offset, c_buffer,
- ConstantOne<T>(), program,
- false, c_rotated, false, upper, lower, false);
- if (ErrorIn(status)) { return status; }
-
- // Successfully finished the computation
- return StatusCode::kSuccess;
- } catch (...) { return StatusCode::kInvalidKernel; }
- } catch (...) { return StatusCode::kTempBufferAllocFailure; }
+ // Loads the program from the database
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+
+ // Determines whether or not temporary matrices are needed
+ auto a_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
+ ab_rotated == false;
+ auto b_no_temp = ab_one == n_ceiled && ab_two == k_ceiled && b_ld == n_ceiled && b_offset == 0 &&
+ ab_rotated == false;
+
+ // Creates the temporary matrices
+ auto a_temp = (a_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*n_ceiled);
+ auto b_temp = (b_no_temp) ? b_buffer : Buffer<T>(context_, k_ceiled*n_ceiled);
+ auto c_temp = Buffer<T>(context_, n_ceiled*n_ceiled);
+
+ // Events of all kernels (including pre/post processing kernels)
+ auto eventWaitList = std::vector<Event>();
+ auto emptyEventList = std::vector<Event>();
+
+ // Runs the pre-processing kernels. This transposes the matrices A and B, but also pads zeros to
+ // fill it up until it reaches a certain multiple of size (kernel parameter dependent). In
+ // case nothing has to be done, these kernels can be skipped.
+ if (!a_no_temp) {
+ auto eventProcessA = Event();
+ PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA.pointer(), emptyEventList,
+ ab_one, ab_two, a_ld, a_offset, a_buffer,
+ n_ceiled, k_ceiled, n_ceiled, 0, a_temp,
+ ConstantOne<T>(), program,
+ true, ab_rotated, false);
+ eventWaitList.push_back(eventProcessA);
+ }
+ if (!b_no_temp) {
+ auto eventProcessB = Event();
+ PadCopyTransposeMatrix(queue_, device_, db_, eventProcessB.pointer(), emptyEventList,
+ ab_one, ab_two, b_ld, b_offset, b_buffer,
+ n_ceiled, k_ceiled, n_ceiled, 0, b_temp,
+ ConstantOne<T>(), program,
+ true, ab_rotated, false);
+ eventWaitList.push_back(eventProcessB);
+ }
+
+ // Furthermore, also creates a (possibly padded) copy of matrix C, since it is not allowed to
+ // modify the other triangle.
+ auto eventProcessC = Event();
+ PadCopyTransposeMatrix(queue_, device_, db_, eventProcessC.pointer(), emptyEventList,
+ n, n, c_ld, c_offset, c_buffer,
+ n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
+ ConstantOne<T>(), program,
+ true, c_rotated, false);
+ eventWaitList.push_back(eventProcessC);
+
+ // Retrieves the XgemmUpper or XgemmLower kernel from the compiled binary
+ auto kernel = Kernel(program, kernel_name);
+
+ // Sets the kernel arguments
+ kernel.SetArgument(0, static_cast<int>(n_ceiled));
+ kernel.SetArgument(1, static_cast<int>(k_ceiled));
+ kernel.SetArgument(2, GetRealArg(alpha));
+ kernel.SetArgument(3, GetRealArg(beta));
+ kernel.SetArgument(4, a_temp());
+ kernel.SetArgument(5, b_temp());
+ kernel.SetArgument(6, c_temp());
+
+ // Computes the global and local thread sizes
+ auto global = std::vector<size_t>{
+ (n_ceiled * db_["MDIMC"]) / db_["MWG"],
+ (n_ceiled * db_["NDIMC"]) / db_["NWG"]
+ };
+ auto local = std::vector<size_t>{db_["MDIMC"], db_["NDIMC"]};
+
+ // Launches the kernel
+ auto eventKernel1 = Event();
+ RunKernel(kernel, queue_, device_, global, local, eventKernel1.pointer(), eventWaitList);
+ eventWaitList.push_back(eventKernel1);
+
+ // Swaps the arguments for matrices A and B, and sets 'beta' to 1
+ auto one = static_cast<T>(1);
+ kernel.SetArgument(3, GetRealArg(one));
+ kernel.SetArgument(4, b_temp());
+ kernel.SetArgument(5, a_temp());
+
+ // Runs the kernel again
+ auto eventKernel2 = Event();
+ RunKernel(kernel, queue_, device_, global, local, eventKernel2.pointer(), eventWaitList);
+ eventWaitList.push_back(eventKernel2);
+
+ // Runs the post-processing kernel
+ auto upper = (triangle == Triangle::kUpper);
+ auto lower = (triangle == Triangle::kLower);
+ PadCopyTransposeMatrix(queue_, device_, db_, event_, eventWaitList,
+ n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
+ n, n, c_ld, c_offset, c_buffer,
+ ConstantOne<T>(), program,
+ false, c_rotated, false, upper, lower, false);
}
// =================================================================================================
diff --git a/src/routines/level3/xsyr2k.hpp b/src/routines/level3/xsyr2k.hpp
index 56185653..a02c6e16 100644
--- a/src/routines/level3/xsyr2k.hpp
+++ b/src/routines/level3/xsyr2k.hpp
@@ -30,13 +30,13 @@ class Xsyr2k: public Routine {
Xsyr2k(Queue &queue, EventPointer event, const std::string &name = "SYR2K");
// Templated-precision implementation of the routine
- StatusCode DoSyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
- const size_t n, const size_t k,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
- const T beta,
- const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
+ void DoSyr2k(const Layout layout, const Triangle triangle, const Transpose ab_transpose,
+ const size_t n, const size_t k,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld,
+ const T beta,
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
};
// =================================================================================================
diff --git a/src/routines/level3/xsyrk.cpp b/src/routines/level3/xsyrk.cpp
index 438aa218..bd6c4b25 100644
--- a/src/routines/level3/xsyrk.cpp
+++ b/src/routines/level3/xsyrk.cpp
@@ -22,8 +22,7 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xsyrk<T>::Xsyrk(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose","Xgemm"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
#include "../../kernels/level3/copy_pad.opencl"
@@ -32,14 +31,14 @@ Xsyrk<T>::Xsyrk(Queue &queue, EventPointer event, const std::string &name):
#include "../../kernels/level3/xgemm_part1.opencl"
#include "../../kernels/level3/xgemm_part2.opencl"
#include "../../kernels/level3/xgemm_part3.opencl"
- ;
+ }) {
}
// =================================================================================================
// The main routine
template <typename T>
-StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
+void Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
const size_t n, const size_t k,
const T alpha,
const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
@@ -47,7 +46,7 @@ StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const
const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld) {
// Makes sure all dimensions are larger than zero
- if ((n == 0) || (k == 0) ) { return StatusCode::kInvalidDimension; }
+ if ((n == 0) || (k == 0) ) { throw BLASError(StatusCode::kInvalidDimension); }
// Computes whether or not the matrices are transposed in memory. This is based on their layout
// (row or column-major) and whether or not they are requested to be pre-transposed.
@@ -65,10 +64,8 @@ StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const
// space. Also tests that the leading dimensions of:
// matrix A cannot be less than N when rotated, or less than K when not-rotated
// matrix C cannot be less than N
- auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld);
- if (ErrorIn(status)) { return status; }
- status = TestMatrixC(n, n, c_buffer, c_offset, c_ld);
- if (ErrorIn(status)) { return status; }
+ TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld);
+ TestMatrixC(n, n, c_buffer, c_offset, c_ld);
// Calculates the ceiled versions of n and k
auto n_ceiled = Ceil(Ceil(n, db_["MWG"]), db_["NWG"]);
@@ -77,90 +74,76 @@ StatusCode Xsyrk<T>::DoSyrk(const Layout layout, const Triangle triangle, const
// Decides which kernel to run: the upper-triangular or lower-triangular version
auto kernel_name = (triangle == Triangle::kUpper) ? "XgemmUpper" : "XgemmLower";
- // The padded/transposed input/output matrices: if memory allocation fails, throw an exception
- try {
-
- // Loads the program from the database
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
-
- // Determines whether or not temporary matrices are needed
- auto a_no_temp = a_one == n_ceiled && a_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
- a_rotated == false;
-
- // Creates the temporary matrices
- auto a_temp = (a_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*n_ceiled);
- auto c_temp = Buffer<T>(context_, n_ceiled*n_ceiled);
-
- // Events of all kernels (including pre/post processing kernels)
- auto eventWaitList = std::vector<Event>();
- auto emptyEventList = std::vector<Event>();
-
- // Runs the pre-processing kernel for matrix A. This transposes the matrix, but also pads zeros
- // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In
- // case nothing has to be done, these kernels can be skipped.
- if (!a_no_temp) {
- auto eventProcessA = Event();
- status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA.pointer(), emptyEventList,
- a_one, a_two, a_ld, a_offset, a_buffer,
- n_ceiled, k_ceiled, n_ceiled, 0, a_temp,
- ConstantOne<T>(), program,
- true, a_rotated, false);
- if (ErrorIn(status)) { return status; }
- eventWaitList.push_back(eventProcessA);
- }
-
- // Furthermore, also creates a (possibly padded) copy of matrix C, since it is not allowed to
- // modify the other triangle.
- auto eventProcessC = Event();
- status = PadCopyTransposeMatrix(queue_, device_, db_, eventProcessC.pointer(), emptyEventList,
- n, n, c_ld, c_offset, c_buffer,
- n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
- ConstantOne<T>(), program,
- true, c_rotated, false);
- if (ErrorIn(status)) { return status; }
- eventWaitList.push_back(eventProcessC);
-
- // Retrieves the XgemmUpper or XgemmLower kernel from the compiled binary
- try {
- auto kernel = Kernel(program, kernel_name);
-
- // Sets the kernel arguments
- kernel.SetArgument(0, static_cast<int>(n_ceiled));
- kernel.SetArgument(1, static_cast<int>(k_ceiled));
- kernel.SetArgument(2, GetRealArg(alpha));
- kernel.SetArgument(3, GetRealArg(beta));
- kernel.SetArgument(4, a_temp());
- kernel.SetArgument(5, a_temp());
- kernel.SetArgument(6, c_temp());
-
- // Computes the global and local thread sizes
- auto global = std::vector<size_t>{
- (n_ceiled * db_["MDIMC"]) / db_["MWG"],
- (n_ceiled * db_["NDIMC"]) / db_["NWG"]
- };
- auto local = std::vector<size_t>{db_["MDIMC"], db_["NDIMC"]};
-
- // Launches the kernel
- auto eventKernel = Event();
- status = RunKernel(kernel, queue_, device_, global, local, eventKernel.pointer(), eventWaitList);
- if (ErrorIn(status)) { return status; }
- eventWaitList.push_back(eventKernel);
-
- // Runs the post-processing kernel
- auto upper = (triangle == Triangle::kUpper);
- auto lower = (triangle == Triangle::kLower);
- status = PadCopyTransposeMatrix(queue_, device_, db_, event_, eventWaitList,
- n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
- n, n, c_ld, c_offset, c_buffer,
- ConstantOne<T>(), program,
- false, c_rotated, false, upper, lower, false);
- if (ErrorIn(status)) { return status; }
-
-
- // Successfully finished the computation
- return StatusCode::kSuccess;
- } catch (...) { return StatusCode::kInvalidKernel; }
- } catch (...) { return StatusCode::kTempBufferAllocFailure; }
+ // Loads the program from the database
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+
+ // Determines whether or not temporary matrices are needed
+ auto a_no_temp = a_one == n_ceiled && a_two == k_ceiled && a_ld == n_ceiled && a_offset == 0 &&
+ a_rotated == false;
+
+ // Creates the temporary matrices
+ auto a_temp = (a_no_temp) ? a_buffer : Buffer<T>(context_, k_ceiled*n_ceiled);
+ auto c_temp = Buffer<T>(context_, n_ceiled*n_ceiled);
+
+ // Events of all kernels (including pre/post processing kernels)
+ auto eventWaitList = std::vector<Event>();
+ auto emptyEventList = std::vector<Event>();
+
+ // Runs the pre-processing kernel for matrix A. This transposes the matrix, but also pads zeros
+ // to fill it up until it reaches a certain multiple of size (kernel parameter dependent). In
+ // case nothing has to be done, these kernels can be skipped.
+ if (!a_no_temp) {
+ auto eventProcessA = Event();
+ PadCopyTransposeMatrix(queue_, device_, db_, eventProcessA.pointer(), emptyEventList,
+ a_one, a_two, a_ld, a_offset, a_buffer,
+ n_ceiled, k_ceiled, n_ceiled, 0, a_temp,
+ ConstantOne<T>(), program,
+ true, a_rotated, false);
+ eventWaitList.push_back(eventProcessA);
+ }
+
+ // Furthermore, also creates a (possibly padded) copy of matrix C, since it is not allowed to
+ // modify the other triangle.
+ auto eventProcessC = Event();
+ PadCopyTransposeMatrix(queue_, device_, db_, eventProcessC.pointer(), emptyEventList,
+ n, n, c_ld, c_offset, c_buffer,
+ n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
+ ConstantOne<T>(), program,
+ true, c_rotated, false);
+ eventWaitList.push_back(eventProcessC);
+
+ // Retrieves the XgemmUpper or XgemmLower kernel from the compiled binary
+ auto kernel = Kernel(program, kernel_name);
+
+ // Sets the kernel arguments
+ kernel.SetArgument(0, static_cast<int>(n_ceiled));
+ kernel.SetArgument(1, static_cast<int>(k_ceiled));
+ kernel.SetArgument(2, GetRealArg(alpha));
+ kernel.SetArgument(3, GetRealArg(beta));
+ kernel.SetArgument(4, a_temp());
+ kernel.SetArgument(5, a_temp());
+ kernel.SetArgument(6, c_temp());
+
+ // Computes the global and local thread sizes
+ auto global = std::vector<size_t>{
+ (n_ceiled * db_["MDIMC"]) / db_["MWG"],
+ (n_ceiled * db_["NDIMC"]) / db_["NWG"]
+ };
+ auto local = std::vector<size_t>{db_["MDIMC"], db_["NDIMC"]};
+
+ // Launches the kernel
+ auto eventKernel = Event();
+ RunKernel(kernel, queue_, device_, global, local, eventKernel.pointer(), eventWaitList);
+ eventWaitList.push_back(eventKernel);
+
+ // Runs the post-processing kernel
+ auto upper = (triangle == Triangle::kUpper);
+ auto lower = (triangle == Triangle::kLower);
+ PadCopyTransposeMatrix(queue_, device_, db_, event_, eventWaitList,
+ n_ceiled, n_ceiled, n_ceiled, 0, c_temp,
+ n, n, c_ld, c_offset, c_buffer,
+ ConstantOne<T>(), program,
+ false, c_rotated, false, upper, lower, false);
}
// =================================================================================================
diff --git a/src/routines/level3/xsyrk.hpp b/src/routines/level3/xsyrk.hpp
index 7c075c26..de42b824 100644
--- a/src/routines/level3/xsyrk.hpp
+++ b/src/routines/level3/xsyrk.hpp
@@ -32,12 +32,12 @@ class Xsyrk: public Routine {
Xsyrk(Queue &queue, EventPointer event, const std::string &name = "SYRK");
// Templated-precision implementation of the routine
- StatusCode DoSyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
- const size_t n, const size_t k,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const T beta,
- const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
+ void DoSyrk(const Layout layout, const Triangle triangle, const Transpose a_transpose,
+ const size_t n, const size_t k,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const T beta,
+ const Buffer<T> &c_buffer, const size_t c_offset, const size_t c_ld);
};
// =================================================================================================
diff --git a/src/routines/level3/xtrmm.cpp b/src/routines/level3/xtrmm.cpp
index 74a82822..6bf77cfa 100644
--- a/src/routines/level3/xtrmm.cpp
+++ b/src/routines/level3/xtrmm.cpp
@@ -29,7 +29,7 @@ Xtrmm<T>::Xtrmm(Queue &queue, EventPointer event, const std::string &name):
// The main routine
template <typename T>
-StatusCode Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle triangle,
+void Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle triangle,
const Transpose a_transpose, const Diagonal diagonal,
const size_t m, const size_t n,
const T alpha,
@@ -37,15 +37,14 @@ StatusCode Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle
const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld) {
// Makes sure all dimensions are larger than zero
- if ((m == 0) || (n == 0)) { return StatusCode::kInvalidDimension; }
+ if ((m == 0) || (n == 0)) { throw BLASError(StatusCode::kInvalidDimension); }
// Computes the k dimension. This is based on whether or not matrix is A (on the left)
// or B (on the right) in the Xgemm routine.
auto k = (side == Side::kLeft) ? m : n;
// Checks for validity of the triangular A matrix
- auto status = TestMatrixA(k, k, a_buffer, a_offset, a_ld);
- if (ErrorIn(status)) { return status; }
+ TestMatrixA(k, k, a_buffer, a_offset, a_ld);
// Determines which kernel to run based on the layout (the Xgemm kernel assumes column-major as
// default) and on whether we are dealing with an upper or lower triangle of the triangular matrix
@@ -57,74 +56,69 @@ StatusCode Xtrmm<T>::DoTrmm(const Layout layout, const Side side, const Triangle
auto unit_diagonal = (diagonal == Diagonal::kUnit) ? true : false;
// Temporary buffer for a copy of the triangular matrix
- try {
- auto temp_triangular = Buffer<T>(context_, k*k);
-
- // Creates a general matrix from the triangular matrix to be able to run the regular Xgemm
- // routine afterwards
+ auto temp_triangular = Buffer<T>(context_, k*k);
+
+ // Creates a general matrix from the triangular matrix to be able to run the regular Xgemm
+ // routine afterwards
+ const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
+ auto kernel = Kernel(program, kernel_name);
+
+ // Sets the arguments for the triangular-to-squared kernel
+ kernel.SetArgument(0, static_cast<int>(k));
+ kernel.SetArgument(1, static_cast<int>(a_ld));
+ kernel.SetArgument(2, static_cast<int>(a_offset));
+ kernel.SetArgument(3, a_buffer());
+ kernel.SetArgument(4, static_cast<int>(k));
+ kernel.SetArgument(5, static_cast<int>(k));
+ kernel.SetArgument(6, static_cast<int>(0));
+ kernel.SetArgument(7, temp_triangular());
+ kernel.SetArgument(8, static_cast<int>(unit_diagonal));
+
+ // Uses the common padding kernel's thread configuration. This is allowed, since the
+ // triangular-to-squared kernel uses the same parameters.
+ auto global = std::vector<size_t>{Ceil(CeilDiv(k, db_["PAD_WPTX"]), db_["PAD_DIMX"]),
+ Ceil(CeilDiv(k, db_["PAD_WPTY"]), db_["PAD_DIMY"])};
+ auto local = std::vector<size_t>{db_["PAD_DIMX"], db_["PAD_DIMY"]};
+ auto kernelEvent = Event();
+ RunKernel(kernel, queue_, device_, global, local, kernelEvent.pointer());
+
+ // Synchronize now: 'DoGemm' does not accept a list of events to wait for
+ kernelEvent.WaitForCompletion();
+
+ // Runs the regular Xgemm code with either "B := alpha*A*B" or ...
+ if (side == Side::kLeft) {
+ DoGemm(layout, a_transpose, Transpose::kNo,
+ m, n, k,
+ alpha,
+ temp_triangular, 0, k,
+ b_buffer, b_offset, b_ld,
+ static_cast<T>(0.0),
+ b_buffer, b_offset, b_ld);
+ }
+
+ // ... with "B := alpha*B*A". Note that A and B are now reversed.
+ else {
try {
- const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
- auto kernel = Kernel(program, kernel_name);
-
- // Sets the arguments for the triangular-to-squared kernel
- kernel.SetArgument(0, static_cast<int>(k));
- kernel.SetArgument(1, static_cast<int>(a_ld));
- kernel.SetArgument(2, static_cast<int>(a_offset));
- kernel.SetArgument(3, a_buffer());
- kernel.SetArgument(4, static_cast<int>(k));
- kernel.SetArgument(5, static_cast<int>(k));
- kernel.SetArgument(6, static_cast<int>(0));
- kernel.SetArgument(7, temp_triangular());
- kernel.SetArgument(8, static_cast<int>(unit_diagonal));
-
- // Uses the common padding kernel's thread configuration. This is allowed, since the
- // triangular-to-squared kernel uses the same parameters.
- auto global = std::vector<size_t>{Ceil(CeilDiv(k, db_["PAD_WPTX"]), db_["PAD_DIMX"]),
- Ceil(CeilDiv(k, db_["PAD_WPTY"]), db_["PAD_DIMY"])};
- auto local = std::vector<size_t>{db_["PAD_DIMX"], db_["PAD_DIMY"]};
- auto kernelEvent = Event();
- status = RunKernel(kernel, queue_, device_, global, local, kernelEvent.pointer());
- if (ErrorIn(status)) { return status; }
-
- // Synchronize now: 'DoGemm' does not accept a list of events to wait for
- kernelEvent.WaitForCompletion();
-
- // Runs the regular Xgemm code with either "B := alpha*A*B" or ...
- if (side == Side::kLeft) {
- status = DoGemm(layout, a_transpose, Transpose::kNo,
- m, n, k,
- alpha,
- temp_triangular, 0, k,
- b_buffer, b_offset, b_ld,
- static_cast<T>(0.0),
- b_buffer, b_offset, b_ld);
- }
-
- // ... with "B := alpha*B*A". Note that A and B are now reversed.
- else {
- status = DoGemm(layout, Transpose::kNo, a_transpose,
- m, n, k,
- alpha,
- b_buffer, b_offset, b_ld,
- temp_triangular, 0, k,
- static_cast<T>(0.0),
- b_buffer, b_offset, b_ld);
-
- // A and B are now reversed, so also reverse the error codes returned from the Xgemm routine
- switch(status) {
- case StatusCode::kInvalidMatrixA: status = StatusCode::kInvalidMatrixB; break;
- case StatusCode::kInvalidMatrixB: status = StatusCode::kInvalidMatrixA; break;
- case StatusCode::kInvalidLeadDimA: status = StatusCode::kInvalidLeadDimB; break;
- case StatusCode::kInvalidLeadDimB: status = StatusCode::kInvalidLeadDimA; break;
- case StatusCode::kInsufficientMemoryA: status = StatusCode::kInsufficientMemoryB; break;
- case StatusCode::kInsufficientMemoryB: status = StatusCode::kInsufficientMemoryA; break;
- }
+ DoGemm(layout, Transpose::kNo, a_transpose,
+ m, n, k,
+ alpha,
+ b_buffer, b_offset, b_ld,
+ temp_triangular, 0, k,
+ static_cast<T>(0.0),
+ b_buffer, b_offset, b_ld);
+ } catch (BLASError &e) {
+ // A and B are now reversed, so also reverse the error codes returned from the Xgemm routine
+ switch(e.status()) {
+ case StatusCode::kInvalidMatrixA: throw BLASError(StatusCode::kInvalidMatrixB, e.details());
+ case StatusCode::kInvalidMatrixB: throw BLASError(StatusCode::kInvalidMatrixA, e.details());
+ case StatusCode::kInvalidLeadDimA: throw BLASError(StatusCode::kInvalidLeadDimB, e.details());
+ case StatusCode::kInvalidLeadDimB: throw BLASError(StatusCode::kInvalidLeadDimA, e.details());
+ case StatusCode::kInsufficientMemoryA: throw BLASError(StatusCode::kInsufficientMemoryB, e.details());
+ case StatusCode::kInsufficientMemoryB: throw BLASError(StatusCode::kInsufficientMemoryA, e.details());
+ default: throw;
}
-
- // Return the status of the Xgemm routine
- return status;
- } catch (...) { return StatusCode::kInvalidKernel; }
- } catch (...) { return StatusCode::kTempBufferAllocFailure; }
+ }
+ }
}
// =================================================================================================
diff --git a/src/routines/level3/xtrmm.hpp b/src/routines/level3/xtrmm.hpp
index 186a120e..967bf132 100644
--- a/src/routines/level3/xtrmm.hpp
+++ b/src/routines/level3/xtrmm.hpp
@@ -38,12 +38,12 @@ class Xtrmm: public Xgemm<T> {
Xtrmm(Queue &queue, EventPointer event, const std::string &name = "TRMM");
// Templated-precision implementation of the routine
- StatusCode DoTrmm(const Layout layout, const Side side, const Triangle triangle,
- const Transpose a_transpose, const Diagonal diagonal,
- const size_t m, const size_t n,
- const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld);
+ void DoTrmm(const Layout layout, const Side side, const Triangle triangle,
+ const Transpose a_transpose, const Diagonal diagonal,
+ const size_t m, const size_t n,
+ const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld);
};
// =================================================================================================
diff --git a/src/routines/levelx/xomatcopy.cpp b/src/routines/levelx/xomatcopy.cpp
index af9080af..875ca7d2 100644
--- a/src/routines/levelx/xomatcopy.cpp
+++ b/src/routines/levelx/xomatcopy.cpp
@@ -22,27 +22,26 @@ namespace clblast {
// Constructor: forwards to base class constructor
template <typename T>
Xomatcopy<T>::Xomatcopy(Queue &queue, EventPointer event, const std::string &name):
- Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose"}, PrecisionValue<T>()) {
- source_string_ =
+ Routine(queue, event, name, {"Copy","Pad","Transpose","Padtranspose"}, PrecisionValue<T>(), {}, {
#include "../../kernels/level3/level3.opencl"
#include "../../kernels/level3/copy_fast.opencl"
#include "../../kernels/level3/copy_pad.opencl"
#include "../../kernels/level3/transpose_fast.opencl"
#include "../../kernels/level3/transpose_pad.opencl"
- ;
+ }) {
}
// =================================================================================================
// The main routine
template <typename T>
-StatusCode Xomatcopy<T>::DoOmatcopy(const Layout layout, const Transpose a_transpose,
- const size_t m, const size_t n, const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld) {
+void Xomatcopy<T>::DoOmatcopy(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n, const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld) {
// Makes sure all dimensions are larger than zero
- if ((m == 0) || (n == 0)) { return StatusCode::kInvalidDimension; }
+ if ((m == 0) || (n == 0)) { throw BLASError(StatusCode::kInvalidDimension); }
// Determines whether to transpose the matrix A
const auto transpose = (a_transpose != Transpose::kNo);
@@ -63,22 +62,17 @@ StatusCode Xomatcopy<T>::DoOmatcopy(const Layout layout, const Transpose a_trans
// Also tests that the leading dimensions of:
// matrix A cannot be less than N when rotated, or less than M when not-rotated
// matrix B cannot be less than M when rotated, or less than N when not-rotated
- auto status = TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld);
- if (ErrorIn(status)) { return status; }
- status = TestMatrixB(b_one, b_two, b_buffer, b_offset, b_ld);
- if (ErrorIn(status)) { return status; }
+ TestMatrixA(a_one, a_two, a_buffer, a_offset, a_ld);
+ TestMatrixB(b_one, b_two, b_buffer, b_offset, b_ld);
// Loads the program from the database
const auto program = GetProgramFromCache(context_, PrecisionValue<T>(), routine_name_);
auto emptyEventList = std::vector<Event>();
- status = PadCopyTransposeMatrix(queue_, device_, db_, event_, emptyEventList,
- a_one, a_two, a_ld, a_offset, a_buffer,
- b_one, b_two, b_ld, b_offset, b_buffer,
- alpha, program, false, transpose, conjugate);
- if (ErrorIn(status)) { return status; }
-
- return StatusCode::kSuccess;
+ PadCopyTransposeMatrix(queue_, device_, db_, event_, emptyEventList,
+ a_one, a_two, a_ld, a_offset, a_buffer,
+ b_one, b_two, b_ld, b_offset, b_buffer,
+ alpha, program, false, transpose, conjugate);
}
// =================================================================================================
diff --git a/src/routines/levelx/xomatcopy.hpp b/src/routines/levelx/xomatcopy.hpp
index 0e580230..2da66693 100644
--- a/src/routines/levelx/xomatcopy.hpp
+++ b/src/routines/levelx/xomatcopy.hpp
@@ -28,10 +28,10 @@ class Xomatcopy: public Routine {
Xomatcopy(Queue &queue, EventPointer event, const std::string &name = "OMATCOPY");
// Templated-precision implementation of the routine
- StatusCode DoOmatcopy(const Layout layout, const Transpose a_transpose,
- const size_t m, const size_t n, const T alpha,
- const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
- const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld);
+ void DoOmatcopy(const Layout layout, const Transpose a_transpose,
+ const size_t m, const size_t n, const T alpha,
+ const Buffer<T> &a_buffer, const size_t a_offset, const size_t a_ld,
+ const Buffer<T> &b_buffer, const size_t b_offset, const size_t b_ld);
};
// =================================================================================================
diff --git a/src/utilities.hpp b/src/utilities.hpp
index 038a8a96..a7fcbd25 100644
--- a/src/utilities.hpp
+++ b/src/utilities.hpp
@@ -24,6 +24,7 @@
#include "clblast.h"
#include "clblast_half.h"
#include "clpp11.hpp"
+#include "clblast_exceptions.hpp"
#include "msvc.hpp"
@@ -207,11 +208,6 @@ bool CheckArgument(const int argc, char *argv[], std::string &help, const std::s
// =================================================================================================
-// Helper function to check for errors in the status code
-inline bool ErrorIn(const StatusCode s) { return (s != StatusCode::kSuccess); }
-
-// =================================================================================================
-
// Returns a random number to be used as a seed
unsigned int GetRandomSeed();