summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- src/tuning/routines/xgemm.cpp 9
-rw-r--r-- src/tuning/tuning.hpp 11
-rw-r--r-- test/routines/level2/xtrsv.hpp 3
-rw-r--r-- test/routines/levelx/xim2col.hpp 4
4 files changed, 13 insertions, 14 deletions
diff --git a/src/tuning/routines/xgemm.cpp b/src/tuning/routines/xgemm.cpp
index cd22137a..a12ab706 100644
--- a/src/tuning/routines/xgemm.cpp
+++ b/src/tuning/routines/xgemm.cpp
@@ -75,10 +75,11 @@ void TuneXgemm(int argc, char* argv[]) {
const auto queue = Queue(context, device);
// Buffers
- auto a_mat = Buffer<T>(context, to * to);
- auto b_mat = Buffer<T>(context, to * to);
- auto c_mat = Buffer<T>(context, to * to);
- auto buffers = std::vector<Buffer<T>>{a_mat, b_mat, c_mat};
+ auto buffers = std::vector<Buffer<T>>{
+ Buffer<T>(context, to * to),
+ Buffer<T>(context, to * to),
+ Buffer<T>(context, to * to)
+ };
// In-direct version
printf("\n* Testing the in-direct GEMM routine for m=n=k\n");
diff --git a/src/tuning/tuning.hpp b/src/tuning/tuning.hpp
index 2c7f6a0b..12826d0c 100644
--- a/src/tuning/tuning.hpp
+++ b/src/tuning/tuning.hpp
@@ -180,12 +180,9 @@ void Tuner(int argc, char* argv[]) {
auto host_buffer = std::vector<T>(size);
PopulateVector(host_buffer, mt, dist);
source_buffers.push_back(host_buffer);
- auto reference_buffer = std::vector<T>(size);
- reference_buffers.push_back(reference_buffer);
- auto result_buffer = std::vector<T>(size);
- result_buffers.push_back(result_buffer);
- auto device_buffer = Buffer<T>(context, size);
- device_buffers.push_back(device_buffer);
+ reference_buffers.push_back(std::vector<T>(size));
+ result_buffers.push_back(std::vector<T>(size));
+ device_buffers.push_back(Buffer<T>(context, size));
}
// Sets the tunable parameters and their possible values
@@ -329,7 +326,7 @@ void Tuner(int argc, char* argv[]) {
printf(" %6.1lf |", settings.metric_amount / (time_ms * 1.0e6));
printf(" %sresults match%s |\n", kPrintSuccess.c_str(), kPrintEnd.c_str());
}
- catch (const CLCudaAPIBuildError &e) {
+ catch (CLCudaAPIBuildError) {
const auto status_code = DispatchExceptionCatchAll(true);
printf(" %scompilation error: %5d%s |",
kPrintError.c_str(), static_cast<int>(status_code), kPrintEnd.c_str());
diff --git a/test/routines/level2/xtrsv.hpp b/test/routines/level2/xtrsv.hpp
index aec8eace..81508236 100644
--- a/test/routines/level2/xtrsv.hpp
+++ b/test/routines/level2/xtrsv.hpp
@@ -74,7 +74,8 @@ class TestXtrsv {
// TODO: Improve this, currently loosely based on clBLAS's implementation
for (auto i = size_t{0}; i < args.n; ++i) {
auto diagonal = a_source[i*args.a_ld + i + args.a_offset];
- diagonal = static_cast<T>(AbsoluteValue(diagonal)) + static_cast<T>(args.n / size_t{4});
+ diagonal = static_cast<T>(AbsoluteValue(diagonal)) +
+ Constant<T>(static_cast<double>(args.n / size_t{4}));
for (auto j = size_t{0}; j < args.n; ++j) {
a_source[j*args.a_ld + i + args.a_offset] /= Constant<T>(2.0);
}
diff --git a/test/routines/levelx/xim2col.hpp b/test/routines/levelx/xim2col.hpp
index ebffe85e..abd6af86 100644
--- a/test/routines/levelx/xim2col.hpp
+++ b/test/routines/levelx/xim2col.hpp
@@ -165,8 +165,8 @@ StatusCode RunReference(const Arguments<T> &args, BuffersHost<T> &buffers_host)
for (auto w_id = size_t{0}; w_id < output_w; ++w_id) { // image width
// Retrieves the input value
- const auto h_index = -args.pad_h + kh_id * args.dilation_h + args.stride_h * h_id;
- const auto w_index = -args.pad_w + kw_id * args.dilation_w + args.stride_w * w_id;
+ const auto h_index = kh_id * args.dilation_h + args.stride_h * h_id - args.pad_h;
+ const auto w_index = kw_id * args.dilation_w + args.stride_w * w_id - args.pad_w;
auto val = ConstantZero<T>();
if (h_index >= 0 && h_index < args.height &&
w_index >= 0 && w_index < args.width) {