From c87e877bf23d2fe38a7da2898e1734a3cdeaf48c Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Sun, 10 Jul 2016 20:32:01 +0200 Subject: Now passing alpha/beta to the kernel as scalar arguments again, as was done before fp16 support; in the fp16 case, the arguments are cast on the host and in the kernel --- src/tuning/kernels/copy_fast.cpp | 3 +-- src/tuning/kernels/copy_pad.cpp | 3 +-- src/tuning/kernels/transpose_fast.cpp | 3 +-- src/tuning/kernels/transpose_pad.cpp | 3 +-- src/tuning/kernels/xaxpy.cpp | 3 +-- src/tuning/kernels/xgemm.cpp | 6 ++---- src/tuning/kernels/xgemv.cpp | 6 ++---- src/tuning/kernels/xger.cpp | 3 +-- 8 files changed, 10 insertions(+), 20 deletions(-) (limited to 'src/tuning/kernels') diff --git a/src/tuning/kernels/copy_fast.cpp b/src/tuning/kernels/copy_fast.cpp index 34269bc7..78ded56e 100644 --- a/src/tuning/kernels/copy_fast.cpp +++ b/src/tuning/kernels/copy_fast.cpp @@ -86,11 +86,10 @@ class TuneCopy { std::vector &, std::vector &, std::vector &a_mat, std::vector &b_mat, std::vector &, std::vector &) { - auto alpha_buffer = std::vector{args.alpha}; tuner.AddArgumentScalar(static_cast(args.m)); tuner.AddArgumentInput(a_mat); tuner.AddArgumentOutput(b_mat); - tuner.AddArgumentInput(alpha_buffer); + tuner.AddArgumentScalar(GetRealArg(args.alpha)); } // Describes how to compute the performance metrics diff --git a/src/tuning/kernels/copy_pad.cpp b/src/tuning/kernels/copy_pad.cpp index 1e0dccd3..90f5ea82 100644 --- a/src/tuning/kernels/copy_pad.cpp +++ b/src/tuning/kernels/copy_pad.cpp @@ -86,7 +86,6 @@ class TunePad { std::vector &, std::vector &, std::vector &a_mat, std::vector &b_mat, std::vector &, std::vector &) { - auto alpha_buffer = std::vector{args.alpha}; tuner.AddArgumentScalar(static_cast(args.m)); tuner.AddArgumentScalar(static_cast(args.n)); tuner.AddArgumentScalar(static_cast(args.m)); @@ -97,7 +96,7 @@ class TunePad { tuner.AddArgumentScalar(static_cast(args.m)); tuner.AddArgumentScalar(0); tuner.AddArgumentOutput(b_mat); - 
tuner.AddArgumentInput(alpha_buffer); + tuner.AddArgumentScalar(GetRealArg(args.alpha)); tuner.AddArgumentScalar(0); } diff --git a/src/tuning/kernels/transpose_fast.cpp b/src/tuning/kernels/transpose_fast.cpp index 7ac19cb6..10fa80cb 100644 --- a/src/tuning/kernels/transpose_fast.cpp +++ b/src/tuning/kernels/transpose_fast.cpp @@ -91,11 +91,10 @@ class TuneTranspose { std::vector &, std::vector &, std::vector &a_mat, std::vector &b_mat, std::vector &, std::vector &) { - auto alpha_buffer = std::vector{args.alpha}; tuner.AddArgumentScalar(static_cast(args.m)); tuner.AddArgumentInput(a_mat); tuner.AddArgumentOutput(b_mat); - tuner.AddArgumentInput(alpha_buffer); + tuner.AddArgumentScalar(GetRealArg(args.alpha)); } // Describes how to compute the performance metrics diff --git a/src/tuning/kernels/transpose_pad.cpp b/src/tuning/kernels/transpose_pad.cpp index 63274415..507718eb 100644 --- a/src/tuning/kernels/transpose_pad.cpp +++ b/src/tuning/kernels/transpose_pad.cpp @@ -90,7 +90,6 @@ class TunePadTranspose { std::vector &, std::vector &, std::vector &a_mat, std::vector &b_mat, std::vector &, std::vector &) { - auto alpha_buffer = std::vector{args.alpha}; tuner.AddArgumentScalar(static_cast(args.m)); tuner.AddArgumentScalar(static_cast(args.n)); tuner.AddArgumentScalar(static_cast(args.m)); @@ -101,7 +100,7 @@ class TunePadTranspose { tuner.AddArgumentScalar(static_cast(args.n)); tuner.AddArgumentScalar(0); tuner.AddArgumentOutput(b_mat); - tuner.AddArgumentInput(alpha_buffer); + tuner.AddArgumentScalar(GetRealArg(args.alpha)); tuner.AddArgumentScalar(0); } diff --git a/src/tuning/kernels/xaxpy.cpp b/src/tuning/kernels/xaxpy.cpp index 88d12c1f..0033b3c6 100644 --- a/src/tuning/kernels/xaxpy.cpp +++ b/src/tuning/kernels/xaxpy.cpp @@ -89,9 +89,8 @@ class TuneXaxpy { std::vector &x_vec, std::vector &y_vec, std::vector &, std::vector &, std::vector &, std::vector &) { - auto alpha_buffer = std::vector{args.alpha}; tuner.AddArgumentScalar(static_cast(args.n)); - 
tuner.AddArgumentInput(alpha_buffer); + tuner.AddArgumentScalar(GetRealArg(args.alpha)); tuner.AddArgumentInput(x_vec); tuner.AddArgumentOutput(y_vec); } diff --git a/src/tuning/kernels/xgemm.cpp b/src/tuning/kernels/xgemm.cpp index 4b1efdef..898b8435 100644 --- a/src/tuning/kernels/xgemm.cpp +++ b/src/tuning/kernels/xgemm.cpp @@ -121,13 +121,11 @@ class TuneXgemm { std::vector &, std::vector &, std::vector &a_mat, std::vector &b_mat, std::vector &c_mat, std::vector &) { - auto alpha_buffer = std::vector{args.alpha}; - auto beta_buffer = std::vector{args.beta}; tuner.AddArgumentScalar(static_cast(args.m)); tuner.AddArgumentScalar(static_cast(args.n)); tuner.AddArgumentScalar(static_cast(args.k)); - tuner.AddArgumentInput(alpha_buffer); - tuner.AddArgumentInput(beta_buffer); + tuner.AddArgumentScalar(GetRealArg(args.alpha)); + tuner.AddArgumentScalar(GetRealArg(args.beta)); tuner.AddArgumentInput(a_mat); tuner.AddArgumentInput(b_mat); tuner.AddArgumentOutput(c_mat); diff --git a/src/tuning/kernels/xgemv.cpp b/src/tuning/kernels/xgemv.cpp index d42155ae..5c187d33 100644 --- a/src/tuning/kernels/xgemv.cpp +++ b/src/tuning/kernels/xgemv.cpp @@ -96,13 +96,11 @@ class TuneXgemv { std::vector &x_vec, std::vector &y_vec, std::vector &a_mat, std::vector &, std::vector &, std::vector &) { - auto alpha_buffer = std::vector{args.alpha}; - auto beta_buffer = std::vector{args.beta}; auto a_rotated = (V==3) ? 
1 : 0; tuner.AddArgumentScalar(static_cast(args.m)); tuner.AddArgumentScalar(static_cast(args.n)); - tuner.AddArgumentInput(alpha_buffer); - tuner.AddArgumentInput(beta_buffer); + tuner.AddArgumentScalar(GetRealArg(args.alpha)); + tuner.AddArgumentScalar(GetRealArg(args.beta)); tuner.AddArgumentScalar(static_cast(a_rotated)); tuner.AddArgumentInput(a_mat); tuner.AddArgumentScalar(0); diff --git a/src/tuning/kernels/xger.cpp b/src/tuning/kernels/xger.cpp index d2590c53..1fb5c531 100644 --- a/src/tuning/kernels/xger.cpp +++ b/src/tuning/kernels/xger.cpp @@ -85,10 +85,9 @@ class TuneXger { std::vector &x_vec, std::vector &y_vec, std::vector &a_mat, std::vector &, std::vector &, std::vector &) { - auto alpha_buffer = std::vector{args.alpha}; tuner.AddArgumentScalar(static_cast(args.m)); tuner.AddArgumentScalar(static_cast(args.n)); - tuner.AddArgumentInput(alpha_buffer); + tuner.AddArgumentScalar(GetRealArg(args.alpha)); tuner.AddArgumentInput(x_vec); tuner.AddArgumentScalar(0); // x_offset tuner.AddArgumentScalar(1); // x_increment -- cgit v1.2.3