Diffstat (limited to 'src')
-rw-r--r--  src/clblast.cpp                        59
-rw-r--r--  src/clblast_c.cpp                     107
-rw-r--r--  src/clpp11.hpp                          3
-rw-r--r--  src/kernels/level1/xaxpy.opencl        22
-rw-r--r--  src/routines/levelx/xaxpybatched.cpp   95
-rw-r--r--  src/routines/levelx/xaxpybatched.hpp   43
-rw-r--r--  src/tuning/tuning.hpp                  15
-rw-r--r--  src/utilities/utilities.cpp            41
-rw-r--r--  src/utilities/utilities.hpp            14
9 files changed, 359 insertions, 40 deletions
diff --git a/src/clblast.cpp b/src/clblast.cpp
index a63d766c..d3db8edf 100644
--- a/src/clblast.cpp
+++ b/src/clblast.cpp
@@ -71,6 +71,7 @@
// Level-x includes (non-BLAS)
#include "routines/levelx/xomatcopy.hpp"
+#include "routines/levelx/xaxpybatched.hpp"
namespace clblast {
@@ -2172,6 +2173,64 @@ template StatusCode PUBLIC_API Omatcopy<half>(const Layout, const Transpose,
const cl_mem, const size_t, const size_t,
cl_mem, const size_t, const size_t,
cl_command_queue*, cl_event*);
+
+// Batched version of AXPY: SAXPYBATCHED/DAXPYBATCHED/CAXPYBATCHED/ZAXPYBATCHED/HAXPYBATCHED
+template <typename T>
+StatusCode AxpyBatched(const size_t n,
+ const T *alphas,
+ const cl_mem x_buffer, const size_t *x_offsets, const size_t x_inc,
+ cl_mem y_buffer, const size_t *y_offsets, const size_t y_inc,
+ const size_t batch_count,
+ cl_command_queue* queue, cl_event* event) {
+ try {
+ auto queue_cpp = Queue(*queue);
+ auto routine = XaxpyBatched<T>(queue_cpp, event);
+ auto alphas_cpp = std::vector<T>();
+ auto x_offsets_cpp = std::vector<size_t>();
+ auto y_offsets_cpp = std::vector<size_t>();
+ for (auto batch = size_t{0}; batch < batch_count; ++batch) {
+ alphas_cpp.push_back(alphas[batch]);
+ x_offsets_cpp.push_back(x_offsets[batch]);
+ y_offsets_cpp.push_back(y_offsets[batch]);
+ }
+ routine.DoAxpyBatched(n,
+ alphas_cpp,
+ Buffer<T>(x_buffer), x_offsets_cpp, x_inc,
+ Buffer<T>(y_buffer), y_offsets_cpp, y_inc,
+ batch_count);
+ return StatusCode::kSuccess;
+ } catch (...) { return DispatchException(); }
+}
+template StatusCode PUBLIC_API AxpyBatched<float>(const size_t,
+ const float*,
+ const cl_mem, const size_t*, const size_t,
+ cl_mem, const size_t*, const size_t,
+ const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API AxpyBatched<double>(const size_t,
+ const double*,
+ const cl_mem, const size_t*, const size_t,
+ cl_mem, const size_t*, const size_t,
+ const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API AxpyBatched<float2>(const size_t,
+ const float2*,
+ const cl_mem, const size_t*, const size_t,
+ cl_mem, const size_t*, const size_t,
+ const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API AxpyBatched<double2>(const size_t,
+ const double2*,
+ const cl_mem, const size_t*, const size_t,
+ cl_mem, const size_t*, const size_t,
+ const size_t,
+ cl_command_queue*, cl_event*);
+template StatusCode PUBLIC_API AxpyBatched<half>(const size_t,
+ const half*,
+ const cl_mem, const size_t*, const size_t,
+ cl_mem, const size_t*, const size_t,
+ const size_t,
+ cl_command_queue*, cl_event*);
// =================================================================================================
// Clears the cache of stored binaries
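For illustration, a minimal sketch of a call into the new templated C++ entry point. It assumes an already set-up OpenCL environment; 'queue', 'x_buf' and 'y_buf' are placeholders for a valid cl_command_queue and two cl_mem buffers large enough to cover every batch at the offsets used, and the sizes and alpha values are made up for the example.

// Sketch only: 'queue', 'x_buf' and 'y_buf' are assumed to exist and to be
// large enough for every batch at the offsets used below.
#include <vector>
#include <clblast.h>

void ExampleAxpyBatched(cl_command_queue queue, cl_mem x_buf, cl_mem y_buf) {
  const auto n = size_t{512};
  const auto batch_count = size_t{3};
  const auto alphas = std::vector<float>{1.0f, 2.0f, 0.5f};    // one alpha per batch
  const auto x_offsets = std::vector<size_t>{0, 512, 1024};    // one sub-vector per batch
  const auto y_offsets = std::vector<size_t>{0, 512, 1024};
  cl_event event = nullptr;
  const auto status = clblast::AxpyBatched(n, alphas.data(),
                                           x_buf, x_offsets.data(), 1,  // x_inc == 1
                                           y_buf, y_offsets.data(), 1,  // y_inc == 1
                                           batch_count, &queue, &event);
  if (status == clblast::StatusCode::kSuccess) {
    clWaitForEvents(1, &event);  // the event signals completion of the batched kernel
    clReleaseEvent(event);
  }
}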
diff --git a/src/clblast_c.cpp b/src/clblast_c.cpp
index 6018bcfa..b09f8c54 100644
--- a/src/clblast_c.cpp
+++ b/src/clblast_c.cpp
@@ -3447,6 +3447,113 @@ CLBlastStatusCode CLBlastHomatcopy(const CLBlastLayout layout, const CLBlastTran
} catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); }
}
+// Batched version of AXPY: SAXPYBATCHED/DAXPYBATCHED/CAXPYBATCHED/ZAXPYBATCHED/HAXPYBATCHED
+CLBlastStatusCode CLBlastSaxpyBatched(const size_t n,
+ const float *alphas,
+ const cl_mem x_buffer, const size_t *x_offsets, const size_t x_inc,
+ cl_mem y_buffer, const size_t *y_offsets, const size_t y_inc,
+ const size_t batch_count,
+ cl_command_queue* queue, cl_event* event) {
+ auto alphas_cpp = std::vector<float>();
+ for (auto batch = size_t{0}; batch < batch_count; ++batch) {
+ alphas_cpp.push_back(alphas[batch]);
+ }
+ try {
+ return static_cast<CLBlastStatusCode>(
+ clblast::AxpyBatched(n,
+ alphas_cpp.data(),
+ x_buffer, x_offsets, x_inc,
+ y_buffer, y_offsets, y_inc,
+ batch_count,
+ queue, event)
+ );
+ } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); }
+}
+CLBlastStatusCode CLBlastDaxpyBatched(const size_t n,
+ const double *alphas,
+ const cl_mem x_buffer, const size_t *x_offsets, const size_t x_inc,
+ cl_mem y_buffer, const size_t *y_offsets, const size_t y_inc,
+ const size_t batch_count,
+ cl_command_queue* queue, cl_event* event) {
+ auto alphas_cpp = std::vector<double>();
+ for (auto batch = size_t{0}; batch < batch_count; ++batch) {
+ alphas_cpp.push_back(alphas[batch]);
+ }
+ try {
+ return static_cast<CLBlastStatusCode>(
+ clblast::AxpyBatched(n,
+ alphas_cpp.data(),
+ x_buffer, x_offsets, x_inc,
+ y_buffer, y_offsets, y_inc,
+ batch_count,
+ queue, event)
+ );
+ } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); }
+}
+CLBlastStatusCode CLBlastCaxpyBatched(const size_t n,
+ const cl_float2 *alphas,
+ const cl_mem x_buffer, const size_t *x_offsets, const size_t x_inc,
+ cl_mem y_buffer, const size_t *y_offsets, const size_t y_inc,
+ const size_t batch_count,
+ cl_command_queue* queue, cl_event* event) {
+ auto alphas_cpp = std::vector<float2>();
+ for (auto batch = size_t{0}; batch < batch_count; ++batch) {
+ alphas_cpp.push_back(float2{alphas[batch].s[0], alphas[batch].s[1]});
+ }
+ try {
+ return static_cast<CLBlastStatusCode>(
+ clblast::AxpyBatched(n,
+ alphas_cpp.data(),
+ x_buffer, x_offsets, x_inc,
+ y_buffer, y_offsets, y_inc,
+ batch_count,
+ queue, event)
+ );
+ } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); }
+}
+CLBlastStatusCode CLBlastZaxpyBatched(const size_t n,
+ const cl_double2 *alphas,
+ const cl_mem x_buffer, const size_t *x_offsets, const size_t x_inc,
+ cl_mem y_buffer, const size_t *y_offsets, const size_t y_inc,
+ const size_t batch_count,
+ cl_command_queue* queue, cl_event* event) {
+ auto alphas_cpp = std::vector<double2>();
+ for (auto batch = size_t{0}; batch < batch_count; ++batch) {
+ alphas_cpp.push_back(double2{alphas[batch].s[0], alphas[batch].s[1]});
+ }
+ try {
+ return static_cast<CLBlastStatusCode>(
+ clblast::AxpyBatched(n,
+ alphas_cpp.data(),
+ x_buffer, x_offsets, x_inc,
+ y_buffer, y_offsets, y_inc,
+ batch_count,
+ queue, event)
+ );
+ } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); }
+}
+CLBlastStatusCode CLBlastHaxpyBatched(const size_t n,
+ const cl_half *alphas,
+ const cl_mem x_buffer, const size_t *x_offsets, const size_t x_inc,
+ cl_mem y_buffer, const size_t *y_offsets, const size_t y_inc,
+ const size_t batch_count,
+ cl_command_queue* queue, cl_event* event) {
+ auto alphas_cpp = std::vector<half>();
+ for (auto batch = size_t{0}; batch < batch_count; ++batch) {
+ alphas_cpp.push_back(alphas[batch]);
+ }
+ try {
+ return static_cast<CLBlastStatusCode>(
+ clblast::AxpyBatched(n,
+ alphas_cpp.data(),
+ x_buffer, x_offsets, x_inc,
+ y_buffer, y_offsets, y_inc,
+ batch_count,
+ queue, event)
+ );
+ } catch (...) { return static_cast<CLBlastStatusCode>(clblast::DispatchExceptionForC()); }
+}
+
// =================================================================================================
// Clears the cache of stored binaries
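The same operation through the exported C bindings, again only as a sketch with the same placeholder queue and buffers as in the C++ example above.

// Sketch only: same placeholder 'queue', 'x_buf' and 'y_buf' as before, but
// going through the plain-C API instead of the templated C++ one.
#include <clblast_c.h>

void ExampleSaxpyBatchedC(cl_command_queue queue, cl_mem x_buf, cl_mem y_buf) {
  const size_t n = 512;
  const size_t batch_count = 3;
  const float alphas[] = {1.0f, 2.0f, 0.5f};
  const size_t x_offsets[] = {0, 512, 1024};
  const size_t y_offsets[] = {0, 512, 1024};
  cl_event event = nullptr;
  const CLBlastStatusCode status = CLBlastSaxpyBatched(n, alphas,
                                                       x_buf, x_offsets, 1,
                                                       y_buf, y_offsets, 1,
                                                       batch_count, &queue, &event);
  if (status == CLBlastSuccess) {
    clWaitForEvents(1, &event);
    clReleaseEvent(event);
  }
}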
diff --git a/src/clpp11.hpp b/src/clpp11.hpp
index 41af28da..29f81cf8 100644
--- a/src/clpp11.hpp
+++ b/src/clpp11.hpp
@@ -600,9 +600,6 @@ class Buffer {
// Copies from host to device: writing the device buffer a-synchronously
void WriteAsync(const Queue &queue, const size_t size, const T* host, const size_t offset = 0) {
- if (access_ == BufferAccess::kReadOnly) {
- throw LogicError("Buffer: writing to a read-only buffer");
- }
if (GetSize() < (offset+size)*sizeof(T)) {
throw LogicError("Buffer: target device buffer is too small");
}
diff --git a/src/kernels/level1/xaxpy.opencl b/src/kernels/level1/xaxpy.opencl
index ece8476e..f44bbce0 100644
--- a/src/kernels/level1/xaxpy.opencl
+++ b/src/kernels/level1/xaxpy.opencl
@@ -9,7 +9,7 @@
//
// This file contains the Xaxpy kernel. It contains one fast vectorized version in case of unit
// strides (incx=incy=1) and no offsets (offx=offy=0). Another version is more general, but doesn't
-// support vector data-types.
+// support vector data-types. The general version has a batched implementation as well.
//
// This kernel uses the level-1 BLAS common tuning parameters.
//
@@ -36,8 +36,6 @@ void Xaxpy(const int n, const real_arg arg_alpha,
}
}
-// =================================================================================================
-
// Faster version of the kernel without offsets and strided accesses. Also assumes that 'n' is
// divisible by 'VW', 'WGS' and 'WPT'.
__kernel __attribute__((reqd_work_group_size(WGS, 1, 1)))
@@ -57,6 +55,24 @@ void XaxpyFast(const int n, const real_arg arg_alpha,
// =================================================================================================
+// Full version of the kernel with offsets and strided accesses: batched version
+__kernel __attribute__((reqd_work_group_size(WGS, 1, 1)))
+void XaxpyBatched(const int n, const __constant real_arg* arg_alphas,
+ const __global real* restrict xgm, const __constant int* x_offsets, const int x_inc,
+ __global real* ygm, const __constant int* y_offsets, const int y_inc) {
+ const int batch = get_group_id(1);
+ const real alpha = GetRealArg(arg_alphas[batch]);
+
+ // Loops over the work that needs to be done (allows for an arbitrary number of threads)
+ #pragma unroll
+ for (int id = get_global_id(0); id<n; id += get_global_size(0)) {
+ real xvalue = xgm[id*x_inc + x_offsets[batch]];
+ MultiplyAdd(ygm[id*y_inc + y_offsets[batch]], alpha, xvalue);
+ }
+}
+
+// =================================================================================================
+
// End of the C++11 raw string literal
)"
diff --git a/src/routines/levelx/xaxpybatched.cpp b/src/routines/levelx/xaxpybatched.cpp
new file mode 100644
index 00000000..6a4269be
--- /dev/null
+++ b/src/routines/levelx/xaxpybatched.cpp
@@ -0,0 +1,95 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the XaxpyBatched class (see the header for information about the class).
+//
+// =================================================================================================
+
+#include "routines/levelx/xaxpybatched.hpp"
+
+#include <string>
+#include <vector>
+
+namespace clblast {
+// =================================================================================================
+
+// Constructor: forwards to base class constructor
+template <typename T>
+XaxpyBatched<T>::XaxpyBatched(Queue &queue, EventPointer event, const std::string &name):
+ Routine(queue, event, name, {"Xaxpy"}, PrecisionValue<T>(), {}, {
+ #include "../../kernels/level1/level1.opencl"
+ #include "../../kernels/level1/xaxpy.opencl"
+ }) {
+}
+
+// =================================================================================================
+
+// The main routine
+template <typename T>
+void XaxpyBatched<T>::DoAxpyBatched(const size_t n, const std::vector<T> &alphas,
+ const Buffer<T> &x_buffer, const std::vector<size_t> &x_offsets, const size_t x_inc,
+ const Buffer<T> &y_buffer, const std::vector<size_t> &y_offsets, const size_t y_inc,
+ const size_t batch_count) {
+
+ // Tests for a valid batch count
+ if ((batch_count < 1) || (alphas.size() != batch_count) ||
+ (x_offsets.size() != batch_count) || (y_offsets.size() != batch_count)) {
+ throw BLASError(StatusCode::kInvalidBatchCount);
+ }
+
+ // Makes sure all dimensions are larger than zero
+ if (n == 0) { throw BLASError(StatusCode::kInvalidDimension); }
+
+ // Tests the vectors for validity
+ for (auto batch = size_t{0}; batch < batch_count; ++batch) {
+ TestVectorX(n, x_buffer, x_offsets[batch], x_inc);
+ TestVectorY(n, y_buffer, y_offsets[batch], y_inc);
+ }
+
+ // Upload the arguments to the device
+ std::vector<int> x_offsets_int(x_offsets.begin(), x_offsets.end());
+ std::vector<int> y_offsets_int(y_offsets.begin(), y_offsets.end());
+ auto x_offsets_device = Buffer<int>(context_, BufferAccess::kReadOnly, batch_count);
+ auto y_offsets_device = Buffer<int>(context_, BufferAccess::kReadOnly, batch_count);
+ auto alphas_device = Buffer<T>(context_, BufferAccess::kReadOnly, batch_count);
+ x_offsets_device.Write(queue_, batch_count, x_offsets_int);
+ y_offsets_device.Write(queue_, batch_count, y_offsets_int);
+ alphas_device.Write(queue_, batch_count, alphas);
+
+ // Retrieves the Xaxpy kernel from the compiled binary
+ auto kernel = Kernel(program_, "XaxpyBatched");
+
+ // Sets the kernel arguments
+ kernel.SetArgument(0, static_cast<int>(n));
+ kernel.SetArgument(1, alphas_device());
+ kernel.SetArgument(2, x_buffer());
+ kernel.SetArgument(3, x_offsets_device());
+ kernel.SetArgument(4, static_cast<int>(x_inc));
+ kernel.SetArgument(5, y_buffer());
+ kernel.SetArgument(6, y_offsets_device());
+ kernel.SetArgument(7, static_cast<int>(y_inc));
+
+ // Launches the kernel
+ auto n_ceiled = Ceil(n, db_["WGS"]*db_["WPT"]);
+ auto global = std::vector<size_t>{n_ceiled/db_["WPT"], batch_count};
+ auto local = std::vector<size_t>{db_["WGS"], 1};
+ RunKernel(kernel, queue_, device_, global, local, event_);
+}
+
+// =================================================================================================
+
+// Compiles the templated class
+template class XaxpyBatched<half>;
+template class XaxpyBatched<float>;
+template class XaxpyBatched<double>;
+template class XaxpyBatched<float2>;
+template class XaxpyBatched<double2>;
+
+// =================================================================================================
+} // namespace clblast
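The launch configuration at the end of DoAxpyBatched keeps the regular Xaxpy sizing in the first dimension and adds the batch index as the second dimension. A worked example of that arithmetic, assuming tuning values WGS = 64 and WPT = 4 (the actual values come from the per-device database, so these numbers are only illustrative):

#include <cstddef>
#include <cstdio>

int main() {
  const std::size_t n = 1000;
  const std::size_t batch_count = 8;
  const std::size_t WGS = 64;  // assumed work-group size
  const std::size_t WPT = 4;   // assumed work-per-thread
  const auto Ceil = [](std::size_t a, std::size_t b) { return ((a + b - 1) / b) * b; };
  const auto n_ceiled = Ceil(n, WGS*WPT);  // 1000 rounded up to a multiple of 256 -> 1024
  const auto global_0 = n_ceiled / WPT;    // 256 work-items in dimension 0
  // global = {256, 8}, local = {64, 1}: four work-groups per batch, eight batches side-by-side
  std::printf("global = {%zu, %zu}, local = {%zu, 1}\n", global_0, batch_count, WGS);
  return 0;
}

The second work-group dimension is what lets a single kernel launch cover all batches without a host-side loop over batch_count.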
diff --git a/src/routines/levelx/xaxpybatched.hpp b/src/routines/levelx/xaxpybatched.hpp
new file mode 100644
index 00000000..513792ea
--- /dev/null
+++ b/src/routines/levelx/xaxpybatched.hpp
@@ -0,0 +1,43 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the XaxpyBatched routine. This is a non-BLAS batched version of AXPY.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XAXPYBATCHED_H_
+#define CLBLAST_ROUTINES_XAXPYBATCHED_H_
+
+#include <vector>
+
+#include "routine.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// See comment at top of file for a description of the class
+template <typename T>
+class XaxpyBatched: public Routine {
+ public:
+
+ // Constructor
+ XaxpyBatched(Queue &queue, EventPointer event, const std::string &name = "AXPYBATCHED");
+
+ // Templated-precision implementation of the routine
+ void DoAxpyBatched(const size_t n, const std::vector<T> &alphas,
+ const Buffer<T> &x_buffer, const std::vector<size_t> &x_offsets, const size_t x_inc,
+ const Buffer<T> &y_buffer, const std::vector<size_t> &y_offsets, const size_t y_inc,
+ const size_t batch_count);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XAXPYBATCHED_H_
+#endif
diff --git a/src/tuning/tuning.hpp b/src/tuning/tuning.hpp
index 1dd76894..7060fc9f 100644
--- a/src/tuning/tuning.hpp
+++ b/src/tuning/tuning.hpp
@@ -17,6 +17,7 @@
#include <vector>
#include <string>
+#include <random>
#include <cltune.h>
@@ -77,12 +78,14 @@ void Tuner(int argc, char* argv[]) {
auto b_mat = std::vector<T>(C::GetSizeB(args));
auto c_mat = std::vector<T>(C::GetSizeC(args));
auto temp = std::vector<T>(C::GetSizeTemp(args));
- PopulateVector(x_vec, kSeed);
- PopulateVector(y_vec, kSeed);
- PopulateVector(a_mat, kSeed);
- PopulateVector(b_mat, kSeed);
- PopulateVector(c_mat, kSeed);
- PopulateVector(temp, kSeed);
+ std::mt19937 mt(kSeed);
+ std::uniform_real_distribution<double> dist(kTestDataLowerLimit, kTestDataUpperLimit);
+ PopulateVector(x_vec, mt, dist);
+ PopulateVector(y_vec, mt, dist);
+ PopulateVector(a_mat, mt, dist);
+ PopulateVector(b_mat, mt, dist);
+ PopulateVector(c_mat, mt, dist);
+ PopulateVector(temp, mt, dist);
// Initializes the tuner for the chosen device
cltune::Tuner tuner(args.platform_id, args.device_id);
diff --git a/src/utilities/utilities.cpp b/src/utilities/utilities.cpp
index d68cc1a6..3d091b64 100644
--- a/src/utilities/utilities.cpp
+++ b/src/utilities/utilities.cpp
@@ -67,8 +67,8 @@ template <> double2 Constant(const double val) { return {val, 0.0}; }
template <typename T> T SmallConstant() { return static_cast<T>(1e-4); }
template float SmallConstant<float>();
template double SmallConstant<double>();
-template <> half SmallConstant() { return FloatToHalf(1e-4); }
-template <> float2 SmallConstant() { return {1e-4, 0.0f}; }
+template <> half SmallConstant() { return FloatToHalf(1e-4f); }
+template <> float2 SmallConstant() { return {1e-4f, 0.0f}; }
template <> double2 SmallConstant() { return {1e-4, 0.0}; }
// Returns the absolute value of a scalar (modulus in case of a complex number)
@@ -326,42 +326,29 @@ unsigned int GetRandomSeed() {
// Populates a vector with samples from the given random distribution
template <typename T>
-void PopulateVector(std::vector<T> &vector, const unsigned int seed) {
- auto lower_limit = static_cast<T>(kTestDataLowerLimit);
- auto upper_limit = static_cast<T>(kTestDataUpperLimit);
- std::mt19937 mt(seed);
- std::uniform_real_distribution<T> dist(lower_limit, upper_limit);
- for (auto &element: vector) { element = dist(mt); }
+void PopulateVector(std::vector<T> &vector, std::mt19937 &mt, std::uniform_real_distribution<double> &dist) {
+ for (auto &element: vector) { element = static_cast<T>(dist(mt)); }
}
-template void PopulateVector<float>(std::vector<float>&, const unsigned int);
-template void PopulateVector<double>(std::vector<double>&, const unsigned int);
+template void PopulateVector<float>(std::vector<float>&, std::mt19937&, std::uniform_real_distribution<double>&);
+template void PopulateVector<double>(std::vector<double>&, std::mt19937&, std::uniform_real_distribution<double>&);
// Specialized versions of the above for complex data-types
template <>
-void PopulateVector(std::vector<float2> &vector, const unsigned int seed) {
- auto lower_limit = static_cast<float>(kTestDataLowerLimit);
- auto upper_limit = static_cast<float>(kTestDataUpperLimit);
- std::mt19937 mt(seed);
- std::uniform_real_distribution<float> dist(lower_limit, upper_limit);
- for (auto &element: vector) { element.real(dist(mt)); element.imag(dist(mt)); }
+void PopulateVector(std::vector<float2> &vector, std::mt19937 &mt, std::uniform_real_distribution<double> &dist) {
+ for (auto &element: vector) {
+ element.real(static_cast<float>(dist(mt)));
+ element.imag(static_cast<float>(dist(mt)));
+ }
}
template <>
-void PopulateVector(std::vector<double2> &vector, const unsigned int seed) {
- auto lower_limit = static_cast<double>(kTestDataLowerLimit);
- auto upper_limit = static_cast<double>(kTestDataUpperLimit);
- std::mt19937 mt(seed);
- std::uniform_real_distribution<double> dist(lower_limit, upper_limit);
+void PopulateVector(std::vector<double2> &vector, std::mt19937 &mt, std::uniform_real_distribution<double> &dist) {
for (auto &element: vector) { element.real(dist(mt)); element.imag(dist(mt)); }
}
// Specialized versions of the above for half-precision
template <>
-void PopulateVector(std::vector<half> &vector, const unsigned int seed) {
- const auto lower_limit = static_cast<float>(kTestDataLowerLimit);
- const auto upper_limit = static_cast<float>(kTestDataUpperLimit);
- std::mt19937 mt(seed);
- std::uniform_real_distribution<float> dist(lower_limit, upper_limit);
- for (auto &element: vector) { element = FloatToHalf(dist(mt)); }
+void PopulateVector(std::vector<half> &vector, std::mt19937 &mt, std::uniform_real_distribution<double> &dist) {
+ for (auto &element: vector) { element = FloatToHalf(static_cast<float>(dist(mt))); }
}
// =================================================================================================
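The reworked PopulateVector no longer builds its own generator: the caller constructs one std::mt19937 and one double-precision distribution and shares them across all vectors, with the cast to the target type done per element. A standalone sketch of that pattern (it does not use CLBlast types; the seed and limits are illustrative stand-ins for kSeed and kTestDataLowerLimit/kTestDataUpperLimit):

#include <cstdio>
#include <random>
#include <vector>

template <typename T>
void Populate(std::vector<T> &v, std::mt19937 &mt, std::uniform_real_distribution<double> &dist) {
  for (auto &element : v) { element = static_cast<T>(dist(mt)); }  // one shared random stream
}

int main() {
  std::mt19937 mt(2016);                                   // fixed seed for reproducibility
  std::uniform_real_distribution<double> dist(-2.0, 2.0);  // illustrative data limits
  auto x = std::vector<float>(4);
  auto y = std::vector<double>(4);
  Populate(x, mt, dist);  // consumes the first four samples
  Populate(y, mt, dist);  // continues the same stream
  std::printf("%f %f\n", x[0], y[0]);
  return 0;
}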
diff --git a/src/utilities/utilities.hpp b/src/utilities/utilities.hpp
index 3c9be6a2..b3db8c22 100644
--- a/src/utilities/utilities.hpp
+++ b/src/utilities/utilities.hpp
@@ -20,6 +20,7 @@
#include <string>
#include <functional>
#include <complex>
+#include <random>
#include "clpp11.hpp"
#include "clblast.h"
@@ -72,6 +73,7 @@ constexpr auto kArgAsumOffset = "offasum";
constexpr auto kArgImaxOffset = "offimax";
constexpr auto kArgAlpha = "alpha";
constexpr auto kArgBeta = "beta";
+constexpr auto kArgBatchCount = "batch_num";
// The tuner-specific arguments in string form
constexpr auto kArgFraction = "fraction";
@@ -155,6 +157,16 @@ struct Arguments {
size_t imax_offset = 0;
T alpha = ConstantOne<T>();
T beta = ConstantOne<T>();
+ // Batch-specific arguments
+ size_t batch_count = 1;
+ std::vector<size_t> x_offsets = {0};
+ std::vector<size_t> y_offsets = {0};
+ std::vector<size_t> a_offsets = {0};
+ std::vector<size_t> b_offsets = {0};
+ std::vector<size_t> c_offsets = {0};
+ std::vector<T> alphas = {ConstantOne<T>()};
+ std::vector<T> betas = {ConstantOne<T>()};
+ // Sizes
size_t x_size = 1;
size_t y_size = 1;
size_t a_size = 1;
@@ -234,7 +246,7 @@ constexpr auto kTestDataUpperLimit = 2.0;
// Populates a vector with random data
template <typename T>
-void PopulateVector(std::vector<T> &vector, const unsigned int seed);
+void PopulateVector(std::vector<T> &vector, std::mt19937 &mt, std::uniform_real_distribution<double> &dist);
// =================================================================================================