// ================================================================================================= // This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This // project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max- // width of 100 characters per line. // // Author(s): // Cedric Nugteren // // This file implements the test utility functions. // // ================================================================================================= #include #include #include #include #include "test/test_utilities.hpp" namespace clblast { // ================================================================================================= // Returns whether a scalar is close to zero template bool IsCloseToZero(const T value) { return (value > -SmallConstant()) && (value < SmallConstant()); } template bool IsCloseToZero(const float); template bool IsCloseToZero(const double); template <> bool IsCloseToZero(const half value) { return IsCloseToZero(HalfToFloat(value)); } template <> bool IsCloseToZero(const float2 value) { return IsCloseToZero(value.real()) || IsCloseToZero(value.imag()); } template <> bool IsCloseToZero(const double2 value) { return IsCloseToZero(value.real()) || IsCloseToZero(value.imag()); } // ================================================================================================= // Performs a complex conjugate if complex template T ComplexConjugate(const T value) { return value; } template half ComplexConjugate(const half); template float ComplexConjugate(const float); template double ComplexConjugate(const double); template <> float2 ComplexConjugate(const float2 value) { return float2{value.real(), -value.imag()}; } template <> double2 ComplexConjugate(const double2 value) { return double2{value.real(), -value.imag()}; } // ================================================================================================= template void DeviceToHost(const Arguments &args, Buffers &buffers, BuffersHost &buffers_host, Queue &queue, const std::vector &names) { for (auto &name: names) { if (name == kBufVecX) {buffers_host.x_vec = std::vector(args.x_size, static_cast(0)); buffers.x_vec.Read(queue, args.x_size, buffers_host.x_vec); } else if (name == kBufVecY) { buffers_host.y_vec = std::vector(args.y_size, static_cast(0)); buffers.y_vec.Read(queue, args.y_size, buffers_host.y_vec); } else if (name == kBufMatA) { buffers_host.a_mat = std::vector(args.a_size, static_cast(0)); buffers.a_mat.Read(queue, args.a_size, buffers_host.a_mat); } else if (name == kBufMatB) { buffers_host.b_mat = std::vector(args.b_size, static_cast(0)); buffers.b_mat.Read(queue, args.b_size, buffers_host.b_mat); } else if (name == kBufMatC) { buffers_host.c_mat = std::vector(args.c_size, static_cast(0)); buffers.c_mat.Read(queue, args.c_size, buffers_host.c_mat); } else if (name == kBufMatAP) { buffers_host.ap_mat = std::vector(args.ap_size, static_cast(0)); buffers.ap_mat.Read(queue, args.ap_size, buffers_host.ap_mat); } else if (name == kBufScalar) { buffers_host.scalar = std::vector(args.scalar_size, static_cast(0)); buffers.scalar.Read(queue, args.scalar_size, buffers_host.scalar); } else if (name == kBufScalarUint) { buffers_host.scalar_uint = std::vector(args.scalar_size, 0); buffers.scalar_uint.Read(queue, args.scalar_size, buffers_host.scalar_uint); } else { throw std::runtime_error("Invalid buffer name"); } } } template void HostToDevice(const Arguments &args, Buffers &buffers, BuffersHost &buffers_host, Queue &queue, const std::vector &names) { for (auto &name: names) { if (name == kBufVecX) { buffers.x_vec.Write(queue, args.x_size, buffers_host.x_vec); } else if (name == kBufVecY) { buffers.y_vec.Write(queue, args.y_size, buffers_host.y_vec); } else if (name == kBufMatA) { buffers.a_mat.Write(queue, args.a_size, buffers_host.a_mat); } else if (name == kBufMatB) { buffers.b_mat.Write(queue, args.b_size, buffers_host.b_mat); } else if (name == kBufMatC) { buffers.c_mat.Write(queue, args.c_size, buffers_host.c_mat); } else if (name == kBufMatAP) { buffers.ap_mat.Write(queue, args.ap_size, buffers_host.ap_mat); } else if (name == kBufScalar) { buffers.scalar.Write(queue, args.scalar_size, buffers_host.scalar); } else if (name == kBufScalarUint) { buffers.scalar_uint.Write(queue, args.scalar_size, buffers_host.scalar_uint); } else { throw std::runtime_error("Invalid buffer name"); } } } // Compiles the above functions template void DeviceToHost(const Arguments&, Buffers&, BuffersHost&, Queue&, const std::vector&); template void DeviceToHost(const Arguments&, Buffers&, BuffersHost&, Queue&, const std::vector&); template void DeviceToHost(const Arguments&, Buffers&, BuffersHost&, Queue&, const std::vector&); template void DeviceToHost(const Arguments&, Buffers&, BuffersHost&, Queue&, const std::vector&); template void DeviceToHost(const Arguments&, Buffers&, BuffersHost&, Queue&, const std::vector&); template void DeviceToHost(const Arguments&, Buffers&, BuffersHost&, Queue&, const std::vector&); template void DeviceToHost(const Arguments&, Buffers&, BuffersHost&, Queue&, const std::vector&); template void HostToDevice(const Arguments&, Buffers&, BuffersHost&, Queue&, const std::vector&); template void HostToDevice(const Arguments&, Buffers&, BuffersHost&, Queue&, const std::vector&); template void HostToDevice(const Arguments&, Buffers&, BuffersHost&, Queue&, const std::vector&); template void HostToDevice(const Arguments&, Buffers&, BuffersHost&, Queue&, const std::vector&); template void HostToDevice(const Arguments&, Buffers&, BuffersHost&, Queue&, const std::vector&); template void HostToDevice(const Arguments&, Buffers&, BuffersHost&, Queue&, const std::vector&); template void HostToDevice(const Arguments&, Buffers&, BuffersHost&, Queue&, const std::vector&); // ================================================================================================= // Conversion between half and single-precision std::vector HalfToFloatBuffer(const std::vector& source) { auto result = std::vector(source.size()); for (auto i = size_t(0); i < source.size(); ++i) { result[i] = HalfToFloat(source[i]); } return result; } void FloatToHalfBuffer(std::vector& result, const std::vector& source) { for (auto i = size_t(0); i < source.size(); ++i) { result[i] = FloatToHalf(source[i]); } } // As above, but now for OpenCL data-types instead of std::vectors #ifdef OPENCL_API Buffer HalfToFloatBuffer(const Buffer& source, RawCommandQueue queue_raw) { const auto size = source.GetSize() / sizeof(half); auto queue = Queue(queue_raw); auto context = queue.GetContext(); auto source_cpu = std::vector(size); source.Read(queue, size, source_cpu); auto result_cpu = HalfToFloatBuffer(source_cpu); auto result = Buffer(context, size); result.Write(queue, size, result_cpu); return result; } void FloatToHalfBuffer(Buffer& result, const Buffer& source, RawCommandQueue queue_raw) { const auto size = source.GetSize() / sizeof(float); auto queue = Queue(queue_raw); auto context = queue.GetContext(); auto source_cpu = std::vector(size); source.Read(queue, size, source_cpu); auto result_cpu = std::vector(size); FloatToHalfBuffer(result_cpu, source_cpu); result.Write(queue, size, result_cpu); } #endif // ================================================================================================= void OverrideParametersFromJSONFiles(const std::vector& file_names, const RawDeviceID device, const Precision precision) { // Retrieves the best parameters for each file from disk BestParametersCollection all_parameters; for (const auto& json_file_name : file_names) { GetBestParametersFromJSONFile(json_file_name, all_parameters, precision); } // Applies the parameter override for (const auto &best_parameters : all_parameters) { const auto kernel_family = best_parameters.first; const auto parameters = best_parameters.second; const auto status = OverrideParameters(device, kernel_family, precision, parameters); if (status == StatusCode::kSuccess) { fprintf(stdout, "* Applying parameter override successfully for '%s'\n", kernel_family.c_str()); } else { fprintf(stdout, "* Error while applying parameter override for '%s'\n", kernel_family.c_str()); } } if (file_names.size() > 0) { fprintf(stdout, "\n"); } } void GetBestParametersFromJSONFile(const std::string& file_name, BestParametersCollection& all_parameters, const Precision precision) { std::ifstream json_file(file_name); if (!json_file) { fprintf(stdout, "* Could not open file '%s'\n", file_name.c_str()); return; } fprintf(stdout, "* Reading override-parameters from '%s'\n", file_name.c_str()); std::string line; auto kernel_family = std::string{}; while (std::getline(json_file, line)) { const auto line_split = split(line, ':'); if (line_split.size() != 2) { continue; } // Retrieves the kernel name if (line_split[0] == " \"kernel_family\"") { const auto value_split = split(line_split[1], '\"'); if (value_split.size() != 3) { break; } kernel_family = value_split[1]; kernel_family[0] = toupper(kernel_family[0]); // because of a tuner - database naming mismatch kernel_family.erase(std::remove(kernel_family.begin(), kernel_family.end(), '_'), kernel_family.end()); kernel_family.erase(std::remove(kernel_family.begin(), kernel_family.end(), '1'), kernel_family.end()); kernel_family.erase(std::remove(kernel_family.begin(), kernel_family.end(), '2'), kernel_family.end()); kernel_family.erase(std::remove(kernel_family.begin(), kernel_family.end(), '3'), kernel_family.end()); if (kernel_family == "Xgemmdirect") { kernel_family = "XgemmDirect"; } // more kinds of mismatches } // Retrieves the best-parameters and sets the override if (line_split[0] == " \"best_parameters\"" && kernel_family != std::string{""}) { const auto value_split = split(line_split[1], '\"'); if (value_split.size() != 3) { break; } const auto config_split = split(value_split[1], ' '); if (config_split.size() == 0) { break; } // Loads an existing list of parameters for this kernel family (if present) BestParameters parameters; if (all_parameters.count(kernel_family) == 1) { parameters = all_parameters.at(kernel_family); } // Creates the list of parameters fprintf(stdout, "* Found parameters for kernel '%s': { ", kernel_family.c_str()); for (const auto& config : config_split) { const auto params_split = split(config, '='); if (params_split.size() != 2) { break; } const auto parameter_name = params_split[0]; const auto parameter_value = static_cast(std::stoi(params_split[1].c_str())); if (parameter_name != "PRECISION") { printf("%s=%zu ", parameter_name.c_str(), parameter_value); parameters[parameter_name] = parameter_value; } else { if (static_cast(precision) != parameter_value) { fprintf(stdout, "ERROR! }\n"); fprintf(stdout, "* Precision is not matching, continuing\n"); json_file.close(); return; } } } fprintf(stdout, "}\n"); // Sets the new (possibly extended) parameter map as the final result all_parameters[kernel_family] = parameters; json_file.close(); return; } } // Ends this function (failure) fprintf(stdout, "* Failed to extract parameters from this file, continuing\n"); json_file.close(); } // ================================================================================================= } // namespace clblast