From 3e3a26e0da526de9f577c007a61d5ee7b5343e69 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Fri, 20 Apr 2018 21:50:36 +0200 Subject: Fixes for the CUDA API --- CMakeLists.txt | 3 +-- src/clblast_cuda.cpp | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index eb04287e..0715b866 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -236,7 +236,6 @@ set(SOURCES src/routine.cpp src/routines/levelx/xinvert.cpp # only source, don't include it as a test src/tuning/configurations.cpp - src/tuning/tuning_api.cpp ) set(HEADERS # such that they can be discovered by IDEs such as CLion and Visual Studio include/clblast_half.h @@ -265,7 +264,7 @@ set(HEADERS # such that they can be discovered by IDEs such as CLion and Visual src/tuning/routines/routine_tuner.hpp ) if(OPENCL) - set(SOURCES ${SOURCES} src/clblast.cpp src/clblast_c.cpp) + set(SOURCES ${SOURCES} src/clblast.cpp src/clblast_c.cpp src/tuning/tuning_api.cpp) set(HEADERS ${HEADERS} include/clblast.h include/clblast_c.h src/clpp11.hpp) if(NETLIB) set(SOURCES ${SOURCES} src/clblast_netlib_c.cpp) diff --git a/src/clblast_cuda.cpp b/src/clblast_cuda.cpp index 0ba57056..8927014b 100644 --- a/src/clblast_cuda.cpp +++ b/src/clblast_cuda.cpp @@ -2593,7 +2593,8 @@ StatusCode GemmTempBufferSize(const Layout layout, const Transpose a_transpose, else { temp_buffer_size = Xgemm::GetTempSize(layout, a_transpose, b_transpose, m, n, k, a_offset, a_ld, b_offset, b_ld, c_offset, c_ld, - db["MWG"], db["NWG"], db["KWG"]); + db["MWG"], db["NWG"], db["KWG"] * db["KREG"], + db["GEMMK"]); } temp_buffer_size *= sizeof(T); // translate from num-elements to bytes return StatusCode::kSuccess; -- cgit v1.2.3