summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl> 2018-04-20 21:50:36 +0200
committer: Cedric Nugteren <web@cedricnugteren.nl> 2018-04-20 21:50:36 +0200
commit: 3e3a26e0da526de9f577c007a61d5ee7b5343e69 (patch)
tree: 1e52e666cd2430043a63c8f9f109460f6bd1cdaf
parent: 458e6717a9dbb515e2c09e9c02f8936f6afacff1 (diff)
Fixes for the CUDA API
-rw-r--r-- CMakeLists.txt 3
-rw-r--r-- src/clblast_cuda.cpp 3
2 files changed, 3 insertions, 3 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index eb04287e..0715b866 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -236,7 +236,6 @@ set(SOURCES
src/routine.cpp
src/routines/levelx/xinvert.cpp # only source, don't include it as a test
src/tuning/configurations.cpp
- src/tuning/tuning_api.cpp
)
set(HEADERS # such that they can be discovered by IDEs such as CLion and Visual Studio
include/clblast_half.h
@@ -265,7 +264,7 @@ set(HEADERS # such that they can be discovered by IDEs such as CLion and Visual
src/tuning/routines/routine_tuner.hpp
)
if(OPENCL)
- set(SOURCES ${SOURCES} src/clblast.cpp src/clblast_c.cpp)
+ set(SOURCES ${SOURCES} src/clblast.cpp src/clblast_c.cpp src/tuning/tuning_api.cpp)
set(HEADERS ${HEADERS} include/clblast.h include/clblast_c.h src/clpp11.hpp)
if(NETLIB)
set(SOURCES ${SOURCES} src/clblast_netlib_c.cpp)
diff --git a/src/clblast_cuda.cpp b/src/clblast_cuda.cpp
index 0ba57056..8927014b 100644
--- a/src/clblast_cuda.cpp
+++ b/src/clblast_cuda.cpp
@@ -2593,7 +2593,8 @@ StatusCode GemmTempBufferSize(const Layout layout, const Transpose a_transpose,
else {
temp_buffer_size = Xgemm<T>::GetTempSize(layout, a_transpose, b_transpose, m, n, k,
a_offset, a_ld, b_offset, b_ld, c_offset, c_ld,
- db["MWG"], db["NWG"], db["KWG"]);
+ db["MWG"], db["NWG"], db["KWG"] * db["KREG"],
+ db["GEMMK"]);
}
temp_buffer_size *= sizeof(T); // translate from num-elements to bytes
return StatusCode::kSuccess;