diff options
author | Cedric Nugteren <web@cedricnugteren.nl> | 2018-04-20 21:50:36 +0200 |
---|---|---|
committer | Cedric Nugteren <web@cedricnugteren.nl> | 2018-04-20 21:50:36 +0200 |
commit | 3e3a26e0da526de9f577c007a61d5ee7b5343e69 (patch) | |
tree | 1e52e666cd2430043a63c8f9f109460f6bd1cdaf /src/clblast_cuda.cpp | |
parent | 458e6717a9dbb515e2c09e9c02f8936f6afacff1 (diff) |
Fixes for the CUDA API
Diffstat (limited to 'src/clblast_cuda.cpp')
-rw-r--r-- | src/clblast_cuda.cpp | 3 |
1 file changed, 2 insertions, 1 deletion
```diff
diff --git a/src/clblast_cuda.cpp b/src/clblast_cuda.cpp
index 0ba57056..8927014b 100644
--- a/src/clblast_cuda.cpp
+++ b/src/clblast_cuda.cpp
@@ -2593,7 +2593,8 @@ StatusCode GemmTempBufferSize(const Layout layout, const Transpose a_transpose,
     else {
       temp_buffer_size = Xgemm<T>::GetTempSize(layout, a_transpose, b_transpose, m, n, k,
                                                a_offset, a_ld, b_offset, b_ld, c_offset, c_ld,
-                                               db["MWG"], db["NWG"], db["KWG"]);
+                                               db["MWG"], db["NWG"], db["KWG"] * db["KREG"],
+                                               db["GEMMK"]);
     }
     temp_buffer_size *= sizeof(T); // translate from num-elements to bytes
     return StatusCode::kSuccess;
```