summary | refs | log | tree | commit | diff
path: root/src/routines/level2
diff options
context:
space:
mode:
author Cedric Nugteren <web@cedricnugteren.nl> 2016-07-23 10:20:11 +0200
committer Cedric Nugteren <web@cedricnugteren.nl> 2016-07-23 10:20:11 +0200
commit 75fe8235f78520fbbfff7c9c035ecd9f1aa3e6f6 (patch)
tree d49e584504b8192f250d96227aa3be01fbac2c92 /src/routines/level2
parent b33bec4a59d9d4d0b2e6a3d7e5f1d6e23d4279cb (diff)
Improved the XgemvFastRot kernel by tiled loading of the input matrix A, enabling better memory performance
Diffstat (limited to 'src/routines/level2')
-rw-r--r-- src/routines/level2/xgemv.cpp 2
1 file changed, 1 insertion, 1 deletion
diff --git a/src/routines/level2/xgemv.cpp b/src/routines/level2/xgemv.cpp
index 2842ef07..e4d407c8 100644
--- a/src/routines/level2/xgemv.cpp
+++ b/src/routines/level2/xgemv.cpp
@@ -122,7 +122,7 @@ StatusCode Xgemv<T>::MatVec(const Layout layout, const Transpose a_transpose,
}
if (fast_kernel_rot) {
kernel_name = "XgemvFastRot";
- global_size = m_real / db_["WPT3"];
+ global_size = m_real;
local_size = db_["WGS3"];
}