From 93ffb876c60838bee75d3bb25ebbcbfce02e2cc7 Mon Sep 17 00:00:00 2001 From: Cedric Nugteren Date: Wed, 29 Nov 2017 20:21:08 +0100 Subject: Reformatted unrollable kernel loops and added the new promote_to_registers pragma for several kernels --- src/kernels/level2/xgemv.opencl | 34 ++++---- src/kernels/level2/xgemv_fast.opencl | 155 ++++++++++++++++++----------------- src/kernels/level2/xger.opencl | 46 ++++++----- 3 files changed, 119 insertions(+), 116 deletions(-) (limited to 'src/kernels/level2') diff --git a/src/kernels/level2/xgemv.opencl b/src/kernels/level2/xgemv.opencl index ea0478f0..2a50e8fb 100644 --- a/src/kernels/level2/xgemv.opencl +++ b/src/kernels/level2/xgemv.opencl @@ -227,10 +227,11 @@ void Xgemv(const int m, const int n, __local real xlm[WGS1]; // Initializes the accumulation register + #pragma promote_to_registers real acc[WPT1]; #pragma unroll - for (int w=0; w