summary | refs | log | tree | commit | diff
path: root/src/kernels
diff options
context:
space:
mode:
author: Cedric Nugteren <web@cedricnugteren.nl>, 2017-03-04 15:21:33 +0100
committer: Cedric Nugteren <web@cedricnugteren.nl>, 2017-03-04 15:21:33 +0100
commit: e993ee077b50d3a6134309d465a4174b5c749596 (patch)
tree: b967f2702b90d8080a3e3cb41b9cbc01ab9eddc3 /src/kernels
parent: 3fc73851f7ed885335940eb85e53069638567323 (diff)
Added a proper data-preparation function for the TRSM tests
Diffstat (limited to 'src/kernels')
-rw-r--r-- src/kernels/level3/invert_diagonal_blocks.opencl | 4
1 files changed, 3 insertions, 1 deletions
diff --git a/src/kernels/level3/invert_diagonal_blocks.opencl b/src/kernels/level3/invert_diagonal_blocks.opencl
index c59bcbcb..55f4a963 100644
--- a/src/kernels/level3/invert_diagonal_blocks.opencl
+++ b/src/kernels/level3/invert_diagonal_blocks.opencl
@@ -140,7 +140,9 @@ void InvertDiagonalBlock(int n, __global const real* restrict src, const int src
for (int k = j + 1; k < INTERNAL_BLOCK_SIZE; ++k) {
MultiplyAdd(sum, lm[thread_index][k], lm[k][j]);
}
- Multiply(lm[thread_index][j], -lm[j][j], sum);
+ real diagonal_value = lm[j][j];
+ Negate(diagonal_value);
+ Multiply(lm[thread_index][j], diagonal_value, sum);
}
barrier(CLK_LOCAL_MEM_FENCE);
}