summaryrefslogtreecommitdiff
path: root/external/clBLAS/src/library/blas/gens/clTemplates/iamax.cl
diff options
context:
space:
mode:
Diffstat (limited to 'external/clBLAS/src/library/blas/gens/clTemplates/iamax.cl')
-rw-r--r--external/clBLAS/src/library/blas/gens/clTemplates/iamax.cl108
1 files changed, 0 insertions, 108 deletions
diff --git a/external/clBLAS/src/library/blas/gens/clTemplates/iamax.cl b/external/clBLAS/src/library/blas/gens/clTemplates/iamax.cl
deleted file mode 100644
index 9152ccb1..00000000
--- a/external/clBLAS/src/library/blas/gens/clTemplates/iamax.cl
+++ /dev/null
@@ -1,108 +0,0 @@
-/* ************************************************************************
- * Copyright 2013 Advanced Micro Devices, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * ************************************************************************/
-
-static const char *iamax_kernel = "
-#pragma OPENCL EXTENSION cl_amd_printf:enable
-#ifdef DOUBLE_PRECISION
- #ifdef cl_khr_fp64
- #pragma OPENCL EXTENSION cl_khr_fp64 : enable
- #else
- #pragma OPENCL EXTENSION cl_amd_fp64 : enable
- #endif
- #define MIN 0x1.0p-1022 // Min in case of d/z (values from khronos site)
-#else
- #define MIN 0x1.0p-126f // Min in case od s/c
-#endif
-/******************************************************
- * Implementations available for REDUCTION_BY_MAX
- 0 - ATOMIC_FLI
- 1 - REG_FLI,
- 2 - ATOMIC_FHI,
- 3 - REG_FHI
-
- Implementation available for REDUCE_MAX
- 0 - FHI
- 1 - FLI
- ***************************************************/
-
-__kernel void i%PREFIXamax_kernel( __global %TYPE *_X, __global %PTYPE *_scratchBuf,
- uint N, uint offx, int incx)
-{
- __global %TYPE *X = _X + offx;
- __global %PTYPE *scratchBufVal = _scratchBuf;
- int numGrps = get_num_groups(0);
- __global uint *scratchBufIndex = (__global uint*)(&_scratchBuf[numGrps]);
-
- #ifdef RETURN_ON_INVALID
- // Incase of incx<1, index will be zero
- if( get_global_id(0) == 0 ) {
- scratchBufVal[0] = (%PTYPE)0.0;
- scratchBufIndex[0] = 0;
- }
- return;
- #endif
-
- %PTYPE maxVal = MIN, val = MIN;
- uint index = 0, maxIndex = 0;
- %TYPE%V vReg1;
- %PTYPE%V pReg1;
-
- int gOffset;
- for( gOffset=(get_global_id(0) * %V); (gOffset + %V - 1)<N; gOffset+=( get_global_size(0) * %V ) )
- {
- #ifdef INCX_NONUNITY
- %VLOADWITHINCX( vReg1, (X + (gOffset*incx)), incx);
- #else
- vReg1 = %VLOAD( 0, (X + (gOffset * incx)) );
- #endif
-
- pReg1 = %VABS(vReg1);
-
- %REDUCE_MAX(pReg1,val,index,1); // Find max within a vector
-
- if(val > maxVal)
- {
- maxVal = val;
- maxIndex = (gOffset + index);
- }
- }
-
- // Loop for the last thread to handle the tail part of the vector
- // Using the same gOffset used above
- for( ; gOffset<N; gOffset++ )
- {
- %TYPE sReg1;
- sReg1 = X[gOffset * incx];
- if(%VABS(sReg1) > maxVal)
- {
- maxVal = %VABS(sReg1);
- maxIndex = gOffset;
- }
- }
-
- // Note: this has to be called outside any if-conditions- because REDUCTION uses barrier
-#ifdef REDUCE_MAX_WITH_INDEX_ATOMICS
- %REDUCTION_BY_MAX(maxVal,maxIndex,0);
-#else
- %REDUCTION_BY_MAX(maxVal,maxIndex,1);
-#endif
-
- if(get_local_id(0) == 0)
- {
- scratchBufVal[get_group_id(0)] = maxVal;
- scratchBufIndex[get_group_id(0)] = maxIndex + 1; // because 0 is reserved for error
- }
-}";