diff options
Diffstat (limited to 'external/clBLAS/src/library/blas/generic/solution_seq.c')
-rw-r--r-- | external/clBLAS/src/library/blas/generic/solution_seq.c | 465 |
1 files changed, 0 insertions, 465 deletions
diff --git a/external/clBLAS/src/library/blas/generic/solution_seq.c b/external/clBLAS/src/library/blas/generic/solution_seq.c deleted file mode 100644 index cc52c425..00000000 --- a/external/clBLAS/src/library/blas/generic/solution_seq.c +++ /dev/null @@ -1,465 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> -#include <string.h> -#include <clblas_stddef.h> - -#include "matrix_dims.h" -#include "problem_iter.h" -#include "solution_assert.h" -#include "solution_seq.h" - -bool VISIBILITY_HIDDEN isMatrixInImage(MemoryPattern *pattern, MatrixRole mrole); -void VISIBILITY_HIDDEN releaseStepImgs(SolutionStep *step); - -static cl_int -enqueueKernel( - SolutionStep *step, - const Kernel *kernel, - cl_uint numEventsInWaitList, - const cl_event *eventWaitList, - cl_event *event); - -static void -splitSolutionStep( - SolutionStep *rem, - SolutionStep *cut, - SDimComponent component, - size_t chunk, - bool backward); - -static cl_int -executeImageStep( - SolutionStep *step, - cl_uint numEventsInWaitList, - const cl_event *eventWaitList, - cl_event *event); - -void -freeSolutionSeq(ListHead *seq) -{ - listDoForEachSafe(seq, freeSolutionStep); - listInitHead(seq); -} - -cl_int -executeSolutionSeq(const ListHead *seq) -{ - cl_int err = CL_SUCCESS; - ListNode *i; - SolutionStep *step; - - - /* Enqueue computing kernels */ - for (i = listNodeFirst(seq); (i != seq) && (err == CL_SUCCESS); - i = i->next) { - - step = container_of(i, node, SolutionStep); - if (step->cmdQueue == NULL) { - continue; - } - - if (step->args.scimage[0]) { - err = executeImageStep(step, step->numEventsInWaitList, - step->eventWaitList, step->event); - } - else { - #ifdef DEBUG_2 - printf("enqueueKernel from executreSolutionSeq...\n"); - #endif - - err = enqueueKernel(step, - step->kernels[CLBLAS_COMPUTING_KERNEL], - step->numEventsInWaitList, step->eventWaitList, - step->event); - } - } - - return err; -} - -/* private functions */ - -void VISIBILITY_HIDDEN -freeSolutionStep(ListNode *node) -{ - SolutionStep *step = container_of(node, node, SolutionStep); - int i; - - for (i = 0; i < MAX_CLBLAS_KERNELS_PER_STEP; i++) { - if (step->kernels[i] != NULL) { - putKernel(clblasKernelCache, step->kernels[i]); - } - } - releaseStepImgs(step); - free(step); -} - -static cl_int -enqueueKernel( - SolutionStep *step, - const Kernel *kernel, - cl_uint numEventsInWaitList, - const cl_event *eventWaitList, - cl_event *event) -{ - cl_int err; - KernelDesc kernelDesc; - KernelErrorInfo errInfo; - MemoryPattern *pattern; - const CLBLASKernExtra *kextra = (const CLBLASKernExtra*)kernel->extra; - SubproblemDim subdims[MAX_SUBDIMS]; - - step->args.kernType = kextra->kernType; - pattern = &clblasSolvers[step->funcID].memPatterns[step->patternID]; - kernelDesc.workDim = step->pgran.wgDim; - - memcpy(subdims, step->subdims, sizeof(step->subdims)); - - if(NULL==pattern->sops->calcThreads) - { - SubproblemDim globDim; - const PGranularity *pgran; - - pgran = (pattern->nrLevels == 1) ? NULL : &step->pgran; - kargsToProbDims(&globDim, step->funcID, &step->args, false); - - // fixup dimensions in respect with desired work dispatch order - if ((step->pgran.wgDim == 2) && pattern->sops->innerDecompositionAxis) { - if (pattern->sops->innerDecompositionAxis(&step->args) == - DECOMP_AXIS_X) { - - /* - * these dimensions will not used more anywhere, so we can - * just swap them - */ - swapDimXY(&subdims[0]); - swapDimXY(&subdims[1]); - swapDimXY(&globDim); - } - } - - calcGlobalThreads(kernelDesc.globalThreads, subdims, - pgran, globDim.y, globDim.x); - } - else - { - #ifdef DEBUG_2 - printf("calcThreads is defined\n"); - #endif - - pattern->sops->calcThreads( kernelDesc.globalThreads, - subdims, - &step->pgran, - &step->args, - kextra); - } - - // - // Store the numWGSpawned for this kernel - // This size can be used by sequence-steps down the line - // e.g. Reduction of intermediate results of each work group - // - step->pgran.numWGSpawned[0] = kernelDesc.globalThreads[0] / step->pgran.wgSize[0]; - step->pgran.numWGSpawned[1] = kernelDesc.globalThreads[1] / step->pgran.wgSize[1]; - - kernelDesc.localThreads[0] = step->pgran.wgSize[0]; - kernelDesc.localThreads[1] = step->pgran.wgSize[1]; - kernelDesc.workDim = step->pgran.wgDim; - kernelDesc.waitListSize = numEventsInWaitList; - kernelDesc.eventWaitList = eventWaitList; - kernelDesc.nowait = 1; - kernelDesc.event = event; - kernelDesc.needExecTime = 0; - - memset(kernelDesc.args, 0, sizeof(KernelArg) * MAX_KERNEL_ARGS); - pattern->sops->assignKargs(kernelDesc.args, (const void*)&(step->args), - kextra); - - errInfo.wrongArg = 0; - errInfo.phase = 0; - - /* - * TODO: log launchClKernel errors - */ - dumpKernel(step, kextra->kernType); - - err = clCreateKernelsInProgram(kernel->program, 1, &kernelDesc.kernel, - NULL); - if (err == CL_SUCCESS) { - err = launchClKernel(&kernelDesc, step->cmdQueue, &errInfo); - clReleaseKernel(kernelDesc.kernel); - } - - return err; -} - -bool VISIBILITY_HIDDEN -isMatrixInImage( - MemoryPattern *pattern, - MatrixRole mrole) -{ - const CLBLASMpatExtra *extra = (const CLBLASMpatExtra*)pattern->extra; - bool ret = false; - - if (extra != NULL) { - switch (mrole) { - case MATRIX_A: - ret = (extra->mobjA == CLMEM_IMAGE); - break; - case MATRIX_B: - ret = (extra->mobjB == CLMEM_IMAGE); - break; - default: - break; - } - } - - return ret; -} - -void VISIBILITY_HIDDEN -releaseStepImgs(SolutionStep *step) -{ - int i; - cl_mem *imgs = step->args.scimage; - cl_device_id devID = NULL;; - - for (i = 0; (i < 2) && (imgs[i] != NULL); i++) { - if (devID == NULL) { - getQueueDevice(step->cmdQueue, &devID); - } - putSCImage(devID, imgs[i]); - imgs[i] = NULL; //to avoid double release - } -} - -static cl_int -executeImageStep( - SolutionStep *step, - cl_uint numEventsInWaitList, - const cl_event *eventWaitList, - cl_event *event) -{ - SolutionStep outerStep, innerStep, execStep; - cl_int err = CL_SUCCESS; - int currImg = 0; - size_t imgWidth, imgHeight; - size_t ha, hb; - size_t maxPanels[MATRIX_ROLES_NUMBER], maxBlocks[MATRIX_ROLES_NUMBER]; - size_t off; - SubproblemDim wholeDim; - MatrixRole mrole; - CLBlasKargs *kargs = &step->args; - cl_mem *imgs = kargs->scimage; - MemoryPattern *mempat = &clblasSolvers[step->funcID].memPatterns[step->patternID]; - ProblemIterator innerIter, outerIter; - int oend = 0, iend; - SDimComponent comp[2]; - bool backward; - ListHead doneSteps; - CLBlasKernelType ktype; - - kargsToProbDims(&wholeDim, step->funcID, kargs, false); - memset(maxPanels, 0, sizeof(maxPanels)); - memset(maxBlocks, 0, sizeof(maxPanels)); - - memcpy(&outerStep, step, sizeof(SolutionStep)); - memcpy(&execStep, step, sizeof(SolutionStep)); - listInitHead(&doneSteps); - - /* - * Cover the whole problem with dimension which matrix blocks are - * fitted to images at. - */ - - for (mrole = MATRIX_A; mrole < MATRIX_C; mrole++) { - if (!isMatrixInImage(mempat, mrole)) { - continue; - } - - clGetImageInfo(imgs[currImg], CL_IMAGE_WIDTH, sizeof(imgWidth), - &imgWidth, NULL); - clGetImageInfo(imgs[currImg], CL_IMAGE_HEIGHT, sizeof(imgHeight), - &imgHeight, NULL); - - if (step->funcID == CLBLAS_TRSM) { - maxPanels[mrole] = 0; - maxBlocks[mrole] = 0; - } else { - maxPanels[mrole] = imgHeight / matrBlockHeight(step->subdims, mrole, - clblasLeft); - } - currImg++; - } - - /* - * for GEMM function we can take both the matrices as outer, it depends on - * their sizes and image sizes - */ - if (step->funcID == CLBLAS_GEMM) { - size_t dx, dy; - - // FIXME: check which of them use really an image - - ha = matrBlockHeight(&wholeDim, MATRIX_A, clblasLeft); - hb = matrBlockHeight(&wholeDim, MATRIX_B, clblasLeft); - - dx = maxPanels[MATRIX_B] * matrBlockHeight(step->subdims, MATRIX_B, - clblasLeft); - dy = maxPanels[MATRIX_A] * matrBlockHeight(step->subdims, MATRIX_A, - clblasLeft); - - // hb + (hb*ha)/dx < ha + (ha*hb)/dy - if ((hb / ha) < (1 + hb / dy) / (1 + ha / dx)) { - mrole = MATRIX_B; - } - else { - mrole = MATRIX_A; - } - } - else { - mrole = MATRIX_B; - } - /* - * Let's cover the whole image based step. - * Pattern iterator is used for traversing - */ - initProblemIterator(&outerIter, step->funcID, mrole, kargs, - maxPanels[mrole], maxBlocks[mrole], step->subdims); - if (mrole == MATRIX_B) { - comp[0] = SDIM_X; - comp[1] = SDIM_Y; - mrole = MATRIX_A; - } - else { - comp[0] = SDIM_Y; - comp[1] = SDIM_X; - mrole = MATRIX_B; - } - initProblemIterator(&innerIter, step->funcID, mrole, - kargs, maxPanels[mrole], maxBlocks[mrole], - step->subdims); - backward = isIterBackward(&innerIter); - - /* - * Difference in overflowing checking in the outer and inner loops - * is due to - */ - do { - iteratorReset(&innerIter); - iend = 0; - oend = iterateProblem(&outerIter); - off = iterLastOffset(&outerIter); - - splitSolutionStep(&outerStep, &execStep, comp[0], - off, false); - if (execStep.funcID == CLBLAS_GEMM) { - fixupGemmOffsets(&execStep.args, execStep.extraFlags, 0); - } - - memcpy(&innerStep, &execStep, sizeof(SolutionStep)); - - ktype = (comp[0] == SDIM_Y) ? CLBLAS_PREP_A_KERNEL : - CLBLAS_PREP_B_KERNEL; - - if (execStep.kernels[ktype] != NULL) { - err = enqueueKernel(&execStep, execStep.kernels[ktype], - numEventsInWaitList, eventWaitList, event); - if (err != CL_SUCCESS) { - break; - } - } - - do { - iend = iterateProblem(&innerIter); - off = iterLastOffset(&innerIter); - splitSolutionStep(&innerStep, &execStep, - comp[1], off, backward); - if (execStep.funcID == CLBLAS_GEMM) { - fixupGemmOffsets(&execStep.args, execStep.extraFlags, 0); - } - - assertImageSubstep(step, &execStep, &doneSteps); - - ktype = (comp[1] == SDIM_Y) ? CLBLAS_PREP_A_KERNEL : - CLBLAS_PREP_B_KERNEL; - if (execStep.kernels[ktype] != NULL) { - err = enqueueKernel(&execStep, execStep.kernels[ktype], - numEventsInWaitList, eventWaitList, event); - } - if (err == CL_SUCCESS) { - err = enqueueKernel(&execStep, - execStep.kernels[CLBLAS_COMPUTING_KERNEL], - numEventsInWaitList, eventWaitList, - event); - } - } while (!iend && (err == CL_SUCCESS)); - } while (!oend && (err == CL_SUCCESS)); - - if (err == CL_SUCCESS) { - assertImageStep(step, &doneSteps); - } - releaseImageAssertion(&doneSteps); - - return err; -} - -static void -splitSolutionStep( - SolutionStep *rem, - SolutionStep *cut, - SDimComponent component, - size_t chunk, - bool backward) -{ - SubproblemDim remDim, cutDim; - SubproblemDim remDimOff, cutDimOff; - - kargsToProbDims(&remDimOff, rem->funcID, &rem->args, true); - kargsToProbDims(&remDim, rem->funcID, &rem->args, false); - memcpy(&cutDim, &remDim, sizeof(SubproblemDim)); - memcpy(&cutDimOff, &remDimOff, sizeof(SubproblemDim)); - - memcpy(cut, rem, sizeof(SolutionStep)); - if (component == SDIM_Y) { - if (backward) { - cutDimOff.y += remDim.y - chunk; - } - else { - remDimOff.y += chunk; - } - cutDim.y = chunk; - remDim.y -= chunk; - } - else { - if (backward) { - cutDimOff.x += remDim.x - chunk; - } - else { - remDimOff.x += chunk; - } - cutDim.x = chunk; - remDim.x -= chunk; - } - - probDimsToKargs(&rem->args, rem->funcID, &remDimOff, true); - probDimsToKargs(&rem->args, rem->funcID, &remDim, false); - probDimsToKargs(&cut->args, cut->funcID, &cutDimOff, true); - probDimsToKargs(&cut->args, cut->funcID, &cutDim, false); -} |