summaryrefslogtreecommitdiff
path: root/external/clBLAS/src/library/common/tests/t_dblock_kgen.c
diff options
context:
space:
mode:
Diffstat (limited to 'external/clBLAS/src/library/common/tests/t_dblock_kgen.c')
-rw-r--r--external/clBLAS/src/library/common/tests/t_dblock_kgen.c1389
1 files changed, 0 insertions, 1389 deletions
diff --git a/external/clBLAS/src/library/common/tests/t_dblock_kgen.c b/external/clBLAS/src/library/common/tests/t_dblock_kgen.c
deleted file mode 100644
index 75987f9c..00000000
--- a/external/clBLAS/src/library/common/tests/t_dblock_kgen.c
+++ /dev/null
@@ -1,1389 +0,0 @@
-/* ************************************************************************
- * Copyright 2013 Advanced Micro Devices, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * ************************************************************************/
-
-
-/*
- * data block processing function
- * generators test
- *
- * NOTES:
- * 1) The test can run incorrectly on devices with
- * wavefront less than 64.
- * 2) The test with -n or (and) -o option will not work
- * on CPU since unaligned access to vector data are
- * not allowed for it.
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <getopt.h>
-#include <unistd.h>
-#include <string.h>
-#include <assert.h>
-
-#include <clkern.h>
-#include <dblock_kgen.h>
-
-#define MAX(a, b) ((b) > (a)) ? (b) : (a)
-#define ARRAY_LENGTH(ar) sizeof((ar)) / sizeof((ar)[0])
-
-#define EXTRACT_COMPLEX_DOUBLE(ptr, type, re, img) \
-do { \
- type *ptr1 = (type*)ptr; \
- \
- re = ptr1->s[0]; \
- img = ptr1->s[1]; \
-} while (0)
-
-#define MUL_COMPLEX(mul1, mul2, type) \
-do { \
- type *mul11 = (type*)mul1; \
- type *mul21 = (type*)mul2; \
- type tmp = *mul11; \
- \
- mul11->s[0] = tmp.s[0] * mul21->s[0] - tmp.s[1] * mul21->s[1]; \
- mul11->s[1] = tmp.s[0] * mul21->s[1] + tmp.s[1] * mul21->s[0]; \
-} while (0) \
-
-enum {
- SOURCE_BUFLEN = 1048576
-};
-
-enum {
- DEBUG_BUFLEN = 1048576
-};
-
-typedef enum TransposeType {
- TRANSPOSE_LOCAL, // transpose at copying to the local memory
- TRANSPOSE_GLOBAL, // transpose at copying to the global memory
- TRANSPOSE_BOTH // transpose at both the directions copying
-} TransposeType;
-
-typedef struct TestDesc {
- cl_uint widthA;
- cl_uint heightA;
- cl_uint widthB;
- cl_uint heightB;
- cl_uint srowA; // start row in matrix A
- cl_uint scolA;
- cl_uint srowB;
- cl_uint scolB;
- SubproblemDim dim;
- PGranularity pgran;
- bool transpose;
- bool generic;
- bool packedImages;
- TransposeType transpType;
- // type size
- DataType type;
-} TestDesc;
-
-typedef struct FuncTable {
- // fill matrix element with random value
- void (*fillRandom)(void *a);
- // fill the matrix element with a special marker
- void (*fillMarker)(void *a);
- // function comparing two elements
- int (*compare)(const void *a, const void *b);
- // multiply an element 'a' on element 'b' and update the element 'a'
- void (*mul)(void *a, const void *b);
-} FuncTable;
-
-typedef int
-(*TestFn)(
- struct KgenContext *ctx,
- void *srcBuf,
- TestDesc *tdesc,
- cl_device_id devID,
- cl_context clCtx,
- cl_command_queue queue);
-
-extern char *optarg;
-
-const float boundMarker = 5.0;
-
-const char *usage =
- "Usage: t_dblock_kgen -f <proc> [-c] [-t type] -d <type> [-n] [-o] [-g];\n"
- "-c -- launch the CL code on CPU\n"
- "-t -- transposed version: if option argument is 'local', transpose at copying\n"
- " to the local memory, if it is 'global', then transpose at copying to the\n"
- " global memory, if 'both' transpose at both the copying\n"
- "-d -- data type: float, double, complex_float, complex_double\n"
- "-n -- matrix width is not float4 aligned\n"
- "-o -- start offset is not zero\n"
- "-g -- generic (slow) version\n"
- "-b -- several rows can be packed to one image row;\n";
-
-const char *rwBlockKernelDecl =
- "__kernel void\n"
- "rwMatrBlockTest(\n"
- " __global %s *matrA,\n"
- " unsigned int lda,\n"
- " __global %s *matrB,\n"
- " unsigned int ldb,\n"
- " unsigned int srowA,\n"
- " unsigned int scolA,\n"
- " unsigned int srowB,\n"
- " unsigned int scolB)\n";
-
-const char *rwBlockKernelImgDecl =
- "__kernel void\n"
- "rwMatrBlockTest(\n"
- " __global %s *matrA,\n"
- " unsigned int lda,\n"
- " __global %s *matrB,\n"
- " unsigned int ldb,\n"
- " unsigned int srowA,\n"
- " unsigned int scolA,\n"
- " unsigned int srowB,\n"
- " unsigned int scolB,\n"
- " __write_only image2d_t image1,\n"
- " __write_only image2d_t image2)\n";
-
-// type specific functions
-
-// for the float type
-static void
-fFillRandom(void *a)
-{
- *(cl_float*)a = random() % 1000;
-}
-
-static void
-fFillMarker(void *a)
-{
- *(cl_float*)a = boundMarker;
-}
-
-static int
-fCompare(const void *a, const void *b)
-{
- cl_float *a1 = (cl_float*)a;
- cl_float *b1 = (cl_float*)b;
-
- return !(*a1 == *b1);
-}
-
-static void
-fmul(void *a, const void *b)
-{
- cl_float *a1 = (cl_float*)a;
- cl_float *b1 = (cl_float*)b;
-
- *a1 *= *b1;
-}
-
-// for the double type
-
-static void
-dFillRandom(void *a)
-{
- *(cl_double*)a = random() % 1000;
-}
-
-static void
-dFillMarker(void *a)
-{
- *(cl_double*)a = boundMarker;
-}
-
-static int
-dCompare(const void *a, const void *b)
-{
- cl_double *a1 = (cl_double*)a;
- cl_double *b1 = (cl_double*)b;
-
- return !(*a1 == *b1);
-}
-
-static void
-dmul(void *a, const void *b)
-{
- cl_double *a1 = (cl_double*)a;
- cl_double *b1 = (cl_double*)b;
-
- *a1 *= *b1;
-}
-
-// for the complex float type
-
-static void
-cFillRandom(void *a)
-{
- cl_float2 *a1 = (cl_float2*)a;
-
- a1->s[0] = random() % 1000;
- a1->s[1] = random() % 1000;
-}
-
-static void
-cFillMarker(void *a)
-{
- cl_float2 *a1 = (cl_float2*)a;
-
- a1->s[0] = boundMarker;
- a1->s[1] = boundMarker;
-}
-
-static int
-cCompare(const void *a, const void *b)
-{
- cl_float2 *a1 = (cl_float2*)a;
- cl_float2 *b1 = (cl_float2*)b;
-
- return !((a1->s[0] == b1->s[0]) && (a1->s[1] == b1->s[1]));
-}
-
-static void
-cmul(void *a, const void *b)
-{
- MUL_COMPLEX(a, b, cl_float2);
-}
-
-// for the complex double type
-
-void
-zFillRandom(void *a)
-{
- cl_double2 *a1 = (cl_double2*)a;
-
- a1->s[0] = random() % 1000;
- a1->s[1] = random() % 1000;
-}
-
-void
-zFillMarker(void *a)
-{
- cl_double2 *a1 = (cl_double2*)a;
-
- a1->s[0] = boundMarker;
- a1->s[1] = boundMarker;
-}
-
-int
-zCompare(const void *a, const void *b)
-{
- cl_double2 *a1 = (cl_double2*)a;
- cl_double2 *b1 = (cl_double2*)b;
-
- return !((a1->s[0] == b1->s[0]) && (a1->s[1] == b1->s[1]));
-}
-
-static void
-zmul(void *a, const void *b)
-{
- MUL_COMPLEX(a, b, cl_double2);
-}
-
-static FuncTable funcTable[TYPE_COMPLEX_DOUBLE + 1] = {
- {fFillRandom, fFillMarker, fCompare, fmul},
- {dFillRandom, dFillMarker, dCompare, dmul},
- {cFillRandom, cFillMarker, cCompare, cmul},
- {zFillRandom, zFillMarker, zCompare, zmul}
-};
-
-/*
- * fill matrix with random elements or the special random
- * element if 'random' is set to true
- */
-static void
-fillMatrix(
- cl_float *matr,
- size_t height,
- size_t width,
- size_t ld,
- DataType dtype,
- bool marker)
-{
- unsigned int nfloats;
- size_t i, j;
- void *p;
- void (*fill)(void*);
-
- fill = (marker) ? funcTable[dtype].fillMarker : funcTable[dtype].fillRandom;
-
- nfloats = dtypeSize(dtype) / sizeof(cl_float);
- for (i = 0; i < height; i++) {
- for (j = 0; j < width; j++) {
- p = (cl_float*)matr + (i * ld + j) * nfloats;
- fill(p);
- }
- }
-}
-
-static int
-compareMatrices(void *matrA, void *matrB, const TestDesc *tdesc)
-{
- size_t i, j;
- unsigned int nfloats;
- void *p1, *p2;
- int ret = 0;
- double a1, b1, a2, b2;
-
- nfloats = dtypeSize(tdesc->type) / sizeof(cl_float);
- for (i = 0; (i < tdesc->dim.y) && !ret; i++) {
- for (j = 0; j < tdesc->dim.x; j++) {
- p1 = (cl_float*)matrA + ((tdesc->srowA + i) * tdesc->widthA +
- tdesc->scolA + j) * nfloats;
- if (tdesc->transpose && (tdesc->transpType != TRANSPOSE_BOTH)) {
- p2 = (cl_float*)matrB + ((tdesc->srowB + j) * tdesc->widthB +
- tdesc->scolB + i) * nfloats;
- }
- else {
- p2 = (cl_float*)matrB + ((tdesc->srowB + i) * tdesc->widthB +
- tdesc->scolB + j) * nfloats;
- }
- ret = funcTable[tdesc->type].compare(p1, p2);
- if (ret) {
- printf("The first error occurred at row %lu, column %lu "
- "of the block: ", i + tdesc->srowA, j + tdesc->scolA);
- if ((tdesc->type == TYPE_FLOAT) ||
- (tdesc->type == TYPE_DOUBLE)) {
-
- if (tdesc->type == TYPE_FLOAT) {
- a1 = *(cl_float*)p1;
- b1 = *(cl_float*)p2;
- }
- else {
- a1 = *(cl_double*)p1;
- b1 = *(cl_double*)p2;
- }
- printf("value is %.5E but must be %.5E\n", b1, a1);
- }
- else {
- if (tdesc->type == TYPE_COMPLEX_FLOAT) {
- EXTRACT_COMPLEX_DOUBLE(p1, cl_float2, a1, a2);
- EXTRACT_COMPLEX_DOUBLE(p2, cl_float2, b1, b2);
- }
- else {
- EXTRACT_COMPLEX_DOUBLE(p1, cl_double2, a1, a2);
- EXTRACT_COMPLEX_DOUBLE(p2, cl_double2, b1, b2);
- }
- printf("value is (%.5E, %.5E) but must be (%.5E, %.5E)\n",
- b1, b2, a1, a2);
- }
- break;
- }
- }
- }
-
- return ret;
-}
-
-static int
-checkBound(
- void *matr,
- DataType dtype,
- size_t srow,
- size_t scol,
- size_t nrRows,
- size_t nrCols,
- size_t rwidth)
-{
- size_t i, j;
- unsigned int nfloats;
- void *p;
- int ret = 0;
- double a1, a2;
- unsigned char marker[sizeof(cl_double2)];
-
- nfloats = dtypeSize(dtype) / sizeof(cl_float);
- funcTable[dtype].fillMarker(marker);
-
- for (i = 0; (i < nrRows) && !ret; i++) {
- for (j = 0; j < nrCols; j++) {
- p = (cl_float*)matr + ((srow + i) * rwidth +
- scol + j) * nfloats;
- ret = funcTable[dtype].compare(p, marker);
- if (ret) {
- printf("The bound marker first damaged at row %lu, column %lu "
- "of the block: ", i + srow, j + scol);
- if ((dtype == TYPE_FLOAT) ||
- (dtype == TYPE_DOUBLE)) {
-
- if (dtype == TYPE_FLOAT) {
- a1 = *(cl_float*)p;
- }
- else {
- a1 = *(cl_double*)p;
- }
- printf("actual value is %.5E\n", a1);
- }
- else {
- if (dtype == TYPE_COMPLEX_FLOAT) {
- EXTRACT_COMPLEX_DOUBLE(p, cl_float2, a1, a2);
- }
- else {
- EXTRACT_COMPLEX_DOUBLE(p, cl_double2, a1, a2);
- }
- printf("actual value is (%.5E, %.5E)\n",
- a1, a2);
- }
- break;
- }
- }
- }
-
- return ret;
-}
-
-// check the data was not written outside bound
-static int
-checkMatrixBound(void *matrB, const TestDesc *tdesc)
-{
- int ret = 0;
- size_t dimr, dimc;
-
- if (tdesc->transpose && (tdesc->transpType != TRANSPOSE_BOTH)) {
- dimr = tdesc->dim.y;
- dimc = tdesc->dim.x;
- }
- else {
- dimr = tdesc->dim.x;
- dimc = tdesc->dim.y;
- }
-
- if (tdesc->srowB) {
- ret = checkBound(matrB, tdesc->type, 0, 0, tdesc->srowB,
- tdesc->widthB, tdesc->widthB);
- }
-
- if (tdesc->scolB && !ret) {
- ret = checkBound(matrB, tdesc->type, tdesc->srowB, 0, dimc,
- tdesc->scolB, tdesc->widthB);
- }
-
- if ((tdesc->scolB + dimr < tdesc->widthB) && !ret) {
- ret = checkBound(matrB, tdesc->type, tdesc->srowB,
- tdesc->scolB + dimr, dimc,
- tdesc->widthB - tdesc->scolB - dimr,
- tdesc->widthB);
- }
-
- if ((tdesc->srowB + dimc < tdesc->heightB) && !ret) {
- ret = checkBound(matrB, tdesc->type,
- tdesc->srowB + dimc, 0,
- tdesc->heightB - tdesc->srowB - dimc,
- tdesc->widthB, tdesc->widthB);
- }
-
- return ret;
-}
-
-// Check the data was not written outside bound. Several matrix rows can be
-// packed into single image line.
-static int
-checkImageBound(void *imgB, const TestDesc *tdesc)
-{
- int ret = 0;
- // Size of packed line of rows, in tdesc->type's
- size_t pLine;
- size_t rowsInLine;
-
- rowsInLine = (tdesc->widthB / tdesc->dim.x);
- pLine = rowsInLine * tdesc->dim.x;
-
- //right
- ret = checkBound(imgB, tdesc->type, 0, pLine, tdesc->heightB,
- tdesc->widthB - pLine, tdesc->widthB);
-
- //last image line tail
- if (!ret && ((tdesc->dim.x * tdesc->dim.y) % pLine != 0)) {
- ret = checkBound(imgB, tdesc->type,
- (tdesc->dim.x * tdesc->dim.y) / pLine,
- (tdesc->dim.x * tdesc->dim.y) % pLine, 1,
- (pLine - (tdesc->dim.x * tdesc->dim.y) % pLine)
- % pLine, tdesc->widthB);
- }
-
- //bottom
- if (!ret) {
- int startRow = tdesc->dim.x * tdesc->dim.y / pLine;
- if (tdesc->dim.x * tdesc->dim.y % pLine != 0) {
- startRow ++;
- }
- ret = checkBound(imgB, tdesc->type, startRow, 0,
- tdesc->heightB - startRow,
- tdesc->widthB, tdesc->widthB);
- }
- return ret;
-}
-
-// Compare image with matrix. Several matrix rows can be packed into single
-// image line.
-static int
-compareImage(void *matrA, void *imgB, const TestDesc *tdesc)
-{
- size_t i, j;
- unsigned int nfloats;
- void *p1, *p2;
- int ret = 0;
- double a1, b1, a2, b2;
-
- nfloats = dtypeSize(tdesc->type) / sizeof(cl_float);
-
- for (i = 0; (i < tdesc->dim.y) && !ret; i++) {
- for (j = 0; j < tdesc->dim.x; j++) {
- // Size of packed line of rows, in tdesc->type's
- int pLine;
- // absolute index of element in image
- int index;
- p1 = (cl_float*)matrA + ((tdesc->srowA + i) * tdesc->widthA +
- tdesc->scolA + j) * nfloats;
- pLine = (tdesc->widthB / tdesc->dim.x) * tdesc->dim.x;
- index = i * tdesc->dim.x + j;
-
- p2 = (cl_float*)imgB + ((index / pLine) * tdesc->widthB +
- index % pLine) * nfloats;
- ret = funcTable[tdesc->type].compare(p1, p2);
-
- if (ret) {
- printf("The first error occurred at row %lu, column %lu "
- "of the block: ", i + tdesc->srowA, j + tdesc->scolA);
- if ((tdesc->type == TYPE_FLOAT) ||
- (tdesc->type == TYPE_DOUBLE)) {
-
- if (tdesc->type == TYPE_FLOAT) {
- a1 = *(cl_float*)p1;
- b1 = *(cl_float*)p2;
- }
- else {
- a1 = *(cl_double*)p1;
- b1 = *(cl_double*)p2;
- }
- printf("value is %.5E but must be %.5E\n", b1, a1);
- }
- else {
- if (tdesc->type == TYPE_COMPLEX_FLOAT) {
- EXTRACT_COMPLEX_DOUBLE(p1, cl_float2, a1, a2);
- EXTRACT_COMPLEX_DOUBLE(p2, cl_float2, b1, b2);
- }
- else {
- EXTRACT_COMPLEX_DOUBLE(p1, cl_double2, a1, a2);
- EXTRACT_COMPLEX_DOUBLE(p2, cl_double2, b1, b2);
- }
- printf("value is (%.5E, %.5E) but must be (%.5E, %.5E)\n",
- b1, b2, a1, a2);
- }
- break;
- }
- }
- }
-
- return ret;
-}
-
-static cl_uint
-get_cl_device(cl_device_id *id, int type)
-{
- cl_uint status;
- cl_uint numEnt;
- cl_platform_id platform;
-
- status = clGetPlatformIDs(0, NULL, &numEnt);
- status += clGetPlatformIDs(1, &platform, NULL);
- status += clGetDeviceIDs(platform, type, 1, id, &numEnt);
-
- return status;
-}
-
-// create memory buffer objects needed for a test case
-static cl_int
-createBufferObjs(
- void *matrA,
- void *matrB,
- cl_mem *aobj,
- cl_mem *bobj,
- cl_context ctx,
- size_t asize,
- size_t bsize)
-{
- cl_int status;
-
- if (aobj != NULL) {
- *aobj = clCreateBuffer(ctx, (CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR),
- asize, matrA, &status);
- if (*aobj == NULL) {
- printf("Memory object creation for A matrix failed, status = %d, "
- "asize = %lu\n", status, asize);
- return status;
- }
- }
-
- *bobj = clCreateBuffer(ctx, (CL_MEM_READ_WRITE | CL_MEM_USE_HOST_PTR),
- bsize, matrB, &status);
- if (*bobj == NULL) {
- printf("Memory object creation for B matrix failed, status = %d, "
- "bsize = %lu\n", status, bsize);
- if (aobj) {
- clReleaseMemObject(*aobj);
- *aobj = NULL;
- }
- }
-
- return status;
-}
-
-// create image memory objects needed for a test case
-static cl_int
-createImageObjs(
- void *img1,
- void *img2,
- cl_mem *img1obj,
- cl_mem *img2obj,
- cl_context ctx,
- size_t pixels_width,
- size_t pixels_height)
-{
- cl_mem *objs[2] = {img1obj, img2obj};
- void *bufs[2] = {img1, img2};
- const char *names[2]={"first", "second"};
- const cl_image_format format = { CL_RGBA, CL_FLOAT };
- cl_int status;
- int i;
-
- for (i=0; i<2; i++) {
- if (objs[i] == NULL) {
- continue;
- }
- *objs[i] = clCreateImage2D(ctx,
- (CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR), &format,
- pixels_width, pixels_height, 0, bufs[i], &status);
- if (status != CL_SUCCESS) {
- printf("Memory object creation for %s image failed, status = %d, "
- "width = %lupx, height = %lupx\n", names[i], status,
- pixels_width, pixels_height);
- if (i==1) { //first image was created successfully, release it
- if(objs[0] != NULL) {
- clReleaseMemObject(*objs[0]);
- }
- }
- break;
- }
- }
-
- return status;
-}
-
-// create a kernel needed for a test case
-static cl_kernel
-createKernel(
- const char *kernName,
- char *srcBuf,
- cl_context ctx,
- cl_device_id devID,
- cl_program *program)
-{
- char log[65536];
- cl_int status;
- cl_kernel krn = NULL;
-
- *program = buildClProgram(srcBuf, NULL, ctx, devID, log,
- sizeof(log), &status);
- if (*program == NULL) {
- printf("Program building failed, status = %d, log info:\n%s\n",
- status, log);
- }
- else {
- krn = clCreateKernel(*program, kernName, &status);
- if (krn == NULL) {
- printf("Kernel creation failed, status = %d\n", status);
- clReleaseProgram(*program);
- *program = NULL;
- printf("failed program code: \"%s\"\n", srcBuf);
- fflush(stdout);
- }
- }
-
- return krn;
-}
-
-static void
-releaseBufferObjs(
- cl_mem aobj,
- cl_mem bobj)
-{
- if (aobj != NULL) {
- clReleaseMemObject(aobj);
- }
- clReleaseMemObject(bobj);
-}
-
-static int
-testMatrBlockRW(
- struct KgenContext *ctx,
- void *srcBuf,
- TestDesc *tdesc,
- cl_device_id devID,
- cl_context clCtx,
- cl_command_queue queue)
-{
- cl_float *matrA;
- cl_float *matrB;
- cl_float *img1;
- cl_float *img2;
- TestDesc tdescImage;
- cl_mem aobj = NULL, bobj = NULL;
- cl_mem img1obj = NULL, img2obj = NULL;
- unsigned int tsize;
- char tmp[1024];
- KernelDesc kdesc;
- const char *s, *s1;
- int ret;
- // read, write, global to image, local to image functions names
- char rname[128], wname[128], giname[128], liname[128];
- size_t size, asize, bsize;
- // width and height in pixels, size in bytes
- size_t imageWidth, imageHeight, imgSize;
- cl_program program = NULL;
- cl_device_type devType;
- KernelArg *karg;
- KernelErrorInfo errInfo;
- cl_event event;
- cl_int status;
- SubproblemDim dim, *pdim;
- // local memory block leading dimension for generic read and write back
- size_t ld;
- bool testImages;
- DBlockCopyFlags flags = 0;
- unsigned int nfloats;
- bool b;
-
- memset(&kdesc, 0, sizeof(kdesc));
- rname[0] = wname[0] = giname[0] = liname[0] = '\0';
-
- clGetDeviceInfo(devID, CL_DEVICE_TYPE, sizeof(devType), &devType, NULL);
-
- tsize = dtypeSize(tdesc->type);
- nfloats = tsize / sizeof(cl_float);
- if (((tdesc->dim.x * tsize) % sizeof(cl_float4) == 0) &&
- (devType == CL_DEVICE_TYPE_GPU) && !tdesc->transpose) {
-
- testImages = true;
- }
- else {
- printf("Size of row is not float4 aligned, or the target device is CPU,"
- "or copying should be transposed, images are not used.\n");
- testImages = false;
- }
- resetKgenContext(ctx);
- asize = tdesc->heightA * tdesc->widthA * tsize;
- bsize = tdesc->heightB * tdesc->widthB * tsize;
-
- // Size of images in pixels. Each pixel is float4.
- if (tdesc->packedImages) {
- imageWidth = fl4RowWidth(tdesc->dim.x * 3.5, tsize);
- imageHeight = tdesc->dim.y;
- }
- else {
- imageWidth = fl4RowWidth(tdesc->dim.x, tsize);
- imageHeight = tdesc->dim.y;
- }
-
- imgSize = imageHeight * imageWidth * sizeof(cl_float4);
-
- matrA = malloc(asize);
- matrB = malloc(bsize);
- img1 = malloc(imgSize);
- img2 = malloc(imgSize);
- if (!matrA || !matrB || !img1 || !img2) {
- printf("Memory allocation failed\n");
- return -1;
- }
- fillMatrix(matrA, tdesc->heightA, tdesc->widthA, tdesc->widthA,
- tdesc->type, false);
- fillMatrix(matrB, tdesc->heightB, tdesc->widthB, tdesc->widthB,
- tdesc->type, true);
- fillMatrix(img1, imageHeight, imageWidth * FLOAT4_VECLEN / nfloats,
- imageWidth * FLOAT4_VECLEN / nfloats, tdesc->type, true);
- fillMatrix(img2, imageHeight, imageWidth * FLOAT4_VECLEN / nfloats,
- imageWidth * FLOAT4_VECLEN / nfloats, tdesc->type, true);
-
- if (createBufferObjs(matrA, matrB, &aobj, &bobj, clCtx, asize, bsize)
- != CL_SUCCESS) {
- return -1;
- }
- if (testImages) {
- // function gets width in float4's
- if (createImageObjs(img1, img2, &img1obj, &img2obj, clCtx,
- imageWidth, imageHeight)
- != CL_SUCCESS) {
- releaseBufferObjs(aobj, bobj);
- return -1;
- }
- }
-
- b = isDoubleBasedType(tdesc->type);
- kgenDeclareUptrs(ctx, b);
- kgenAddBlankLine(ctx);
-
- s = dtypeBuiltinType(tdesc->type);
- s1 = dtypeUPtrField(tdesc->type);
-
- pdim = (tdesc->generic) ? NULL : &dim;
-
- // generate the functions
- dim = tdesc->dim;
- if (tdesc->transpose && (tdesc->transpType != TRANSPOSE_GLOBAL)) {
- flags = DBLOCK_COPY_TRANSPOSE;
- }
-
- if ((devType == CL_DEVICE_TYPE_CPU) &&
- (tdesc->widthA % sizeof(cl_float4) || tdesc->srowA)) {
- flags |= DBLOCK_COPY_NOT_VECTORIZE;
- }
-
- copyDataBlockGen(ctx, pdim, &tdesc->pgran, tdesc->type,
- DBLOCK_GLOBAL_TO_LOCAL, flags);
- kgenGetLastFuncName(rname, sizeof(rname), ctx);
- kgenAddBlankLine(ctx);
-
- if (tdesc->transpose && (tdesc->transpType != TRANSPOSE_GLOBAL)) {
- ld = fl4RowWidth(tdesc->dim.y, tsize) * FLOAT4_VECLEN / nfloats;
- }
- else {
- ld = fl4RowWidth(tdesc->dim.x, tsize) * FLOAT4_VECLEN / nfloats;
- }
-
- if (tdesc->transpose) {
- flags = (tdesc->transpType == TRANSPOSE_LOCAL) ? 0 :
- DBLOCK_COPY_TRANSPOSE;
- if (tdesc->transpType != TRANSPOSE_GLOBAL) {
- dim.x = tdesc->dim.y;
- dim.y = tdesc->dim.x;
- }
- }
- else {
- flags = 0;
- }
-
- if ((devType == CL_DEVICE_TYPE_CPU) &&
- (tdesc->widthA % sizeof(cl_float4) || tdesc->srowA)) {
- flags |= DBLOCK_COPY_NOT_VECTORIZE;
- }
-
- copyDataBlockGen(ctx, pdim, &tdesc->pgran, tdesc->type,
- DBLOCK_LOCAL_TO_GLOBAL, flags);
- kgenGetLastFuncName(wname, sizeof(wname), ctx);
- kgenAddBlankLine(ctx);
-
- if (testImages) {
- if (tdesc->packedImages) {
- flags |= DBLOCK_COPY_PACKED_IMAGE;
- }
- copyDataBlockGen(ctx, pdim, &tdesc->pgran, tdesc->type,
- DBLOCK_GLOBAL_TO_IMAGE, flags);
- kgenGetLastFuncName(giname, sizeof(giname), ctx);
- kgenAddBlankLine(ctx);
-
- copyDataBlockGen(ctx, pdim, &tdesc->pgran, tdesc->type,
- DBLOCK_LOCAL_TO_IMAGE, flags);
- kgenGetLastFuncName(liname, sizeof(liname), ctx);
- kgenAddBlankLine(ctx);
- }
-
- if (testImages) {
- sprintf(tmp, rwBlockKernelImgDecl, s, s);
- }
- else {
- sprintf(tmp, rwBlockKernelDecl, s, s);
- }
- kgenDeclareFunction(ctx, tmp);
- kgenBeginFuncBody(ctx);
-
- size = fl4RowWidth(tdesc->dim.x, tsize) * tdesc->dim.y * FLOAT4_VECLEN;
- if (size < fl4RowWidth(tdesc->dim.y, tsize) * tdesc->dim.x * FLOAT4_VECLEN) {
- size = fl4RowWidth(tdesc->dim.y, tsize) * tdesc->dim.x * FLOAT4_VECLEN;
- }
-
- // declare and initialize local variables
- sprintf(tmp, "__local float tmpBuf[%lu];\n"
- "LPtr tmp;\n"
- "GPtr src, dst;\n"
- "\n"
- "tmp.f = tmpBuf;\n"
- "src.%s = matrA;\n"
- "dst.%s = matrB;\n\n",
- size, s1, s1);
- kgenAddStmt(ctx, tmp);
- // read block call
- if (tdesc->generic) {
- sprintf(tmp, "%s(tmp, src, srowA, scolA, %lu, %lu, %lu, lda);\n",
- rname, tdesc->dim.y, tdesc->dim.x, ld);
- }
- else {
- sprintf(tmp, "%s(tmp, src, srowA, scolA, lda);\n", rname);
- }
- kgenAddStmt(ctx, tmp);
-
- kgenAddStmt(ctx, "barrier(CLK_LOCAL_MEM_FENCE);\n");
-
- // write block call
- if (tdesc->generic) {
- sprintf(tmp, "%s(dst, tmp, srowB, scolB, %lu, %lu, ldb, %lu);\n",
- wname, dim.y, dim.x, ld);
- }
- else {
- sprintf(tmp, "%s(dst, tmp, srowB, scolB, ldb);\n", wname);
- }
- kgenAddStmt(ctx, tmp);
-
- if (testImages) {
- // global memory to image write function call
- if (tdesc->generic) {
- sprintf(tmp, "%s(image1, 0, 0, src, srowA, scolA, %lu, %lu, lda);\n",
- giname, dim.y, dim.x);
- }
- else {
- sprintf(tmp, "%s(image1, 0, 0, src, srowA, scolA, lda);\n", giname);
- }
- kgenAddStmt(ctx, tmp);
-
- // local memory to image write function call
- if (tdesc->generic) {
- sprintf(tmp, "%s(image2, 0, 0, tmp, %lu, %lu, %lu);\n",
- liname, dim.y, dim.x, ld);
- }
- else {
- sprintf(tmp, "%s(image2, 0, 0, tmp);\n", liname);
- }
- kgenAddStmt(ctx, tmp);
- }
-
- ret = kgenEndFuncBody(ctx);
-
- // now compile and launch the kernel
- if (!ret) {
- kdesc.kernel = createKernel("rwMatrBlockTest", srcBuf, clCtx,
- devID, &program);
- if (kdesc.kernel == NULL) {
- ret = -1;
- }
- }
-
- karg = kdesc.args;
- initMemobjKarg(&karg[0], aobj, matrA, asize, MEMOBJ_WRITE);
- INIT_KARG(&karg[1], tdesc->widthA);
- initMemobjKarg(&karg[2], bobj, matrB, bsize, MEMOBJ_READ);
- INIT_KARG(&karg[3], tdesc->widthB);
- INIT_KARG(&karg[4], tdesc->srowA);
- INIT_KARG(&karg[5], tdesc->scolA);
- INIT_KARG(&karg[6], tdesc->srowB);
- INIT_KARG(&karg[7], tdesc->scolB);
- if (testImages) {
- INIT_KARG(&karg[8], img1obj);
- INIT_KARG(&karg[9], img2obj);
- }
-
- kdesc.globalThreads[0] = tdesc->pgran.wgSize[0];
- kdesc.localThreads[0] = tdesc->pgran.wgSize[0];
- kdesc.workDim = 1;
- kdesc.needExecTime = 1;
- kdesc.event = &event;
-
- if (!ret) {
- status = launchClKernel(&kdesc, queue, &errInfo);
- if (status != CL_SUCCESS) {
- printf("Kernel launching failed: status = %d, phase = %d, "
- "wrong arg = %d\n", status, errInfo.phase, errInfo.wrongArg);
- ret = -1;
- }
- }
- if (testImages) {
- if (!ret) {
- ret = clEnqueueReadImage(queue, img1obj, CL_TRUE,
- (size_t[3]){0, 0, 0},
- (size_t[3]){imageWidth, imageHeight, 1},
- 0, 0, img1, 0, NULL, NULL);
- if (ret) {
- printf ("image read failed, code %d\n", ret);
- }
- }
- if (!ret) {
- ret = clEnqueueReadImage(queue, img2obj, CL_TRUE,
- (size_t[3]){0, 0, 0},
- (size_t[3]){imageWidth, imageHeight, 1},
- 0, 0, img2, 0, NULL, NULL);
- if (ret) {
- printf ("image read failed, code %d\n", ret);
- }
- }
- }
-
- memcpy(&tdescImage, tdesc, sizeof(tdescImage));
- // width in tdesc->types
- tdescImage.widthB = (imageWidth * FLOAT4_VECLEN) / nfloats;
- tdescImage.heightB = imageHeight;
- tdescImage.scolB = 0;
- tdescImage.srowB = 0;
- // check the result
- if (!ret) {
- ret = compareMatrices(matrA, matrB, tdesc);
- // check the data wasn't written outside the square
- if (!ret) {
- ret = checkMatrixBound(matrB, tdesc);
- }
- }
- if (testImages) {
- if (tdesc->packedImages) {
- // compare matrix with packed image data
- if (!ret) {
- ret = compareImage(matrA, img1, &tdescImage);
- if (!ret) {
- ret = checkImageBound(img1, &tdescImage);
- }
- }
- if (!ret) {
- ret = compareImage(matrA, img2, &tdescImage);
- if (!ret) {
- ret = checkImageBound(img2, &tdescImage);
- }
- }
- }
- else {
- if (!ret) {
- ret = compareMatrices(matrA, img1, &tdescImage);
- if (!ret) {
- ret = checkMatrixBound(img1, &tdescImage);
- }
- }
- if (!ret) {
- ret = compareMatrices(matrA, img2, &tdescImage);
- if (!ret) {
- ret = checkMatrixBound(img2, &tdescImage);
- }
- }
- }
- }
- releaseBufferObjs(aobj, bobj);
- if (testImages) {
- releaseBufferObjs(img1obj, img2obj);
- }
-
- if (kdesc.kernel) {
- clReleaseKernel(kdesc.kernel);
- clReleaseProgram(program);
- }
-
- free(matrA);
- free(matrB);
- free(img1);
- free(img2);
-
- return ret;
-}
-
-static int
-parseDataType(DataType *dtype)
-{
- int ret = 0;
-
- if (!strcmp(optarg, "float")) {
- *dtype = TYPE_FLOAT;
- }
- else if (!strcmp(optarg, "double")) {
- *dtype = TYPE_DOUBLE;
- }
- else if (!strcmp(optarg, "complex_float")) {
- *dtype = TYPE_COMPLEX_FLOAT;
- }
- else if (!strcmp(optarg, "complex_double")) {
- *dtype = TYPE_COMPLEX_DOUBLE;
- }
- else {
- printf("An unsupported data typs is specified: %s\n", optarg);
- ret = -1;
- }
-
- return ret;
-}
-
-static int
-parseTransposeType(TransposeType *ttype)
-{
- int ret = 0;
-
- if (!strcmp(optarg, "local")) {
- *ttype = TRANSPOSE_LOCAL;
- }
- else if (!strcmp(optarg, "global")) {
- *ttype = TRANSPOSE_GLOBAL;
- }
- else if (!strcmp(optarg, "both")) {
- *ttype = TRANSPOSE_BOTH;
- }
- else {
- printf("An unsupported transpose type is specified: %s\n",
- optarg);
- ret = -1;
- }
-
- return ret;
-}
-
-static int
-runTestCases(
- struct KgenContext *ctx,
- char *srcBuf,
- TestDesc *tdesc,
- cl_device_id devID,
- cl_context clCtx,
- cl_command_queue queue,
- TestFn fn)
-{
- int i, i1;
- int ret = 0;
- unsigned int nfloats;
-
- i1 = (tdesc->type == TYPE_COMPLEX_DOUBLE) ? 1 : 2;
- nfloats = dtypeSize(tdesc->type) / sizeof(cl_float);
- tdesc->pgran.wgDim = 1;
- tdesc->pgran.wgSize[1] = 1;
- tdesc->pgran.wfSize = 64;
-
- for (i = 0; i < i1; i++) {
- if (!i) {
- printf("Tests with float4 aligned rows:\n\n");
- tdesc->dim.x = 64;
- }
- else {
- printf("Tests with not float4 aligned rows:\n\n");
- tdesc->dim.x = 65;
- }
-
- printf("Number of block rows is equal to the work group size\n");
- tdesc->dim.y = 64 / nfloats;
- tdesc->pgran.wgSize[0] = 64 / nfloats;
- ret = fn(ctx, srcBuf, tdesc, devID, clCtx, queue);
- if (ret) {
- printf("FAIL\n\n");
- break;
- }
- printf("PASS\n\n");
-
- printf("Number of block rows is greater than the work group size, "
- "the rows number is divided on the work group size\n");
- tdesc->pgran.wgSize[0] = 32 / nfloats;
- ret = fn(ctx, srcBuf, tdesc, devID, clCtx, queue);
- if (ret) {
- printf("FAIL\n\n");
- break;
- }
- tdesc->pgran.wgSize[0] = 64 / nfloats;
- printf("PASS\n\n");
-
- printf("Number of block rows is greater than the work group size, "
- "the rows number is not divided on the work group size\n");
- tdesc->dim.y = 99 / nfloats;
- ret = fn(ctx, srcBuf, tdesc, devID, clCtx, queue);
- if (ret) {
- printf("FAIL\n\n");
- break;
- }
- printf("PASS\n\n");
-
- printf("Number of block rows is less than the work group size\n"
- "The work group size is divided on the number of rows\n");
- tdesc->dim.y = 32 / nfloats;
- ret = fn(ctx, srcBuf, tdesc, devID, clCtx, queue);
- if (ret) {
- printf("FAIL\n\n");
- break;
- }
- printf("PASS\n\n");
-
- printf("Number of block rows is less than the work group size\n"
- "The work group size is not divided on the number of rows\n");
- tdesc->dim.y = (17 + nfloats - 1) / nfloats;
- ret = fn(ctx, srcBuf, tdesc, devID, clCtx, queue);
- if (ret) {
- printf("FAIL\n\n");
- break;
- }
- printf("PASS\n\n");
-
- printf("Number of block rows is less than the work group size\n"
- "The work group size is not divided on the number of rows\n"
- "Each row consists of 1 elements\n");
- tdesc->dim.x = 1;
- ret = fn(ctx, srcBuf, tdesc, devID, clCtx, queue);
- if (ret) {
- printf("FAIL\n\n");
- break;
- }
- printf("PASS\n\n");
- }
-
- return ret;
-}
-
-int
-main(int argc, char *argv[])
-{
- struct KgenContext *ctx;
- char *buf;
- TestDesc tdesc;
- cl_context clCtx = NULL;
- cl_command_queue queue = NULL;
- cl_device_id devID;
- int devType = CL_DEVICE_TYPE_GPU;
- cl_int status;
- int err = 0;
- int opt;
- TestFn func;
- // test with non zero offset
- bool off = false;
- // test with non float4 aligned width
- bool v4na = false;
- char dataType[64];
- const char *s2 = "", *s3 = "", *s4 = "", *s5 = "", *s7 = "";
- const char *s6 = "GPU";
-
- memset(&tdesc, 0, sizeof(tdesc));
- tdesc.transpose = false;
- tdesc.type = -1;
-
- // parse command line arguments
- while (!err) {
- opt = getopt(argc, argv, "ct:d:nogb");
- if (opt == -1) {
- break;
- }
- switch (opt) {
- case 'c':
- devType = CL_DEVICE_TYPE_CPU;
- s5 = "CPU";
- break;
- case 't':
- tdesc.transpose = true;
- err = parseTransposeType(&tdesc.transpType);
- break;
- case 'd':
- err = parseDataType(&tdesc.type);
- if (!err) {
- sprintf(dataType, "%s", optarg);
- }
- break;
- case 'g':
- tdesc.generic = true;
- s5 = ", generic (slow) version";
- break;
- case 'n':
- v4na = true;
- break;
- case 'o':
- off = true;
- break;
- case 'b':
- tdesc.packedImages = true;
- s7 = ", several rows can be packed to one image row";
- break;
- default:
- printf("Wrong option %c\n", opt);
- err = 1;
- break;
- }
- }
-
- if ((signed)tdesc.type == -1) {
- printf("Data type is not specified\n");
- err = -1;
- }
-
- if (err) {
- printf("%s", usage);
- return 1;
- }
-
- status = get_cl_device(&devID, devType);
- if (status) {
- printf("Device opening failed, status = %d\n", status);
- return 1;
- }
-
- clCtx = clCreateContext((const cl_context_properties*)NULL, 1, &devID,
- NULL, NULL, &status);
- if (clCtx == NULL) {
- printf("Context creation failed, status = %d\n", status);
- }
- if (clCtx != NULL) {
- queue = clCreateCommandQueue(clCtx, devID,
- CL_QUEUE_PROFILING_ENABLE,
- &status);
- if (queue == NULL) {
- clReleaseContext(clCtx);
- printf("Command queue creation failed, status = %d\n", status);
- }
- }
-
- buf = malloc(SOURCE_BUFLEN);
- ctx = createKgenContext(buf, SOURCE_BUFLEN, true);
- func = testMatrBlockRW;
-
- if (v4na) {
- tdesc.widthA = 2055;
- tdesc.widthB = 2777;
- s2 = ", matrix rows are not aligned to float4 boundary";
- }
- else {
- tdesc.widthA = 2048;
- tdesc.widthB = 2560;
- s2 = "matrix rows are aligned to float4 boundary";
- }
-
- tdesc.heightA = 2048;
- tdesc.heightB = 2048;
-
- if (off) {
- s3 = ", starting offsets are not zero";
- tdesc.srowA = 17;
- tdesc.scolA = 27;
- tdesc.srowB = 55;
- tdesc.scolB = 86;
- }
- else {
- s3 = ", starting offsets are zero";
- }
-
- if (tdesc.transpose) {
- switch (tdesc.transpType) {
- case TRANSPOSE_LOCAL:
- s4 = ", transpose at reading";
- break;
- case TRANSPOSE_GLOBAL:
- s4 = ", transpose at writing back";
- break;
- case TRANSPOSE_BOTH:
- s4 = ", transpose at both reading and writing back";
- break;
- }
- }
-
- printf("Test read/write block function with %s data type%s%s%s%s%s.\n"
- "Run the test on %s...\n\n",
- dataType, s2, s3, s4, s5, s7, s6);
- if (runTestCases(ctx, buf, &tdesc, devID, clCtx, queue,
- func)) {
- printf("Source: \n%s\n", buf);
- }
-
- // release OpenCL objects
- clReleaseCommandQueue(queue);
- clReleaseContext(clCtx);
-
- return 0;
-}
-