summaryrefslogtreecommitdiff
path: root/external/clBLAS/src/library/blas/gens/legacy/tests/t_blkmul.c
diff options
context:
space:
mode:
Diffstat (limited to 'external/clBLAS/src/library/blas/gens/legacy/tests/t_blkmul.c')
-rw-r--r--external/clBLAS/src/library/blas/gens/legacy/tests/t_blkmul.c737
1 files changed, 0 insertions, 737 deletions
diff --git a/external/clBLAS/src/library/blas/gens/legacy/tests/t_blkmul.c b/external/clBLAS/src/library/blas/gens/legacy/tests/t_blkmul.c
deleted file mode 100644
index 590231ee..00000000
--- a/external/clBLAS/src/library/blas/gens/legacy/tests/t_blkmul.c
+++ /dev/null
@@ -1,737 +0,0 @@
-/* ************************************************************************
- * Copyright 2013 Advanced Micro Devices, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * ************************************************************************/
-
-
-#ifdef __APPLE__
-#include <OpenCL/cl.h>
-#else
-#include <CL/cl.h>
-#endif
-#include <string.h>
-#include <stdlib.h>
-#include <math.h>
-#include <stdio.h>
-#include <kerngen.h>
-#include <blas_kgen.h>
-#include "../blas_kgen_legacy.h"
-
-enum {
- ITEM_WORK_M = 8,
- ITEM_WORK_N = 8,
- GROUP_SIZE = ITEM_WORK_M * ITEM_WORK_N,
- BLOCKS_K = 2,
- PACK_RATE = 4,
- RAND_BOUND = 10
-};
-
-// float types based unified pointer
-typedef union FPtr {
- void *v;
- cl_float *f;
- cl_double *d;
- cl_float2 *f2;
- cl_double2 *d2;
-} FPtr;
-
-// float type based unified data type
-typedef union FType {
- unsigned char u[sizeof(cl_double)];
- cl_float f;
- cl_float2 f2;
- cl_double d;
- cl_double2 d2;
-} FType;
-
-static void
-usage(void)
-{
- printf("USAGE: blkmul_test type <M N K> [--iter i] [--imA] [--imB] [--alpha] a "
- "--[img-packed]\n"
- "type argument can be a value from the following list:s, d, c, z\n"
- "iter - number of iterations\n"
- "imA, imB - image usage for matrix\n"
- "img-packed - store elements of matrix A or (and) B "
- "to an image in the packed way\n");
-}
-
-static void
-imageSizes(
- int *height,
- int *width,
- int blockHeight,
- int blockWidth,
- int AB,
- int typeSize,
- int packed)
-{
- *width = blockWidth * typeSize / 16;
- *height = blockHeight;
- if (packed) {
- int smallHeight = (AB) ? (blockHeight / ITEM_WORK_M) :
- (blockHeight / ITEM_WORK_N);
-
- *width *= smallHeight * PACK_RATE;
- *height /= smallHeight * PACK_RATE;
- }
-}
-
-void
-addTestPrefix(struct KgenContext *ctx, bool isDouble)
-{
- kgenDeclareUptrs(ctx, isDouble);
-}
-
-void
-addTestSuffix(
- struct KgenContext *ctx,
- const SubproblemDim subdims[2],
- DataType type,
- BlkMulOpts *mulOpts)
-{
- char c;
- char s[300];
- bool isImageA, isImageB;
- char *tName;
- size_t m, n, k;
- size_t blockWidth;
- char imgXA[64], imgYA[64], imgXB[64], imgYB[64];
- unsigned int vecLen = sizeof(cl_float4) / dtypeSize(type);
-
- isImageA = (mulOpts->aMobj == CLMEM_IMAGE);
- isImageB = (mulOpts->bMobj == CLMEM_IMAGE);
-
- m = subdims[1].y;
- n = subdims[1].x;
- k = subdims[1].bwidth;
- blockWidth = k * BLOCKS_K;
-
- switch (type) {
- case TYPE_FLOAT:
- c = 's';
- tName = "float";
- break;
- case TYPE_DOUBLE:
- c = 'd';
- tName = "double";
- break;
- case TYPE_COMPLEX_FLOAT:
- c = 'c';
- tName = "float2";
- break;
- case TYPE_COMPLEX_DOUBLE:
- c = 'z';
- tName = "double2";
- break;
- default:
- return;
- }
-
- kgenAddBlankLine(ctx);
- kgenAddStmt(ctx, "__kernel void\n");
- kgenAddStmt(ctx, "blkmul_test(\n");
- sprintf(s," %s alpha,\n", tName);
- kgenAddStmt(ctx, s);
- if (isImageA) {
- kgenAddStmt(ctx, " __read_only image2d_t A,\n");
- }
- else {
- sprintf(s," __global %s *A,\n", tName);
- kgenAddStmt(ctx, s);
- }
- if (isImageB) {
- kgenAddStmt(ctx, " __read_only image2d_t B,\n");
- }
- else {
- sprintf(s," __global %s *B,\n", tName);
- kgenAddStmt(ctx, s);
- }
- kgenAddStmt(ctx, " size_t M,\n"
- " size_t N,\n"
- " size_t K,\n");
- sprintf(s," __global %s *C,\n", tName);
- kgenAddStmt(ctx, s);
- kgenAddStmt(ctx, " size_t iter)\n");
- kgenBeginFuncBody(ctx);
- kgenAddStmt(ctx, "size_t i, j, it, m0, n0;\n");
- if (!isImageA) {
- sprintf(s,"__local %s LA[%lu];\n", tName, m * ITEM_WORK_M * blockWidth);
- kgenAddStmt(ctx, s);
- }
- else {
- if (mulOpts->flags & BLKMUL_IMAGE_PACKED) {
- sprintf(imgXA, "(m0 / %lu) %% %d * %lu", m, PACK_RATE,
- m * blockWidth / vecLen);
- sprintf(imgYA, "m0 / %lu", m * PACK_RATE);
- }
- else {
- strcpy(imgXA, "0");
- strcpy(imgYA, "m0");
- }
- }
- if (!isImageB) {
- sprintf(s,"__local %s LB[%lu];\n", tName, n * ITEM_WORK_N * blockWidth);
- kgenAddStmt(ctx, s);
- }
- else {
- if (mulOpts->flags & BLKMUL_IMAGE_PACKED) {
- sprintf(imgXB, "(n0 / %lu) %% %d * %lu", n, PACK_RATE,
- n * blockWidth / vecLen);
- sprintf(imgYB, "n0 / %lu", n * PACK_RATE);
- }
- else {
- strcpy(imgXB, "0");
- strcpy(imgYB, "n0");
- }
- }
-
- sprintf(s,"__local %s LC[%lu];\n", tName, n * m * GROUP_SIZE);
- kgenAddStmt(ctx, s);
-
- sprintf(s, "m0 = %lu * (get_global_id(0) / %d);\n"
- "n0 = %lu * (get_global_id(0) %% %d);\n",
- m, ITEM_WORK_N, n, ITEM_WORK_N);
- kgenAddStmt(ctx, s);
-
- if (!isImageA) {
- kgenAddBlankLine(ctx);
- sprintf(s, "for (i = m0; i < m0 + %lu; i++)", m);
- kgenBeginBranch(ctx, s);
- kgenBeginBranch(ctx, "for (j = 0; j < K; j++)");
- kgenAddStmt(ctx,"LA[i * K + j] = A[i * K + j];\n");
- kgenEndBranch(ctx, NULL);
- kgenEndBranch(ctx, NULL);
- }
-
- if (!isImageB) {
- kgenAddBlankLine(ctx);
- sprintf(s, "for (i = n0; i < n0 + %lu; i++)", n);
- kgenBeginBranch(ctx, s);
- kgenBeginBranch(ctx,"for (j = 0; j < K; j++)");
- kgenAddStmt(ctx,"LB[i * K + j] = B[i * K + j];\n");
- kgenEndBranch(ctx, NULL);
- kgenEndBranch(ctx, NULL);
- }
-
- kgenAddBlankLine(ctx);
-
- kgenAddBlankLine(ctx);
- kgenBeginBranch(ctx,"for (it = 0; it < iter; it++)");
- sprintf(s, "for (i = m0; i < m0 + %lu; i++)", m);
- kgenBeginBranch(ctx, s);
- sprintf(s, "for (j = n0; j < n0 + %lu; j++)", n);
- kgenBeginBranch(ctx, s);
- kgenAddStmt(ctx,"LC[i * N + j] = 0;\n");
- kgenEndBranch(ctx, NULL);
- kgenEndBranch(ctx, NULL);
-
- if (isImageA) {
- if (isImageB) {
- sprintf(s, "%cgemmBlock_%lu_%lu(alpha, A, (int2)(%s, %s), B, "
- "(int2)(%s, %s), (LPtr)(LC + m0 * %lu + n0));\n",
- c, m, n, imgXA, imgYA, imgXB, imgYB, subdims[0].x);
- }
- else {
- sprintf(s, "%cgemmBlock_%lu_%lu(alpha, A, (int2)(%s, %s), "
- "(LPtr)(LB + n0 * %lu), (LPtr)(LC + m0 * %lu + n0));\n",
- c, m, n, imgXA, imgYA, subdims[0].bwidth, subdims[0].x);
- }
- }
- else {
- if (isImageB) {
- sprintf(s, "%cgemmBlock_%lu_%lu(alpha, (LPtr)(LA + m0 * %lu), B, "
- "(int2)(%s, %s), (LPtr)(LC + m0 * %lu + n0));\n",
- c, m, n, subdims[0].bwidth, imgXB, imgYB, subdims[0].x);
- }
- else {
- sprintf(s, "%cgemmBlock_%lu_%lu(alpha, (LPtr)(LA + m0 * %lu), "
- "(LPtr)(LB + n0 * %lu), (LPtr)(LC + m0 * %lu + n0));\n",
- c, m, n, subdims[0].bwidth, subdims[0].bwidth,
- subdims[0].x);
- }
- }
- kgenAddStmt(ctx, s);
- kgenEndBranch(ctx, NULL);
-
- kgenAddBlankLine(ctx);
- sprintf(s, "for (i = m0; i < m0 + %lu; i++)", m);
- kgenBeginBranch(ctx, s);
- sprintf(s, "for (j = n0; j < n0 + %lu; j++)", n);
- kgenBeginBranch(ctx, s);
- kgenAddStmt(ctx,"C[i * N + j] = LC[i * N + j];\n");
- kgenEndBranch(ctx, NULL);
- kgenEndBranch(ctx, NULL);
-
- kgenEndFuncBody(ctx);
-}
-
-cl_int
-run (char *ker, cl_uint M, cl_uint N, cl_uint K, FType alpha, DataType type, BlkMulOpts *mulOpts, cl_uint iter)
-{
- cl_int err;
- cl_platform_id platform;
- cl_context ctx;
- cl_device_id device;
- cl_command_queue queue;
- cl_event evt;
- FType tmp;
-
- cl_mem imA, imB, bufC;
- FPtr A, B, C, C_naive;
- bool is_complex = type == TYPE_COMPLEX_FLOAT || type == TYPE_COMPLEX_DOUBLE;
- bool is_double = type == TYPE_DOUBLE || type == TYPE_COMPLEX_DOUBLE;
- cl_uint nwords = (is_complex) ? 2 : 1;
- unsigned int tsize = dtypeSize(type);
- cl_kernel kernel;
- const cl_image_format image_format = {CL_RGBA, CL_FLOAT};
- size_t i, j, k;
- size_t globalWorkSize[1] = {GROUP_SIZE};
- size_t localWorkSize[1] = {GROUP_SIZE};
- char log[100000]; size_t logSize;
- cl_long sTime, fTime;
- cl_program program = NULL;
- const char *kernelName = "blkmul_test";
- int imgWidth, imgHeight;
- bool packed = (mulOpts->flags & BLKMUL_IMAGE_PACKED);
-
- clGetPlatformIDs(1, &platform, NULL);
-
- clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
-
- ctx = clCreateContext(NULL, 1, &device, NULL, NULL, &err);
- if (err != CL_SUCCESS) {
- return err;
- }
-
- queue = clCreateCommandQueue(ctx, device, CL_QUEUE_PROFILING_ENABLE, &err);
- if (err != CL_SUCCESS) {
- return err;
- }
-
- /* Prepare OpenCL kernel and its arguments */
-
- program = clCreateProgramWithSource(ctx, 1, (const char**)&ker, NULL, NULL);
-
- err = clBuildProgram(program, 1, &device, NULL, NULL, NULL);
- if (err != CL_SUCCESS){
- clGetProgramBuildInfo (program,
- device,
- CL_PROGRAM_BUILD_LOG,
- 100000,
- log,
- &logSize);
- printf("%s", log);
- clReleaseProgram(program);
- return err;
- }
-
- kernel = clCreateKernel(program, kernelName, &err);
- if (err != CL_SUCCESS){
- clReleaseProgram(program);
- return err;
- }
-
- /* Memory allocation */
-
- A.v = malloc(M * K * tsize);
- B.v = malloc(K * N * tsize);
- C.v = malloc(M * N * tsize);
- C_naive.v = malloc(M * N * tsize);
-
- srand(0);
- if (is_double) {
- for(i = 0; i < M * K * nwords; i++){
- A.d[i] = (double)(rand() % RAND_BOUND);
- }
- for(i = 0; i < N * K * nwords; i++){
- B.d[i] = (double)(rand() % RAND_BOUND);
- }
- for(i = 0; i < M * N * nwords; i++){
- C.d[i] = 0.0;
- C_naive.d[i] = 0.0;
- }
- }
- else {
- for(i = 0; i < M * K * nwords; i++){
- A.f[i] = (float)(rand() % RAND_BOUND);
- }
- for(i = 0; i < N * K * nwords; i++){
- B.f[i] = (float)(rand() % RAND_BOUND);
- }
- for(i = 0; i < M * N * nwords; i++){
- C.f[i] = 0.0;
- C_naive.f[i] = 0.0;
- }
- }
-
- if (mulOpts->aMobj == CLMEM_IMAGE) {
- imageSizes(&imgHeight, &imgWidth, M, K, 0, tsize, packed);
- imA = clCreateImage2D (ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
- &image_format, imgWidth, imgHeight, 0, A.v, &err);
- }
- else {
- imA = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
- K * M * tsize, A.v, &err);
- }
- if (err != CL_SUCCESS) {
- clReleaseKernel(kernel);
- return err;
- }
- if (mulOpts->bMobj == CLMEM_IMAGE) {
- imageSizes(&imgHeight, &imgWidth, N, K, 0, tsize, packed);
- imB = clCreateImage2D (ctx, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
- &image_format, imgWidth, imgHeight, 0, B.v, &err);
- }
- else {
- imB = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
- K * N * tsize, B.v, &err);
- }
- if (err != CL_SUCCESS) {
- clReleaseMemObject(imA);
- clReleaseKernel(kernel);
- return err;
- }
-
- bufC = clCreateBuffer(ctx, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
- M * N * tsize, C.v, &err);
-
- if (err != CL_SUCCESS) {
- clReleaseMemObject(imB);
- clReleaseMemObject(imA);
- clReleaseKernel(kernel);
- return err;
- }
-
- err = clEnqueueWriteBuffer (queue,
- bufC,
- CL_TRUE,
- 0,
- M * N * tsize,
- C.v,
- 0,
- NULL,
- NULL);
-
- /* Argument setting and kernel execution */
- err = clSetKernelArg(kernel, 0, tsize, alpha.u);
- err |= clSetKernelArg(kernel, 1, sizeof(imA), &imA);
- err |= clSetKernelArg(kernel, 2, sizeof(imB), &imB);
- err |= clSetKernelArg(kernel, 3, sizeof(M), &M);
- err |= clSetKernelArg(kernel, 4, sizeof(N), &N);
- err |= clSetKernelArg(kernel, 5, sizeof(K), &K);
- err |= clSetKernelArg(kernel, 6, sizeof(bufC), &bufC);
- err |= clSetKernelArg(kernel, 7, sizeof(iter), &iter);
-
- if (err != CL_SUCCESS) {
- clReleaseMemObject(bufC);
- clReleaseMemObject(imB);
- clReleaseMemObject(imA);
- clReleaseKernel(kernel);
- return err;
- }
-
- err = clEnqueueNDRangeKernel(queue, kernel, 1, NULL,
- globalWorkSize, localWorkSize, 0,
- NULL, &evt);
-
- if (err != CL_SUCCESS) {
- clReleaseMemObject(bufC);
- clReleaseMemObject(imB);
- clReleaseMemObject(imA);
- clReleaseKernel(kernel);
- return err;
- }
-
- err = clFinish(queue);
-
- err = clEnqueueReadBuffer (queue,
- bufC,
- CL_TRUE,
- 0,
- M * N * tsize,
- C.v,
- 0,
- NULL,
- NULL);
-
- /* Naive CPU multiplication */
- if (is_double) {
- if (is_complex) {
- for (i = 0; i < M; i++) {
- for (j = 0; j < N; j++) {
- for (k = 0; k < K; k++) {
- C_naive.d[(i * N + j) * 2] +=
- A.d[(i * K + k) * 2] * B.d[(j * K + k) * 2] -
- A.d[(i * K + k) * 2 + 1] * B.d[(j * K + k) * 2 + 1];
-
- C_naive.d[(i * N + j) * 2 + 1] +=
- A.d[(i * K + k) * 2] * B.d[(j * K + k) * 2 + 1] +
- A.d[(i * K + k) * 2 + 1] * B.d[(j * K + k) * 2];
- }
-
- tmp.d2.s[0] = C_naive.d[(i * N + j) * 2] * alpha.d2.s[0] -
- C_naive.d[(i * N + j) * 2 + 1] * alpha.d2.s[1];
- tmp.d2.s[1] = C_naive.d[(i * N + j) * 2] * alpha.d2.s[1] +
- C_naive.d[(i * N + j) * 2 + 1] * alpha.d2.s[0];
- C_naive.d[(i * N + j) * 2] = tmp.d2.s[0];
- C_naive.d[(i * N + j) * 2 + 1] = tmp.d2.s[1];
- }
- }
-
- for (i = 0; i < M * N; i++) {
- if ((C.d[i * 2] != C_naive.d[i * 2]) ||
- (C.d[i * 2 + 1] != C_naive.d[i * 2 + 1])) {
- printf("Differ at (%lu, %lu): (%lf; %lf) != (%lf; %lf)\n",
- i / N, i % N, C.d[i * 2], C.d[i * 2 + 1],
- C_naive.d[i * 2], C_naive.d[i * 2 + 1]);
- break;
- }
- }
- if (i == M * N) {
- printf("Match\n");
- }
- }
- else {
- for (i = 0; i < M; i++) {
- for (j = 0; j < N; j++) {
- for (k = 0; k < K; k++) {
- C_naive.d[i * N + j] += A.d[i * K + k] * B.d[j * K + k];
- }
- C_naive.d[i * N + j] *= alpha.d;
- }
- }
-
- for (i = 0; i < M * N; i++) {
- if (C.d[i] != C_naive.d[i]) {
- printf("Differ at (%lu, %lu): %lf != %lf\n", i / N, i % N,
- C.d[i], C_naive.d[i]);
- break;
- }
- }
- if (i == M * N) {
- printf("Match\n");
- }
- }
- }
- else {
- if (is_complex) {
- for (i = 0; i < M; i++) {
- for (j = 0; j < N; j++) {
- for (k = 0; k < K; k++) {
- C_naive.f[(i * N + j) * 2] +=
- A.f[(i * K + k) * 2] * B.f[(j * K + k) * 2] -
- A.f[(i * K + k) * 2 + 1] * B.f[(j * K + k) * 2 + 1];
-
- C_naive.f[(i * N + j) * 2 + 1] +=
- A.f[(i * K + k) * 2] * B.f[(j * K + k) * 2 + 1] +
- A.f[(i * K + k) * 2 + 1] * B.f[(j * K + k) * 2];
- }
-
- tmp.f2.s[0] = C_naive.f[(i * N + j) * 2] * alpha.f2.s[0] -
- C_naive.f[(i * N + j) * 2 + 1] * alpha.f2.s[1];
- tmp.f2.s[1] = C_naive.f[(i * N + j) * 2] * alpha.f2.s[1] +
- C_naive.f[(i * N + j) * 2 + 1] * alpha.f2.s[0];
- C_naive.f[(i * N + j) * 2] = tmp.f2.s[0];
- C_naive.f[(i * N + j) * 2 + 1] = tmp.f2.s[1];
- }
- }
-
- for (i = 0; i < M * N; i++) {
- if ((C.f[i * 2] != C_naive.f[i * 2]) ||
- (C.f[i * 2 + 1] != C_naive.f[i * 2 + 1])) {
- printf("Differ at (%lu, %lu): (%lf; %lf) != (%lf; %lf)\n",
- i / N, i % N, C.f[i * 2], C.f[i * 2 + 1],
- C_naive.f[i * 2], C_naive.f[i * 2 + 1]);
- break;
- }
- }
- if (i == M * N) {
- printf("Match\n");
- }
- }
- else {
- for (i = 0; i < M; i++) {
- for (j = 0; j < N; j++) {
- for (k = 0; k < K; k++) {
- C_naive.f[i * N + j] += A.f[i * K + k] * B.f[j * K + k];
- }
- C_naive.f[i * N + j] *= alpha.f;
- }
- }
-
- for (i = 0; i < M * N; i++) {
- if (C.f[i] != C_naive.f[i]) {
- printf("Differ at (%lu, %lu): %lf != %lf\n",
- i / N, i % N, C.f[i], C_naive.f[i]);
- break;
- }
- }
- if (i == M * N) {
- printf("Match\n");
- }
- }
- }
- /* End of naive CPU multiplication */
-
- clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_START, sizeof(cl_ulong), &sTime, NULL);
- clGetEventProfilingInfo(evt, CL_PROFILING_COMMAND_END, sizeof(cl_ulong), &fTime, NULL);
-
- printf("Total multiplication time: %d ms\nTime per iteration: %d ns\n",
- (int)((fTime-sTime)/1000000), (int)((fTime-sTime)/iter));
-
- clReleaseMemObject(bufC);
- clReleaseMemObject(imB);
- clReleaseMemObject(imA);
- clReleaseKernel(kernel);
- return CL_SUCCESS;
-}
-
-int main(int argc, char *argv[])
-{
- char out[65535];
- SubproblemDim subdims[2];
- BlkMulOpts mulOpts;
- DataType dtype;
- int i;
- cl_uint iter = 1, blockM = 4, blockN = 4, blockK = 8;
- struct KgenContext *ctx = createKgenContext(out, 65535, 1);
- FType alpha;
- int cmdAlpha = 0;
-
- mulOpts.aMobj = CLMEM_BUFFER;
- mulOpts.bMobj = CLMEM_BUFFER;
- mulOpts.flags = BLKMUL_NO_FLAGS;
-
- // parse command line
-
- if (argc < 2) {
- usage();
- return 1;
- }
-
- if (!strcmp(argv[1], "s")) {
- dtype = TYPE_FLOAT;
- alpha.f = 1;
- }
- else if (!strcmp(argv[1], "d")) {
- dtype = TYPE_DOUBLE;
- alpha.d = 1;
- }
- else if (!strcmp(argv[1], "c")) {
- dtype = TYPE_COMPLEX_FLOAT;
- alpha.f2.s[0] = 1;
- alpha.f2.s[1] = 0;
- }
- else if (!strcmp(argv[1], "z")) {
- dtype = TYPE_COMPLEX_DOUBLE;
- alpha.d2.s[0] = 1;
- alpha.d2.s[1] = 0;
- }
- else {
- printf("Wrong type specified: %s\n", argv[1]);
- return 1;
- }
-
- for (i = 2; i < argc; i++) {
- if (strcmp(argv[i], "--imA") == 0) {
- mulOpts.aMobj = CLMEM_IMAGE;
- continue;
- }
- if (strcmp(argv[i], "--imB") == 0) {
- mulOpts.bMobj = CLMEM_IMAGE;
- continue;
- }
- if (strcmp(argv[i], "--img-packed") == 0) {
- mulOpts.flags |= BLKMUL_IMAGE_PACKED;
- continue;
- }
-
- if (strcmp(argv[i], "--iter") == 0) {
- if (i + 1 == argc) {
- printf("Error: 'iter' argument is not specified\n");
- usage();
- return 1;
- }
- iter = atoi(argv[i + 1]);
- i++;
- continue;
- }
-
- if (strcmp(argv[i], "--alpha") == 0) {
- if (i + 1 == argc) {
- printf("Error: 'alpha' argument is not specified\n");
- usage();
- return 1;
- }
- cmdAlpha = atoi(argv[i + 1]);
- i++;
- continue;
- }
-
- if (i + 2 >= argc) {
- printf("Error: Not all sizes are specified\n");
- usage();
- return 1;
- }
- blockM = atoi(argv[i]);
- blockN = atoi(argv[i + 1]);
- blockK = atoi(argv[i + 2]);
- i += 2;
- }
-
- if (cmdAlpha) {
- switch (dtype) {
- case TYPE_FLOAT:
- alpha.f = cmdAlpha;
- break;
- case TYPE_DOUBLE:
- alpha.d = cmdAlpha;
- break;
- case TYPE_COMPLEX_FLOAT:
- alpha.f2.s[0] = cmdAlpha;
- alpha.f2.s[1] = -cmdAlpha / 2;
- break;
- case TYPE_COMPLEX_DOUBLE:
- alpha.d2.s[0] = cmdAlpha;
- alpha.d2.s[1] = -cmdAlpha / 2;
- break;
- default:
- break;
- }
- }
-
- subdims[0].y = blockM * ITEM_WORK_M;
- subdims[0].x = blockN * ITEM_WORK_N;
- subdims[0].bwidth = blockK * BLOCKS_K;
- subdims[1].y = blockM;
- subdims[1].x = blockN;
- subdims[1].bwidth = blockK;
-
- memset(out, 0, sizeof(out));
-
- i = isDoubleBasedType(dtype);
- addTestPrefix(ctx, i);
-
- blkMulGen(ctx, subdims, dtype, &mulOpts);
-
- addTestSuffix(ctx, subdims, dtype, &mulOpts);
-
- run(out, subdims[0].y, subdims[0].x, subdims[0].bwidth, alpha,
- dtype, &mulOpts, iter);
-
- destroyKgenContext(ctx);
-
- return 0;
-}