summaryrefslogtreecommitdiff
path: root/external/clBLAS/src/library/blas/gens/legacy/blkmul.c
diff options
context:
space:
mode:
Diffstat (limited to 'external/clBLAS/src/library/blas/gens/legacy/blkmul.c')
-rw-r--r--external/clBLAS/src/library/blas/gens/legacy/blkmul.c823
1 files changed, 0 insertions, 823 deletions
diff --git a/external/clBLAS/src/library/blas/gens/legacy/blkmul.c b/external/clBLAS/src/library/blas/gens/legacy/blkmul.c
deleted file mode 100644
index 8b78d808..00000000
--- a/external/clBLAS/src/library/blas/gens/legacy/blkmul.c
+++ /dev/null
@@ -1,823 +0,0 @@
-/* ************************************************************************
- * Copyright 2013 Advanced Micro Devices, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * ************************************************************************/
-
-
-/*
- * TODO: throw away this generator and replace it with tileMulGen() in all
- * kernel generators
- */
-
-#include <stdio.h>
-#include <string.h>
-
-#include <defbool.h>
-#include <clblas_stddef.h>
-#include <sys/types.h>
-#include <kerngen.h>
-#include <matrix_props.h>
-#include <matrix_dims.h>
-#include <dis_warning.h>
-
-#include "../blas_kgen.h"
-#include "blas_kgen_legacy.h"
-
-#define MAX_LENGTH 4096
-#define BITS_INT (sizeof(int) * 8)
-
-typedef enum VectMulType {
- VECT_MULT_REAL,
- VECT_MULT_COMPLEX_REAL,
- VECT_MULT_IMAG_FLOAT,
- VECT_MULT_IMAG_DOUBLE
-} VectMulType;
-
-static __inline bool
-isPower2(size_t a)
-{
- return (a && ((a & (a - 1)) == 0));
-}
-
-/*
- * get vector chunk size to copy
- * taking into account its alignment
- */
-static unsigned int
-vecChunkSize(size_t offset, size_t vecLen)
-{
- size_t chunk;
-
- for (chunk = vecLen; (chunk > 1) && (offset % chunk); chunk /= 2) { }
-
- return (unsigned int)chunk;
-}
-
-static void
-getCyclicAddrData(
- BlkMulFlags flags,
- const char **op,
- size_t *value,
- size_t bound)
-{
- if (isPower2(bound) && !(flags & BLKMUL_AVOID_AND)) {
- *op = "&";
- *value = bound - 1;
- }
- else {
- *op = "%";
- *value = bound;
- }
-}
-
-static void
-sprintfInputOffset(
- char *buf,
- MatrixRole mrole,
- int row,
- int col,
- size_t vecPitch,
- size_t bheight,
- const BlkMulOpts *opts,
- BlkmulArgNames *argNames,
- bool singleStepK)
-{
- const char *vfield;
- const char *coordName;
- const char *op;
- size_t bound;
- char colOff[64], rowOff[64];
- CLMemType mtype;
- BlkMulFlags flags = opts->flags;
-
- vfield = (mrole == MATRIX_A) ? "y" : "x";
- mtype = (mrole == MATRIX_A) ? opts->aMobj : opts->bMobj;
- if ((mrole == MATRIX_B) && (flags & BLKMUL_TRANSPOSED_B)) {
- flags &= ~BLKMUL_SKEW_ROW;
- }
-
- if (flags & BLKMUL_SKEW_ROW) {
- getCyclicAddrData(flags, &op, &bound, bheight);
- sprintf(rowOff, "((%s.%s + %d) %s %lu)",
- argNames->skewRow, vfield, row, op, bound);
- }
- else {
- sprintf(rowOff, "%d", row);
- }
-
- if (flags & BLKMUL_SKEW_COLUMN) {
- getCyclicAddrData(flags, &op, &bound, vecPitch);
- if (flags & BLKMUL_INLINE) {
- if (singleStepK) {
- sprintf(colOff, "%d", col);
- }
- else {
- sprintf(colOff, "(%s + %s + %d) %% %s",
- argNames->skewCol, argNames->k, col,
- argNames->vectBoundK);
- }
- }
- else {
- if (singleStepK) {
- sprintf(colOff, "%s", argNames->skewCol);
- }
- else {
- sprintf(colOff, "((skewCol + k + %d) %s %lu)",
- col, op, bound);
- }
- }
- }
- else {
- sprintf(colOff, "%d", col);
- }
-
- if (mtype == CLMEM_IMAGE) {
- coordName = (mrole == MATRIX_A) ? argNames->coordA : argNames->coordB;
- if (flags & BLKMUL_IMAGE_PACKED) {
- sprintf(buf, "(int2)(%s.x + mad24(%s, %lu, %s), %s.y)",
- coordName, rowOff, vecPitch, colOff, coordName);
- }
- else {
- sprintf(buf, "(int2)(%s.x + %s, %s.y + %s)",
- coordName, colOff, coordName, rowOff);
- }
- }
- else {
- if (flags & BLKMUL_SKEW_ROW) {
- sprintf(buf, "mad24(%s, %lu, %s)", rowOff, vecPitch, colOff);
- }
- else {
- sprintf(buf, "%lu + %s", row * vecPitch, colOff);
- }
- }
-}
-
-static void
-genRealDot(
- struct KgenContext *ctx,
- size_t m,
- size_t n,
- size_t nrCols,
- size_t lenK,
- unsigned int vecLen)
-{
- size_t k;
- char tmp[MAX_LENGTH], prefix[MAX_LENGTH];
- const char *vect = "xyzw";
- size_t regPitch = nrCols;
- size_t off;
-
- if (regPitch % vecLen) {
- regPitch += vecLen - regPitch % vecLen;
- }
-
- off = m * regPitch + n;
- sprintf(prefix, "c[%lu].%c += ", off / vecLen, vect[off % vecLen]);
-
- for (k = 0; k < lenK / vecLen; k++) {
- off = n * lenK / vecLen + k;
- sprintf(tmp, "%sdot(a[%lu], b[%lu]);\n", prefix, k, off);
- kgenAddStmt(ctx, tmp);
- }
-}
-
-/*
- * sprintf vector multiplication expression
- */
-static void
-genVecMul(
- struct KgenContext *ctx,
- size_t currCol,
- size_t lenK,
- VectMulType type)
-{
- size_t k;
- char tmp[MAX_LENGTH];
- const char *suff[] = {"", "", ".yxwz", ".yx"};
-
- sprintf(tmp, "sum = a[%d] * b[%lu]%s", 0, currCol * lenK, suff[type]);
- for (k = 1; k < lenK; k++) {
- sprintf(tmp, "%s + a[%lu] * b[%lu]%s", tmp, k,
- currCol * lenK + k, suff[type]);
- }
- strcat(tmp, ";\n");
- kgenAddStmt(ctx, tmp);
-}
-
-/*
- * sprintf vector multiplication expression using mad()'s
- */
-static void
-genMadMul(
- struct KgenContext *ctx,
- size_t currCol,
- size_t lenK,
- VectMulType type)
-{
- size_t k;
- char tmp[MAX_LENGTH];
- const char *suff[] = {"", "", ".yxwz", ".yx"};
-
- sprintf(tmp, "sum = a[%d] * b[%lu]%s;\n", 0, currCol * lenK,
- suff[type]);
- for (k = 1; k < lenK; k++) {
- sprintf(tmp, "%ssum = mad(a[%lu], b[%lu]%s, sum);\n", tmp, k,
- currCol * lenK + k, suff[type]);
- }
- kgenAddStmt(ctx, tmp);
-}
-
-
-/*
- * sprint expression for all the vector components
- * accumulation
- */
-static void
-genVecSum(
- struct KgenContext *ctx,
- DataType dataType,
- size_t currRow,
- size_t currCol,
- size_t nrCols,
- unsigned int vecLen,
- VectMulType mulType)
-{
- const char *vect = "xyzw";
- unsigned long vecOff, regOff;
- char c;
- unsigned int k;
- size_t pitch = nrCols;
- char tmp1[MAX_LENGTH], tmp2[MAX_LENGTH];
- unsigned int sumLen;
-
- // get offset taking into account alignment
- if ((pitch % vecLen) && !isComplexType(dataType)) {
- pitch += vecLen - pitch % vecLen;
- }
-
- regOff = (unsigned int)(currRow * pitch + currCol);
- if (isComplexType(dataType)) {
- vecOff = (mulType == VECT_MULT_COMPLEX_REAL) ? 0 : 1;
- sumLen = vecLen * 2;
- }
- else {
- vecOff = regOff % vecLen;
- regOff /= vecLen;
- sumLen = vecLen;
- }
-
- sprintf(tmp1, " sum.x");
- for (k = 1; k < sumLen; k++) {
- c = ((mulType == VECT_MULT_COMPLEX_REAL) && (k & 1)) ? '-' : '+';
- sprintf(tmp1, "%s %c sum.%c", tmp1, c, vect[k]);
- }
-
- sprintf(tmp2, "c[%lu].%c += %s;\n", regOff, vect[vecOff], tmp1);
- kgenAddStmt(ctx, tmp2);
-}
-
-/*
- * vector multiplication expression using mad() operations
- */
-static void
-genMad(
- struct KgenContext *ctx,
- DataType dataType,
- size_t currRow,
- size_t currCol,
- size_t nrCols,
- size_t lenK,
- unsigned int vecLen,
- bool vectorized)
-{
- const char *vect = {"xyzw"};
- unsigned long vecOff, regOff;
- unsigned int k;
- size_t pitch = nrCols;
- char tmp[MAX_LENGTH];
- unsigned int sumLen;
- int bIndex;
-
- // get offset taking into account alignment
- if ((pitch % vecLen) && !isComplexType(dataType)) {
- pitch += vecLen - pitch % vecLen;
- }
- regOff = (unsigned int)(currRow * pitch + currCol);
- vecOff = (unsigned int)(regOff % vecLen);
-
- if (isComplexType(dataType)) {
- sumLen = vecLen * 2;
- for (k = 0; k < lenK; k++) {
- int aIndex = k;
- bIndex = (int)(currCol * lenK + k);
-
- sprintf(tmp, "c[%lu] = mad(a[%d].xy, (float2)(b[%d].x), c[%lu]);\n",
- regOff, aIndex, bIndex, regOff);
- kgenAddStmt(ctx, tmp);
- sprintf(tmp, "c[%lu] = mad(a[%d].yx, (float2)(-b[%d].y, b[%d].y), c[%lu]);\n",
- regOff, aIndex, bIndex, bIndex, regOff);
- kgenAddStmt(ctx, tmp);
- sprintf(tmp, "c[%lu] = mad(a[%d].zw, (float2)(b[%d].z), c[%lu]);\n",
- regOff, aIndex, bIndex, regOff);
- kgenAddStmt(ctx, tmp);
- sprintf(tmp, "c[%lu] = mad(a[%d].wz, (float2)(-b[%d].w, b[%d].w), c[%lu]);\n",
- regOff, aIndex, bIndex, bIndex, regOff);
- kgenAddStmt(ctx, tmp);
- }
- }
- else {
- // Real case
- if (vectorized) {
- const char *tname = (isDoubleBasedType(dataType)) ? "double" : "float";
-
- regOff = (unsigned int)(currRow * nrCols / vecLen + currCol);
- for (k = 0; k < lenK * vecLen; k++) {
- bIndex = (int)(currCol * lenK * vecLen + k);
- sprintf(tmp, "c[%lu] = mad((%s%u)a[%u].%c, b[%d], c[%lu]);\n",
- regOff, tname, vecLen, k / vecLen, vect[k % vecLen],
- bIndex, regOff);
- kgenAddStmt(ctx, tmp);
- }
- }
- else {
- int dimNum;
-
- regOff /= vecLen;
- sumLen = vecLen;
- if (isDoubleBasedType(dataType)) {
- dimNum = 2;
- }
- else {
- dimNum = 4;
- }
-
- for (k = 0; k < sumLen*lenK; k++) {
- sprintf(tmp, "c[%lu].%c = mad(a[%u].%c, b[%lu].%c, "
- "c[%lu].%c);\n",
- regOff, vect[vecOff], k / sumLen, vect[k % dimNum],
- currCol * lenK + (k / sumLen), vect[k % dimNum],
- regOff, vect[vecOff]);
- kgenAddStmt(ctx, tmp);
- }
- kgenAddBlankLine(ctx);
- }
- }
-}
-
-static void
-getUpdateSkewCoords(
- struct KgenContext *ctx,
- const BlkMulOpts *opts,
- size_t subK,
- size_t pitchA,
- size_t pitchB,
- unsigned int vecLen,
- const char *ptrNameIn)
-{
- char tmp[1024];
- bool trb = ((opts->flags & BLKMUL_TRANSPOSED_B) != 0);
-
- if (!(opts->flags & BLKMUL_SKEW_COLUMN)) {
- kgenAddBlankLine(ctx);
- if (opts->aMobj == CLMEM_IMAGE) {
- sprintf(tmp, "coordA.x += %lu;\n", subK / vecLen);
- }
- else {
- sprintf(tmp, "A.%s += %lu;\n", ptrNameIn, subK / vecLen);
- }
- kgenAddStmt(ctx, tmp);
-
- if (!trb) {
- subK /= vecLen;
- }
- if (opts->bMobj == CLMEM_IMAGE) {
- const char *vfield = (trb) ? "y" : "x";
-
- sprintf(tmp, "coordB.%s += %lu;\n", vfield, subK);
- }
- else {
- size_t u = (trb) ? (subK * pitchB / vecLen) : subK;
-
- sprintf(tmp, "B.%s += %lu;\n", ptrNameIn, u);
- }
- kgenAddStmt(ctx, tmp);
- }
- else if (subK == vecLen) {
- if (isPower2(pitchA / vecLen)) {
- sprintf(tmp, "\nskewCol = (skewCol + 1) & %lu;\n",
- pitchA / vecLen - 1);
- }
- else {
- sprintf(tmp, "\nskewCol = (skewCol + 1) %% %lu;\n",
- pitchA / vecLen);
- }
- kgenAddStmt(ctx, tmp);
- }
-}
-
-// MUST BE LATER DEPRECATED
-static void
-genScaleAccResults(
- struct KgenContext *ctx,
- DataType dtype,
- size_t m,
- size_t n,
- size_t outPitch,
- unsigned int vecLen,
- bool transpose)
-{
- char s[MAX_LENGTH];
- const char *vect = "xyzw";
- char vecChunk[6];
- size_t inOff = 0, outOff, vecOff;
- size_t regPitch = n;
- size_t i, j, k;
- bool isDouble;
- const char *ptrNames[2][4] = {
- {"f", "f2v", "", "f4v"},
- {"d", "d2v", "", ""}};
-
- if ((regPitch % vecLen) && !isComplexType(dtype)) {
- regPitch += vecLen - regPitch % vecLen;
- }
-
- isDouble = isDoubleBasedType(dtype);
- for (i = 0; i < m; i++) {
- j = 0;
- inOff = i * regPitch;
-
- do {
- /*
- * get power of 2 size vector element to copy
- * in the case without transposing and copy
- * just with single element in the case with
- * transposing
- */
-
- if (transpose) {
- k = 1;
- outOff = (j * outPitch + i);
- }
- else {
- if (isComplexType(dtype)) {
- k = 1;
- }
- else {
- k = vecChunkSize(j, vecLen);
- k = szmin(k, n - j);
- }
- outOff = (i * outPitch + j);
- }
-
- if (isComplexType(dtype)) {
- sprintf(s, "tempC.%s[%lu] += "
- "c[%lu] * alphaR + c[%lu].yx * alphaI;\n",
- ptrNames[isDouble][1], outOff, inOff, inOff);
- }
- else {
- if (k == vecLen) {
- strcpy(vecChunk, "");
- }
- else {
- vecOff = inOff % vecLen;
- strcpy(vecChunk, ".");
- strncat(vecChunk, &vect[vecOff], k);
- }
-
- sprintf(s, "tempC.%s[%lu] += c[%lu]%s * alpha;\n",
- ptrNames[isDouble][k - 1], outOff / k,
- inOff / vecLen, vecChunk);
- }
-
- kgenAddStmt(ctx, s);
-
- j += k;
- inOff += k;
- } while (j < n);
- }
-}
-
-static void
-declareBlkMul(
- struct KgenContext *ctx,
- DataType dtype,
- size_t m,
- size_t n,
- const BlkMulOpts *opts,
- BlkmulArgNames *argNames)
-{
- char s[MAX_LENGTH];
- const char *s1;
- char c;
- const char *typeName;
- bool isPriv = (opts->flags & BLKMUL_OUTPUT_PRIVATE);
-
- c = dtypeToBlasPrefix(dtype);
- typeName = dtypeBuiltinType(dtype);
- s1 = (opts->flags & BLKMUL_TRANSPOSE) ? "Transp" : "";
-
- // fill argument names
- argNames->coordA = "coordA";
- argNames->coordB = "coordB";
- argNames->skewRow = "skewRow";
- argNames->skewCol = "skewCol";
-
- sprintf(s, "void\n"
- "%cgemmBlock%s_%lu_%lu(\n",
- c, s1, m, n);
-
- if (!isPriv) {
- sprintf(s, "%s %s alpha,\n", s, typeName);
- }
- if (opts->aMobj == CLMEM_IMAGE) {
- sprintf(s, "%s __read_only image2d_t A,\n"
- " int2 coordA,\n", s);
- }
- else {
- sprintf(s, "%s LPtr A,\n", s);
- }
- if (opts->bMobj == CLMEM_IMAGE) {
- sprintf(s, "%s __read_only image2d_t B,\n"
- " int2 coordB,\n", s);
- }
- else {
- sprintf(s, "%s LPtr B,\n", s);
- }
-
- if (opts->flags & BLKMUL_OUTPUT_PRIVATE) {
- if (isDoubleBasedType(dtype)) {
- typeName = "double2";
- }
- else {
- typeName = (dtype == TYPE_COMPLEX_FLOAT) ? "float2" : "float4";
- }
- sprintf(s, "%s %s *c", s, typeName);
- }
- else {
- sprintf(s, "%s LPtr tempC", s);
-
- }
-
- if (opts->flags & BLKMUL_SKEW_ROW) {
- sprintf(s, "%s,\n int2 skewRow", s);
- }
- if (opts->flags & BLKMUL_SKEW_COLUMN) {
- sprintf(s, "%s,\n int skewCol", s);
- }
- strcat(s, ")\n");
-
- kgenDeclareFunction(ctx, (const char*)s);
-}
-
-int
-blkMulGen(
- struct KgenContext *ctx,
- const SubproblemDim subdims[2],
- DataType dtype,
- const BlkMulOpts *opts)
-{
- char s[MAX_LENGTH], s1[MAX_LENGTH];
- const char *tNameIn, *tNameOut, *ptrNameIn;
- size_t vecLen, vlenJ, vlenK;
- size_t i, j, k;
- size_t m, n, subK;
- unsigned int nrRegs;
- int ret = 0;
- bool isReal, isDouble;
- bool isImageA, isImageB;
- size_t off;
- size_t pitchA, pitchB, pitchC;
- unsigned int tsize = dtypeSize(dtype);
- bool transpose = (opts->flags & BLKMUL_TRANSPOSE);
- bool trb = ((opts->flags & BLKMUL_TRANSPOSED_B) != 0);
- bool isPriv = (opts->flags & BLKMUL_OUTPUT_PRIVATE);
- bool isInlined = (opts->flags & BLKMUL_INLINE);
- BlkmulCore core = opts->core;
- BlkmulArgNames argNames;
- // code to fetch from images for double and float based types
- const char *imageFetch[2] = {
- "%c[%lu] = as_float4(read_imageui(%s, sampler, %s));\n",
- "%c[%lu] = as_double2(read_imageui(%s, sampler, %s));\n"};
-
- if (trb && (opts->flags & BLKMUL_SKEW_COLUMN)) {
- return -EINVAL;
- }
-
- memcpy(&argNames, &opts->argNames, sizeof(BlkmulArgNames));
- strcpy(s, "");
-
- isImageA = (opts->aMobj == CLMEM_IMAGE);
- isImageB = (opts->bMobj == CLMEM_IMAGE);
-
- m = subdims[1].y;
- n = subdims[1].x;
- subK = subdims[1].bwidth;
- tsize = dtypeSize(dtype);
-
- // matrix block pitches
- pitchA = matrBlockPitch(subdims, MATRIX_A, dtype, clblasLeft);
- k = (trb) ? subdims[0].x : subdims[0].bwidth;
- pitchB = fl4RowWidth(k, tsize) * sizeof(cl_float4) / tsize;
- pitchC = matrBlockPitch(subdims, MATRIX_C, dtype, clblasLeft);
-
- isReal = !isComplexType(dtype);
- isDouble = isDoubleBasedType(dtype);
-
- vecLen = FLOAT4_VECLEN * sizeof(cl_float) / tsize;
- if (isDouble) {
- tNameIn = "double2";
- ptrNameIn = "d2v";
- }
- else {
- tNameIn = "float4";
- ptrNameIn = "f4v";
- }
-
- getResultGPRsInfo(dtype, &subdims[1], (unsigned int)vecLen, &nrRegs, &tNameOut);
-
- if (!isInlined) {
- declareBlkMul(ctx, dtype, m, n, opts, &argNames);
- kgenBeginFuncBody(ctx);
- }
-
- //variables declaration
- if (isImageA || isImageB) {
- kgenAddStmt(ctx, "const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE "
- "| CLK_ADDRESS_NONE | CLK_FILTER_NEAREST;\n");
- }
- if (!isInlined) {
- strcpy(s, "uint k;\n");
- }
- sprintf(s, "%s%s a[%lu], b[%lu];\n",s , tNameIn, subK / vecLen,
- n * subK / vecLen);
-
- if (!isPriv) {
- // declare registers for result
- sprintf(s, "%s%s c[%u];\n", s, tNameOut, nrRegs);
- }
-
- // 'dot' function can't be used for complex types
- if (isComplexType(dtype) && (core == BLKMUL_DOT)) {
- core = BLKMUL_SEPARATE_MULADD;
- }
-
- if ((core == BLKMUL_SEPARATE_MULADD) || isComplexType(dtype)) {
- sprintf(s,"%s%s sum;\n", s, tNameIn);
- }
-
- kgenAddStmt(ctx, s);
-
- if (!isPriv && !isReal) {
- declareComplexMultParts(ctx, "alpha", tNameOut);
- }
- kgenAddBlankLine(ctx);
-
- // zeroing temporary multiplication data stored to registers
- if (!isPriv) {
- sprintf(s, "for (k = 0; k < %u; k++) {\n"
- " c[k] = 0;\n"
- "}\n\n", nrRegs);
- kgenAddStmt(ctx, s);
- }
-
- //main loop start
- if (!isInlined) {
- // initial skew correction
- if ((opts->flags & BLKMUL_SKEW_COLUMN) && (subK == vecLen)) {
- if (isPower2(pitchA / vecLen) &&
- !(opts->flags & BLKMUL_AVOID_AND)) {
- sprintf(s, "skewCol = skewCol & %lu;\n", pitchA / vecLen - 1);
- }
- else {
- sprintf(s, "\nskewCol = skewCol %% %lu;\n", pitchA / vecLen);
- }
- kgenAddStmt(ctx, s);
- }
- sprintf(s, "\nfor (k = 0; k < %lu; k += %lu)",
- subdims[0].bwidth / vecLen, subK / vecLen);
- ret = kgenBeginBranch(ctx, s);
- }
-
- if (trb) {
- vlenJ = vecLen;
- vlenK = 1;
- }
- else {
- vlenJ = 1;
- vlenK = vecLen;
- }
-
- for (j = 0; j < n / vlenJ; j++) {
- // fetch elements of matrix B
- for (k = 0; k < subK / vlenK; k++) {
- size_t coords[2] = {k, j};
- if (trb) {
- off = j * subK + k;
- }
- else {
- off = j * subK / vecLen + k;
- }
- sprintfInputOffset(s1, MATRIX_B, (int)coords[1 - trb],
- (int)coords[trb], pitchB / vecLen,
- subdims[1].x, opts, &argNames, (subK == vecLen));
- if (isImageB) {
- sprintf(s, imageFetch[isDouble], 'b', off, "B", s1);
- }
- else {
- sprintf(s, "b[%lu] = B.%s[%s];\n", off, ptrNameIn, s1);
- }
- ret = kgenAddStmt(ctx, s);
- }
- }
-
- for (i = 0; i < m; i++) {
- kgenAddBlankLine(ctx);
- // fetch elements of matrix A from single row
- for (k = 0; k < subK / vecLen; k++) {
- sprintfInputOffset(s1, MATRIX_A, (int)i,
- (int)k, pitchA / vecLen, subdims[1].y, opts,
- &argNames, (subK == vecLen));
- if (isImageA) {
- sprintf(s, imageFetch[isDouble], 'a', k, "A", s1);
- }
- else {
- sprintf(s,"a[%lu] = A.%s[%s];\n", k, ptrNameIn, s1);
- }
- ret = kgenAddStmt(ctx, s);
- }
-
- // multiply matrix A row on matrix B block
- for (j = 0; j < n / vlenJ; j++) {
- if (isReal) { //real case
- switch (core) {
- case BLKMUL_DOT:
- genRealDot(ctx, i, j, n, subK, (unsigned int)vecLen);
- break;
- case BLKMUL_MAD:
- genMad(ctx, dtype, i, j, n, subK / vecLen,
- (unsigned int)vecLen, trb);
- break;
- case BLKMUL_SEPARATE_MULADD:
- genVecMul(ctx, j, subK / vecLen, VECT_MULT_REAL);
- genVecSum(ctx, dtype, i, j, n, (unsigned int)vecLen,
- VECT_MULT_REAL);
- break;
- }
- }
- else { //complex case
- VectMulType mulType = (dtype == TYPE_COMPLEX_FLOAT) ?
- VECT_MULT_IMAG_FLOAT : VECT_MULT_IMAG_DOUBLE;
-
- if (core == BLKMUL_MAD) {
- //real part
- genMadMul(ctx, j, subK / vecLen, VECT_MULT_COMPLEX_REAL);
- genVecSum(ctx, dtype, i, j, n, (unsigned int)vecLen,
- VECT_MULT_COMPLEX_REAL);
-
- //imaginary part
- genMadMul(ctx, j, subK / vecLen, mulType);
- genVecSum(ctx, dtype, i, j, n, (unsigned int)vecLen, mulType);
- }
- else {
- //real part
- genVecMul(ctx, j, subK / vecLen, VECT_MULT_COMPLEX_REAL);
- genVecSum(ctx, dtype, i, j, n, (unsigned int)vecLen,
- VECT_MULT_COMPLEX_REAL);
-
- //imaginary part
- genVecMul(ctx, j, subK / vecLen, mulType);
- genVecSum(ctx, dtype, i, j, n, (unsigned int)vecLen, mulType);
- }
- }
- }
- }
-
- // update coordinates/skews and end the loop
- if (!isInlined) {
- getUpdateSkewCoords(ctx, opts, subK, pitchA, pitchB,
- (unsigned int)vecLen, ptrNameIn);
- kgenEndBranch(ctx, NULL);
- }
-
- if (!isPriv) {
- kgenAddBlankLine(ctx);
- genScaleAccResults(ctx, dtype, m, n, pitchC, (unsigned int)vecLen, transpose);
- }
-
- if (!isInlined) {
- ret = kgenEndFuncBody(ctx);
- }
-
- return ret ? -EOVERFLOW : 0;
-}