summaryrefslogtreecommitdiff
path: root/external/clBLAS/src/tests/performance/TrxmPerformanceTest.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'external/clBLAS/src/tests/performance/TrxmPerformanceTest.cpp')
-rw-r--r--external/clBLAS/src/tests/performance/TrxmPerformanceTest.cpp362
1 files changed, 0 insertions, 362 deletions
diff --git a/external/clBLAS/src/tests/performance/TrxmPerformanceTest.cpp b/external/clBLAS/src/tests/performance/TrxmPerformanceTest.cpp
deleted file mode 100644
index 6ee272da..00000000
--- a/external/clBLAS/src/tests/performance/TrxmPerformanceTest.cpp
+++ /dev/null
@@ -1,362 +0,0 @@
-/* ************************************************************************
- * Copyright 2013 Advanced Micro Devices, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * ************************************************************************/
-
-
-/*
- * Performance test case class implementation for
- * TRMM and TRSM routines
- */
-
-#include <string.h> // memcpy()
-#include <gtest/gtest.h>
-#include <clBLAS.h>
-
-#include <common.h>
-#include <clBLAS-wrapper.h>
-#include <BlasBase.h>
-#include <blas-random.h>
-
-#ifdef PERF_TEST_WITH_ACML
-#include <blas-internal.h>
-#include <blas-wrapper.h>
-#endif
-
-#include "PerformanceTest.h"
-
-using namespace std;
-using namespace clMath;
-
-namespace clMath {
-
-template <typename ElemType> class TrxmPerformanceTest : public PerformanceTest
-{
-public:
- virtual ~TrxmPerformanceTest();
-
- virtual int prepare(void);
- virtual nano_time_t etalonPerfSingle(void);
- virtual nano_time_t clblasPerfSingle(void);
-
- static void runInstance(BlasFunction fn, TestParams *params)
- {
- TrxmPerformanceTest<ElemType> *perfCase;
- int ret = 0;
- int opFactor;
- BlasBase *base;
-
- base = clMath::BlasBase::getInstance();
-
- if (fn == FN_STRMM || fn == FN_DTRMM ||
- fn == FN_STRSM || fn == FN_DTRSM) {
-
- opFactor = 1;
- }
- else {
- opFactor = 4;
- }
-
- if ((fn == FN_DTRMM || fn == FN_ZTRMM ||
- fn == FN_DTRSM || fn == FN_ZTRSM) &&
- !base->isDevSupportDoublePrecision()) {
-
- std::cerr << ">> WARNING: The target device doesn't support native "
- "double precision floating point arithmetic" <<
- std::endl << ">> Test skipped" << std::endl;
- return;
- }
-
- perfCase = new TrxmPerformanceTest<ElemType>(fn, params);
- if (!perfCase->areResourcesSufficient(params)) {
- std::cerr << ">> RESOURCE CHECK: Skip due to unsufficient "
- "resources" << std::endl;
- }
- else {
- ret = perfCase->run(opFactor);
- }
-
- delete perfCase;
-
- ASSERT_GE(ret, 0) << "Fatal error: can not allocate resources or "
- "perform an OpenCL request!" << endl;
- EXPECT_EQ(0, ret) << "The OpenCL version is slower in the case" << endl;
- }
-
-private:
- TrxmPerformanceTest(BlasFunction fn, TestParams *params);
-
- bool areResourcesSufficient(TestParams *params);
-
- TestParams params_;
- ElemType alpha_;
- ElemType *A_;
- ElemType *B_;
- ElemType *backB_;
- cl_mem mobjA_;
- cl_mem mobjB_;
- ::clMath::BlasBase *base_;
- bool isTrsm_;
-
- static problem_size_t problemSize(TestParams *params);
-};
-
-} // namespace
-
-template <typename ElemType>
-TrxmPerformanceTest<ElemType>::TrxmPerformanceTest(
- BlasFunction fn,
- TestParams *params) : PerformanceTest(fn, problemSize(params)),
- params_(*params), mobjA_(NULL), mobjB_(NULL)
-{
- A_ = new ElemType[params_.rowsA * params_.columnsA];
- B_ = new ElemType[params_.rowsB * params_.columnsB];
- backB_ = new ElemType[params_.rowsB * params_.columnsB];
-
- base_ = ::clMath::BlasBase::getInstance();
- isTrsm_ = (static_cast<int>(fn) >= FN_STRSM);
-}
-
-template <typename ElemType>
-TrxmPerformanceTest<ElemType>::~TrxmPerformanceTest()
-{
- delete[] A_;
- delete[] B_;
- delete[] backB_;
-
- clReleaseMemObject(mobjB_);
- clReleaseMemObject(mobjA_);
-}
-
-/*
- * Check if available OpenCL resources are sufficient to
- * run the test case
- */
-template <typename ElemType> bool
-TrxmPerformanceTest<ElemType>::areResourcesSufficient(TestParams *params)
-{
- clMath::BlasBase *base;
- size_t gmemSize, allocSize, maxMatrSize;
- bool ret = true;
- size_t m = params->M, n = params->N;
- size_t asize;
- clblasSide side = params->side;
-
- base = clMath::BlasBase::getInstance();
- gmemSize = (size_t)base->availGlobalMemSize(0);
- allocSize = (size_t)base->maxMemAllocSize();
- asize = (side == clblasLeft) ? m : n;
-
- if (base->useImages()) {
- size_t iw;
-
- // overall 2 images 1/5 of gmemSize each and 2 memory objects
- maxMatrSize = 3 * gmemSize / 10;
- iw = base->scratchImageWidth() * sizeof(cl_float4) / sizeof(ElemType);
-
- if (isTrsm_) {
- size_t ih, nb;
-
- // check if matrix A is fitted to the image with 32x32 blocks
- ih = base->scratchImageHeight();
- nb = asize / 32 + (asize % 32 != 0);
- ret = ((asize * asize + nb * 32 * 32) / 2 < iw * ih);
- }
- else {
- ret = (std::max(n, asize) < iw);
- }
- }
- else {
- maxMatrSize = gmemSize / 2;
- }
- maxMatrSize = std::min(maxMatrSize, allocSize);
-
- if (ret) {
- ret = ((m * n * sizeof(ElemType) < maxMatrSize) &&
- (asize * asize * sizeof(ElemType) < maxMatrSize));
- }
-
- return ret;
-}
-
-template <typename ElemType> int
-TrxmPerformanceTest<ElemType>::prepare(void)
-{
- bool useAlpha = base_->useAlpha();
-
- if (useAlpha) {
- alpha_ = convertMultiplier<ElemType>(base_->alpha());
- }
-
- if (isTrsm_) {
- randomTrsmMatrices<ElemType>(params_.order, params_.side, params_.uplo,
- params_.diag, params_.M, params_.N, useAlpha,
- &alpha_, A_, params_.lda, B_, params_.ldb);
- }
- else {
- randomTrmmMatrices<ElemType>(params_.order, params_.side, params_.uplo,
- params_.diag, params_.M, params_.N, useAlpha,
- &alpha_, A_, params_.lda, B_, params_.ldb);
- }
-
- mobjA_ = base_->createEnqueueBuffer(A_, params_.rowsA * params_.columnsA *
- sizeof(ElemType),
- params_.offA * sizeof(ElemType),
- CL_MEM_READ_ONLY);
- if (mobjA_) {
- mobjB_ = base_->createEnqueueBuffer(backB_, params_.rowsB *
- params_.columnsB * sizeof(ElemType),
- params_.offBX * sizeof(ElemType),
- CL_MEM_READ_WRITE);
- }
-
- return (mobjB_) ? 0 : -1;
-}
-
-template <typename ElemType> nano_time_t
-TrxmPerformanceTest<ElemType>::etalonPerfSingle(void)
-{
- nano_time_t time = 0;
- clblasOrder order;
- size_t lda, ldb;
-
-#ifndef PERF_TEST_WITH_ROW_MAJOR
- if (params_.order == clblasRowMajor) {
- cerr << "Row major order is not allowed" << endl;
- return NANOTIME_ERR;
- }
-#endif
-
- memcpy(B_, backB_, params_.rowsB * params_.columnsB *
- sizeof(ElemType));
- order = params_.order;
- lda = params_.lda;
- ldb = params_.ldb;
-
-#ifdef PERF_TEST_WITH_ACML
-
- if (order == clblasRowMajor) {
- order = clblasColumnMajor;
- if (params_.side == clblasLeft) {
- lda = params_.M;
- }
- else {
- lda = params_.N;
- }
- ldb = params_.M;
- }
-
- time = getCurrentTime();
- if (isTrsm_) {
- clMath::blas::trsm(order, params_.side, params_.uplo,
- params_.transA, params_.diag,
- params_.M, params_.N,
- alpha_, A_, lda, B_, ldb);
- }
- else {
- clMath::blas::trmm(order, params_.side, params_.uplo,
- params_.transA, params_.diag,
- params_.M, params_.N,
- alpha_, A_, lda, B_, ldb);
- }
- time = getCurrentTime() - time;
-
-#endif // PERF_TEST_WITH_ACML
-
- return time;
-}
-
-template <typename ElemType> nano_time_t
-TrxmPerformanceTest<ElemType>::clblasPerfSingle(void)
-{
- nano_time_t time;
- cl_event event;
- cl_int status;
- cl_command_queue queue;
-
- queue = base_->commandQueues()[0];
-
- status = clEnqueueWriteBuffer(queue, mobjB_, CL_TRUE, 0,
- params_.rowsB * params_.columnsB *
- sizeof(ElemType), backB_, 0, NULL, &event);
- if (status != CL_SUCCESS) {
- cerr << "Matrix B buffer object enqueuing error, status = " <<
- status << endl;
-
- return NANOTIME_ERR;
- }
-
- status = clWaitForEvents(1, &event);
- if (status != CL_SUCCESS) {
- cerr << "Wait on event failed, status = " <<
- status << endl;
-
- return NANOTIME_ERR;
- }
-
- event = NULL;
-
- if (isTrsm_) {
- status = (cl_int)clMath::clblas::trsm(params_.order, params_.side,
- params_.uplo, params_.transA, params_.diag, params_.M, params_.N,
- alpha_, mobjA_, params_.offA, params_.lda, mobjB_, params_.offBX,
- params_.ldb, 1, &queue, 0, NULL, &event);
- }
- else {
- status = (cl_int)clMath::clblas::trmm(params_.order, params_.side,
- params_.uplo, params_.transA, params_.diag, params_.M, params_.N,
- alpha_, mobjA_, params_.offA, params_.lda, mobjB_, params_.offBX,
- params_.ldb, 1, &queue, 0, NULL, &event);
- }
-
- if (status != CL_SUCCESS) {
- cerr << "The CLBLAS TRXM function failed, status = " <<
- status << endl;
-
- return NANOTIME_ERR;
- }
- status = flushAll(1, &queue);
- if (status != CL_SUCCESS) {
- cerr << "clFlush() failed, status = " << status << endl;
- return NANOTIME_ERR;
- }
-
- time = getCurrentTime();
- status = waitForSuccessfulFinish(1, &queue, &event);
- if (status == CL_SUCCESS) {
- time = getCurrentTime() - time;
- }
- else {
- cerr << "Waiting for completion of commands to the queue failed, "
- "status = " << status << endl;
- time = NANOTIME_ERR;
- }
-
- return time;
-}
-
-template <typename ElemType> problem_size_t
-TrxmPerformanceTest<ElemType>::problemSize(TestParams *params)
-{
- problem_size_t size;
-
- if (params->side == clblasRight) {
- size = (problem_size_t)params->N * params->N * params->M;
- }
- else {
- size = (problem_size_t)params->M * params->M * params->N;
- }
-
- return size;
-}