diff options
Diffstat (limited to 'external/clBLAS/src/tests/correctness')
56 files changed, 0 insertions, 17712 deletions
diff --git a/external/clBLAS/src/tests/correctness/BlasBase-corr.cpp b/external/clBLAS/src/tests/correctness/BlasBase-corr.cpp deleted file mode 100644 index 2bc1494e..00000000 --- a/external/clBLAS/src/tests/correctness/BlasBase-corr.cpp +++ /dev/null @@ -1,41 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <BlasBase.h> - -namespace clMath { - -clblasStatus -BlasBase::addScratchImages(void) -{ - //clblasStatus status; - - //// Height must be less than 1024 - //imageA_ = clblasAddScratchImage(context_, 2048, 512, &status); - //if (imageA_) { - // imageB_ = clblasAddScratchImage(context_, 2048, 512, &status); - //} - - //return status; - return clblasNotImplemented; -} - -} // namespace diff --git a/external/clBLAS/src/tests/correctness/blas-lapack.c b/external/clBLAS/src/tests/correctness/blas-lapack.c deleted file mode 100644 index 9687bdf3..00000000 --- a/external/clBLAS/src/tests/correctness/blas-lapack.c +++ /dev/null @@ -1,941 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -/* - * cblas to lapack's blas interface adapter - */ - -#include <blas-cblas.h> - -#if !defined CORR_TEST_WITH_ACML - -#include "blas-lapack.h" -#if defined(__APPLE__) -#include <Accelerate/Accelerate.h> -#endif - -void -sgemv(char transa, int m, int n, float alpha, float *a, int lda, float *x, int incx, float beta, float *y, int incy) -{ - sgemv_(&transa, &m, &n, &alpha, a, &lda, x, &incx, &beta, y, &incy); -} - -void -dgemv(char transa, int m, int n, double alpha, double *a, int lda, double *x, int incx, double beta, double *y, int incy) -{ - dgemv_(&transa, &m, &n, &alpha, a, &lda, x, &incx, &beta, y, &incy); -} - -void -cgemv(char transa, int m, int n, complex *alpha, complex *a, int lda, complex *x, int incx, complex *beta, complex *y, int incy) -{ - cgemv_(&transa, &m, &n, alpha, a, &lda, x, &incx, beta, y, &incy); -} - -void -zgemv(char transa, int m, int n, doublecomplex *alpha, doublecomplex *a, int lda, doublecomplex *x, int incx, doublecomplex *beta, doublecomplex *y, int incy) -{ - zgemv_(&transa, &m, &n, alpha, a, &lda, x, &incx, beta, y, &incy); -} - -void -ssymv(char uplo, int n, float alpha, float *a, int lda, float *x, int incx, float beta, float *y, int incy) -{ - ssymv_(&uplo, &n, &alpha, a, &lda, x, &incx, &beta, y, &incy); -} - -void -dsymv(char uplo, int n, double alpha, double *a, int lda, double *x, int 
incx, double beta, double *y, int incy) -{ - dsymv_(&uplo, &n, &alpha, a, &lda, x, &incx, &beta, y, &incy); -} - -void -sgemm(char transa, char transb, int m, int n, int k, float alpha, float *a, int lda, float *b, int ldb, float beta, float *c, int ldc) -{ - sgemm_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); -} - -void -dgemm(char transa, char transb, int m, int n, int k, double alpha, double *a, int lda, double *b, int ldb, double beta, double *c, int ldc) -{ - dgemm_(&transa, &transb, &m, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); -} - -void -cgemm(char transa, char transb, int m, int n, int k, complex *alpha, complex *a, int lda, complex *b, int ldb, complex *beta, complex *c, int ldc) -{ - cgemm_(&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc); -} - -void -zgemm(char transa, char transb, int m, int n, int k, doublecomplex *alpha, doublecomplex *a, int lda, doublecomplex *b, int ldb, doublecomplex *beta, doublecomplex *c, int ldc) -{ - zgemm_(&transa, &transb, &m, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc); -} - -void -strmm(char side, char uplo, char transa, char diag, int m, int n, float alpha, float *a, int lda, float *b, int ldb) -{ - strmm_(&side, &uplo, &transa, &diag, &m, &n, &alpha, a, &lda, b, &ldb); -} - -void -dtrmm(char side, char uplo, char transa, char diag, int m, int n, double alpha, double *a, int lda, double *b, int ldb) -{ - dtrmm_(&side, &uplo, &transa, &diag, &m, &n, &alpha, a, &lda, b, &ldb); -} - -void -ctrmm(char side, char uplo, char transa, char diag, int m, int n, complex *alpha, complex *a, int lda, complex *b, int ldb) -{ - ctrmm_(&side, &uplo, &transa, &diag, &m, &n, alpha, a, &lda, b, &ldb); -} - -void -ztrmm(char side, char uplo, char transa, char diag, int m, int n, doublecomplex *alpha, doublecomplex *a, int lda, doublecomplex *b, int ldb) -{ - ztrmm_(&side, &uplo, &transa, &diag, &m, &n, alpha, a, &lda, b, &ldb); -} - -void -strsm(char side, char uplo, char 
transa, char diag, int m, int n, float alpha, float *a, int lda, float *b, int ldb) -{ - strsm_(&side, &uplo, &transa, &diag, &m, &n, &alpha, a, &lda, b, &ldb); -} - -void -dtrsm(char side, char uplo, char transa, char diag, int m, int n, double alpha, double *a, int lda, double *b, int ldb) -{ - dtrsm_(&side, &uplo, &transa, &diag, &m, &n, &alpha, a, &lda, b, &ldb); -} - -void -ctrsm(char side, char uplo, char transa, char diag, int m, int n, complex *alpha, complex *a, int lda, complex *b, int ldb) -{ - ctrsm_(&side, &uplo, &transa, &diag, &m, &n, alpha, a, &lda, b, &ldb); -} - -void -ztrsm(char side, char uplo, char transa, char diag, int m, int n, doublecomplex *alpha, doublecomplex *a, int lda, doublecomplex *b, int ldb) -{ - ztrsm_(&side, &uplo, &transa, &diag, &m, &n, alpha, a, &lda, b, &ldb); -} - -void -ssyr2k(char uplo, char transa, int n, int k, float alpha, float *a, int lda, float *b, int ldb, float beta, float *c, int ldc) -{ - ssyr2k_(&uplo, &transa, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); -} - -void -dsyr2k(char uplo, char transa, int n, int k, double alpha, double *a, int lda, double *b, int ldb, double beta, double *c, int ldc) -{ - dsyr2k_(&uplo, &transa, &n, &k, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); -} - -void -csyr2k(char uplo, char transa, int n, int k, complex *alpha, complex *a, int lda, complex *b, int ldb, complex *beta, complex *c, int ldc) -{ - csyr2k_(&uplo, &transa, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc); -} - -void -zsyr2k(char uplo, char transa, int n, int k, doublecomplex *alpha, doublecomplex *a, int lda, doublecomplex *b, int ldb, doublecomplex *beta, doublecomplex *c, int ldc) -{ - zsyr2k_(&uplo, &transa, &n, &k, alpha, a, &lda, b, &ldb, beta, c, &ldc); -} - -void -ssyrk(char uplo, char transa, int n, int k, float alpha, float *a, int lda, float beta, float *c, int ldc) -{ - ssyrk_(&uplo, &transa, &n, &k, &alpha, a, &lda, &beta, c, &ldc); -} - -void -dsyrk(char uplo, char transa, int n, int k, double 
alpha, double *a, int lda, double beta, double *c, int ldc) -{ - dsyrk_(&uplo, &transa, &n, &k, &alpha, a, &lda, &beta, c, &ldc); -} - -void -csyrk(char uplo, char transa, int n, int k, complex *alpha, complex *a, int lda, complex *beta, complex *c, int ldc) -{ - csyrk_(&uplo, &transa, &n, &k, alpha, a, &lda, beta, c, &ldc); -} - -void -zsyrk(char uplo, char transa, int n, int k, doublecomplex *alpha, doublecomplex *a, int lda, doublecomplex *beta, doublecomplex *c, int ldc) -{ - zsyrk_(&uplo, &transa, &n, &k, alpha, a, &lda, beta, c, &ldc); -} - -void -strmv(char uplo, char transa, char diag, int n, float *a, int lda, float *x, int incx) -{ - strmv_( &uplo, &transa, &diag, &n, a, &lda, x, &incx); -} - -void -dtrmv(char uplo, char transa, char diag, int n, double *a, int lda, double *x, int incx) -{ - dtrmv_( &uplo, &transa, &diag, &n, a, &lda, x, &incx); -} - -void -ctrmv(char uplo, char transa, char diag, int n, complex *a, int lda, complex *x, int incx) -{ - ctrmv_( &uplo, &transa, &diag, &n, a, &lda, x, &incx); -} - -void -ztrmv(char uplo, char transa, char diag, int n, doublecomplex *a, int lda, doublecomplex *x, int incx) -{ - ztrmv_( &uplo, &transa, &diag, &n, a, &lda, x, &incx); -} - -void -strsv(char uplo, char transa, char diag, int n, float *a, int lda, float *x, int incx) -{ - strsv_( &uplo, &transa, &diag, &n, a, &lda, x, &incx); -} - -void -dtrsv(char uplo, char transa, char diag, int n, double *a, int lda, double *x, int incx) -{ - dtrsv_( &uplo, &transa, &diag, &n, a, &lda, x, &incx); -} - -void -ctrsv(char uplo, char transa, char diag, int n, complex *a, int lda, complex *x, int incx) -{ - ctrsv_( &uplo, &transa, &diag, &n, a, &lda, x, &incx); -} - -void -ztrsv(char uplo, char transa, char diag, int n, doublecomplex *a, int lda, doublecomplex *x, int incx) -{ - ztrsv_( &uplo, &transa, &diag, &n, a, &lda, x, &incx); -} - -void -ssymm(char side, char uplo, int m, int n, float alpha, float *a, int lda, float *b, int ldb, float beta, float *c, int ldc) 
-{ - ssymm_( &side, &uplo, &m, &n, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); -} - -void -dsymm(char side, char uplo, int m, int n, double alpha, double *a, int lda, double *b, int ldb, double beta, double *c, int ldc) -{ - dsymm_( &side, &uplo, &m, &n, &alpha, a, &lda, b, &ldb, &beta, c, &ldc); -} - -void -csymm(char side, char uplo, int m, int n, complex *alpha, complex *a, int lda, complex *b, int ldb, complex *beta, complex *c, int ldc) -{ - csymm_( &side, &uplo, &m, &n, alpha, a, &lda, b, &ldb, beta, c, &ldc); -} - -void -zsymm(char side, char uplo, int m, int n, doublecomplex *alpha, doublecomplex *a, int lda, doublecomplex *b, int ldb, doublecomplex *beta, doublecomplex *c, int ldc) -{ - zsymm_( &side, &uplo, &m, &n, alpha, a, &lda, b, &ldb, beta, c, &ldc); -} - -void -sger(int m, int n, float alpha, float *x, int incx, float *y, int incy, float *a, int lda) -{ - sger_( &m, &n, &alpha, x, &incx, y, &incy, a, &lda); -} - -void -dger(int m, int n, double alpha, double *x, int incx, double *y, int incy, double *a, int lda) -{ - dger_( &m, &n, &alpha, x, &incx, y, &incy, a, &lda); -} - -void -cgeru(int m, int n, complex *alpha, complex *x, int incx, complex *y, int incy, complex *a, int lda) -{ - cgeru_( &m, &n, alpha, x, &incx, y, &incy, a, &lda); -} - -void -zgeru(int m, int n, doublecomplex *alpha, doublecomplex *x, int incx, doublecomplex *y, int incy, doublecomplex *a, int lda) -{ - zgeru_( &m, &n, alpha, x, &incx, y, &incy, a, &lda); -} - -void -cgerc(int m, int n, complex *alpha, complex *x, int incx, complex *y, int incy, complex *a, int lda) -{ - cgerc_( &m, &n, alpha, x, &incx, y, &incy, a, &lda); -} - -void -zgerc(int m, int n, doublecomplex *alpha, doublecomplex *x, int incx, doublecomplex *y, int incy, doublecomplex *a, int lda) -{ - zgerc_( &m, &n, alpha, x, &incx, y, &incy, a, &lda); -} - -void -ssyr(char uplo, int n, float alpha, float *x, int incx, float *a, int lda) -{ - ssyr_( &uplo, &n, &alpha, x, &incx, a, &lda); -} - -void -dsyr(char uplo, 
int n, double alpha, double *x, int incx, double *a, int lda) -{ - dsyr_( &uplo, &n, &alpha, x, &incx, a, &lda); -} - -void -ssyr2(char uplo, int n, float alpha, float *x, int incx, float *y, int incy, float *a, int lda) -{ - ssyr2_( &uplo, &n, &alpha, x, &incx, y, &incy, a, &lda); -} - -void -dsyr2(char uplo, int n, double alpha, double *x, int incx, double *y, int incy, double *a, int lda) -{ - dsyr2_( &uplo, &n, &alpha, x, &incx, y, &incy, a, &lda); -} - -void -cher(char uplo, int n, float alpha, complex *x, int incx, complex *a, int lda) -{ - cher_( &uplo, &n, &alpha, x, &incx, a, &lda); -} - -void -zher(char uplo, int n, double alpha, doublecomplex *x, int incx, doublecomplex *a, int lda) -{ - zher_( &uplo, &n, &alpha, x, &incx, a, &lda); -} - -void -cher2(char uplo, int n, complex *alpha, complex *x, int incx, complex *y, int incy, complex *a, int lda) -{ - cher2_( &uplo, &n, alpha, x, &incx, y, &incy, a, &lda); -} - -void -zher2(char uplo, int n, doublecomplex *alpha, doublecomplex *x, int incx, doublecomplex *y, int incy, doublecomplex *a, int lda) -{ - zher2_( &uplo, &n, alpha, x, &incx, y, &incy, a, &lda); -} - -void -chemv(char uplo, int n, complex *alpha, complex *a, int lda, complex *x, int incx, complex *beta, complex *y, int incy) -{ - chemv_( &uplo, &n, alpha, a, &lda, x, &incx, beta, y, &incy ); -} - -void -zhemv(char uplo, int n, doublecomplex *alpha, doublecomplex *a, int lda, doublecomplex *x, int incx, doublecomplex *beta, doublecomplex *y, int incy) -{ - zhemv_( &uplo, &n, alpha, a, &lda, x, &incx, beta, y, &incy ); -} - -void -stpmv(char uplo, char transa, char diag, int n, float *ap, float *x, int incx) -{ - stpmv_( &uplo, &transa, &diag, &n, ap, x, &incx); -} - -void -dtpmv(char uplo, char transa, char diag, int n, double *ap, double *x, int incx) -{ - dtpmv_( &uplo, &transa, &diag, &n, ap, x, &incx); -} - -void -ctpmv(char uplo, char transa, char diag, int n, complex *ap, complex *x, int incx) -{ - ctpmv_( &uplo, &transa, &diag, &n, ap, x, 
&incx); -} - -void -ztpmv(char uplo, char transa, char diag, int n, doublecomplex *ap, doublecomplex *x, int incx) -{ - ztpmv_( &uplo, &transa, &diag, &n, ap, x, &incx); -} - -void -stpsv(char uplo, char transa, char diag, int n, float *ap, float *x, int incx) -{ - stpsv_( &uplo, &transa, &diag, &n, ap, x, &incx); -} - -void -dtpsv(char uplo, char transa, char diag, int n, double *ap, double *x, int incx) -{ - dtpsv_( &uplo, &transa, &diag, &n, ap, x, &incx); -} - -void -ctpsv(char uplo, char transa, char diag, int n, complex *ap, complex *x, int incx) -{ - ctpsv_( &uplo, &transa, &diag, &n, ap, x, &incx); -} - -void -ztpsv(char uplo, char transa, char diag, int n, doublecomplex *ap, doublecomplex *x, int incx) -{ - ztpsv_( &uplo, &transa, &diag, &n, ap, x, &incx); -} - -void -sspr(char uplo, int n, float alpha, float *x, int incx, float *ap ) -{ - sspr_( &uplo, &n, &alpha, x, &incx, ap ); -} - -void -dspr(char uplo, int n, double alpha, double *x, int incx, double *ap ) -{ - dspr_( &uplo, &n, &alpha, x, &incx, ap ); -} - -void -sspmv(char uplo, int n, float alpha, float *ap, float *x, int incx, float beta, float *y, int incy) -{ - sspmv_( &uplo, &n, &alpha, ap, x, &incx, &beta, y, &incy ); -} - -void -dspmv(char uplo, int n, double alpha, double *ap, double *x, int incx, double beta, double *y, int incy) -{ - dspmv_( &uplo, &n, &alpha, ap, x, &incx, &beta, y, &incy ); -} - -void -chpmv(char uplo, int n, complex *alpha, complex *ap, complex *x, int incx, complex *beta, complex *y, int incy) -{ - chpmv_( &uplo, &n, alpha, ap, x, &incx, beta, y, &incy ); -} - -void -zhpmv(char uplo, int n, doublecomplex *alpha, doublecomplex *ap, doublecomplex *x, int incx, doublecomplex *beta, doublecomplex *y, int incy) -{ - zhpmv_( &uplo, &n, alpha, ap, x, &incx, beta, y, &incy ); -} - -void -chpr(char uplo, int n, float alpha, complex *x, int incx, complex *ap ) -{ - chpr_( &uplo, &n, &alpha, x, &incx, ap ); -} - -void -zhpr(char uplo, int n, double alpha, doublecomplex *x, int 
incx, doublecomplex *ap ) -{ - zhpr_( &uplo, &n, &alpha, x, &incx, ap ); -} - -void -sspr2(char uplo, int n, float alpha, float *x, int incx, float *y, int incy, float *a ) -{ - sspr2_( &uplo, &n, &alpha, x, &incx, y, &incy, a ); -} -void -dspr2(char uplo, int n, double alpha, double *x, int incx, double *y, int incy, double *a ) -{ - dspr2_( &uplo, &n, &alpha, x, &incx, y, &incy, a ); -} -void -chpr2(char uplo, int n, complex *alpha, complex *x, int incx, complex *y, int incy, complex *a ) -{ - chpr2_( &uplo, &n, alpha, x, &incx, y, &incy, a ); -} -void -zhpr2(char uplo, int n, doublecomplex *alpha, doublecomplex *x, int incx, doublecomplex *y, int incy, doublecomplex *a ) -{ - zhpr2_( &uplo, &n, alpha, x, &incx, y, &incy, a ); -} - -void -sgbmv(char trans, int m, int n, int kl, int ku, float alpha, float *a, int inca, float *x, int incx, float beta, float *y, int incy ) -{ - sgbmv_( &trans, &m, &n, &kl, &ku, &alpha, a, &inca, x, &incx, &beta, y, &incy ); -} -void -dgbmv(char trans, int m, int n, int kl, int ku, double alpha, double *a, int inca, double *x, int incx, double beta, double *y, int incy ) -{ - dgbmv_( &trans, &m, &n, &kl, &ku, &alpha, a, &inca, x, &incx, &beta, y, &incy ); -} -void -cgbmv(char trans, int m, int n, int kl, int ku, complex *alpha, complex *a, int inca, complex *x, int incx, complex *beta, complex *y, int incy ) -{ - cgbmv_( &trans, &m, &n, &kl, &ku, alpha, a, &inca, x, &incx, beta, y, &incy ); -} -void -zgbmv(char trans, int m, int n, int kl, int ku, doublecomplex *alpha, doublecomplex *a, int inca, doublecomplex *x, int incx, doublecomplex *beta, doublecomplex *y, int incy ) -{ - zgbmv_( &trans, &m, &n, &kl, &ku, alpha, a, &inca, x, &incx, beta, y, &incy ); -} - -void -stbmv(char uplo, char trans, char diag, int n, int k, float *a, int lda, float *x, int incx ) -{ - stbmv_( &uplo, &trans, &diag, &n, &k, a, &lda, x, &incx ); -} - -void -dtbmv(char uplo, char trans, char diag, int n, int k, double *a, int lda, double *x, int incx ) -{ - 
dtbmv_( &uplo, &trans, &diag, &n, &k, a, &lda, x, &incx ); -} - -void -ctbmv(char uplo, char trans, char diag, int n, int k, complex *a, int lda, complex *x, int incx ) -{ - ctbmv_( &uplo, &trans, &diag, &n, &k, a, &lda, x, &incx ); -} - -void -ztbmv(char uplo, char trans, char diag, int n, int k, doublecomplex *a, int lda, doublecomplex *x, int incx ) -{ - ztbmv_( &uplo, &trans, &diag, &n, &k, a, &lda, x, &incx ); -} - -void -ssbmv(char uplo, int n, int k, float alpha, float *a, int lda, float *x, int incx, float beta, float *y, int incy ) -{ - ssbmv_( &uplo, &n, &k, &alpha, a, &lda, x, &incx, &beta, y, &incy ); -} - -void -dsbmv(char uplo, int n, int k, double alpha, double *a, int lda, double *x, int incx, double beta, double *y, int incy ) -{ - dsbmv_( &uplo, &n, &k, &alpha, a, &lda, x, &incx, &beta, y, &incy ); -} - -void -chbmv(char uplo, int n, int k, complex *alpha, complex *a, int lda, complex *x, int incx, complex *beta, complex *y, int incy ) -{ - chbmv_( &uplo, &n, &k, alpha, a, &lda, x, &incx, beta, y, &incy ); -} - -void -zhbmv(char uplo, int n, int k, doublecomplex *alpha, doublecomplex *a, int lda, doublecomplex *x, int incx, doublecomplex *beta, doublecomplex *y, int incy ) -{ - zhbmv_( &uplo, &n, &k, alpha, a, &lda, x, &incx, beta, y, &incy ); -} - -void -stbsv(char uplo, char trans, char diag, int n, int k, float *a, int lda, float *x, int incx ) -{ - stbsv_( &uplo, &trans, &diag, &n, &k, a, &lda, x, &incx ); -} - -void -dtbsv(char uplo, char trans, char diag, int n, int k, double *a, int lda, double *x, int incx ) -{ - dtbsv_( &uplo, &trans, &diag, &n, &k, a, &lda, x, &incx ); -} - -void -ctbsv(char uplo, char trans, char diag, int n, int k, complex *a, int lda, complex *x, int incx ) -{ - ctbsv_( &uplo, &trans, &diag, &n, &k, a, &lda, x, &incx ); -} - -void -ztbsv(char uplo, char trans, char diag, int n, int k, doublecomplex *a, int lda, doublecomplex *x, int incx ) -{ - ztbsv_( &uplo, &trans, &diag, &n, &k, a, &lda, x, &incx ); -} - -void 
-chemm(char side, char uplo, int m, int n, complex *alpha, complex *a, int lda, complex *b, int ldb, complex *beta, complex *c, int ldc) -{ - chemm_( &side, &uplo, &m, &n, alpha, a, &lda, b, &ldb, beta, c, &ldc); -} - -void -zhemm(char side, char uplo, int m, int n, doublecomplex *alpha, doublecomplex *a, int lda, doublecomplex *b, int ldb, doublecomplex *beta, doublecomplex *c, int ldc) -{ - zhemm_( &side, &uplo, &m, &n, alpha, a, &lda, b, &ldb, beta, c, &ldc); -} - -void -cherk(char uplo, char transa, int n, int k, float alpha, complex *a, int lda, float beta, complex *c, int ldc) -{ - cherk_( &uplo, &transa, &n, &k, &alpha, a, &lda, &beta, c, &ldc); -} - -void -zherk(char uplo, char transa, int n, int k, double alpha, doublecomplex *a, int lda, double beta, doublecomplex *c, int ldc) -{ - zherk_( &uplo, &transa, &n, &k, &alpha, a, &lda, &beta, c, &ldc); -} - -void -cher2k(char uplo, char transa, int n, int k, complex *alpha, complex *a, int lda, complex *b, int ldb, float beta, complex *c, int ldc) -{ - cher2k_( &uplo, &transa, &n, &k, alpha, a, &lda, b, &ldb, &beta, c, &ldc); -} - -void -zher2k(char uplo, char transa, int n, int k, doublecomplex *alpha, doublecomplex *a, int lda, doublecomplex *b, int ldb, double beta, doublecomplex *c, int ldc) -{ - zher2k_( &uplo, &transa, &n, &k, alpha, a, &lda, b, &ldb, &beta, c, &ldc); -} - -void sscal( int n, float alpha, float *x, int incx) -{ - sscal_(&n, &alpha, x, &incx); -} - -void dscal( int n, double alpha, double *x, int incx) -{ - dscal_(&n, &alpha, x, &incx); -} - -void cscal( int n, complex* alpha, complex *x, int incx) -{ - cscal_(&n, alpha, x, &incx); -} - -void zscal( int n, doublecomplex* alpha, doublecomplex *x, int incx) -{ - zscal_(&n, alpha, x, &incx); -} - -void csscal( int n, float alpha, complex *x, int incx) -{ - csscal_(&n, &alpha, x, &incx); -} - -void zdscal( int n, double alpha, doublecomplex *x, int incx) -{ - zdscal_(&n, &alpha, x, &incx); -} - -float sdot( int n, float *x, int incx, float *y, 
int incy) -{ -#ifdef __APPLE__ - return cblas_sdot(n, x, incx, y, incy); -#else - return sdot_(&n, x, &incx, y, &incy); -#endif -} - -double ddot( int n, double *x, int incx, double *y, int incy) -{ -#ifdef __APPLE__ - return cblas_ddot(n, x, incx, y, incy); -#else - return ddot_(&n, x, &incx, y, &incy); -#endif -} - -complex cdotu( int n, complex *x, int incx, complex *y, int incy) -{ - complex ans; - -#if defined( _WIN32 ) || defined( _WIN64 ) - ans = cdotu_(&n, x, &incx, y, &incy); - #elif defined( __APPLE__) - cblas_cdotu_sub(n, x, incx, y, incy, &ans); - #else - cdotusub_(&n, x, &incx, y, &incy, &ans); - #endif - - return ans; -} - -doublecomplex zdotu( int n, doublecomplex *x, int incx, doublecomplex *y, int incy) -{ - doublecomplex ans; - - #if defined( _WIN32 ) || defined( _WIN64 ) - ans = zdotu_(&n, x, &incx, y, &incy); - #elif defined(__APPLE__) - cblas_zdotu_sub(n, x, incx, y, incy, &ans); - #else - zdotusub_(&n, x, &incx, y, &incy, &ans); - #endif - - return ans; -} - -complex cdotc( int n, complex *x, int incx, complex *y, int incy) -{ - complex ans; - - #if defined( _WIN32 ) || defined( _WIN64 ) - ans = cdotc_(&n, x, &incx, y, &incy); - #elif defined(__APPLE__) - cblas_cdotc_sub(n, x, incx, y, incy, &ans); - #else - cdotcsub_(&n, x, &incx, y, &incy, &ans); - #endif - - return ans; -} - -doublecomplex zdotc( int n, doublecomplex *x, int incx, doublecomplex *y, int incy) -{ - doublecomplex ans; - - #if defined( _WIN32 ) || defined( _WIN64 ) - ans = zdotc_(&n, x, &incx, y, &incy); - #elif defined(__APPLE__) - cblas_zdotc_sub(n, x, incx, y, incy, &ans); - #else - zdotcsub_(&n, x, &incx, y, &incy, &ans); - #endif - - return ans; -} - -void scopy( int n, float *x, int incx, float *y, int incy) -{ - scopy_(&n, x, &incx, y, &incy); -} - -void dcopy( int n, double *x, int incx, double *y, int incy) -{ - dcopy_(&n, x, &incx, y, &incy); -} - -void ccopy( int n, complex *x, int incx, complex *y, int incy) -{ - ccopy_(&n, x, &incx, y, &incy); -} - -void zcopy( int 
n, doublecomplex *x, int incx, doublecomplex *y, int incy) -{ - zcopy_(&n, x, &incx, y, &incy); -} - -void sswap( int n, float *x, int incx, float *y, int incy) -{ - sswap_(&n, x, &incx, y, &incy); -} - -void dswap( int n, double *x, int incx, double *y, int incy) -{ - dswap_(&n, x, &incx, y, &incy); -} - -void cswap( int n, complex *x, int incx, complex *y, int incy) -{ - cswap_(&n, x, &incx, y, &incy); -} - -void zswap( int n, doublecomplex *x, int incx, doublecomplex *y, int incy) -{ - zswap_(&n, x, &incx, y, &incy); -} - -void saxpy( int n, float alpha, float *x, int incx, float *y, int incy) -{ - saxpy_(&n, &alpha, x, &incx, y, &incy); -} - -void daxpy( int n, double alpha, double *x, int incx, double *y, int incy) -{ - daxpy_(&n, &alpha, x, &incx, y, &incy); -} - -void caxpy( int n, complex *alpha, complex *x, int incx, complex *y, int incy) -{ - caxpy_(&n, alpha, x, &incx, y, &incy); -} - -void zaxpy( int n, doublecomplex *alpha, doublecomplex *x, int incx, doublecomplex *y, int incy) -{ - zaxpy_(&n, alpha, x, &incx, y, &incy); -} - -void srotg(float *A, float *B, float *C, float *S) -{ - srotg_(A, B, C, S); -} - -void drotg(double *A, double *B, double *C, double *S) -{ - drotg_(A, B, C, S); -} - -void crotg(complex *A, complex *B, float *C, complex *S) -{ - crotg_(A, B, C, S); -} - -void zrotg(doublecomplex *A, doublecomplex *B, double *C, doublecomplex *S) -{ - zrotg_(A, B, C, S); -} - -void srotmg(float *D1, float *D2, float *X1, const float *Y1, float *PARAM) -{ - srotmg_(D1, D2, X1, (float*)Y1, PARAM); -} - -void drotmg(double *D1, double *D2, double *X1, const double *Y1, double *PARAM) -{ - drotmg_(D1, D2, X1, (double*)Y1, PARAM); -} - -void srot(int N, float *x, int incx, float *y, int incy, float c, float s) -{ - srot_(&N, x, &incx, y, &incy, &c, &s); -} - -void drot(int N, double *x, int incx, double *y, int incy, double c, double s) -{ - drot_(&N, x, &incx, y, &incy, &c, &s); -} - -void csrot(int N, complex *x, int incx, complex *y, int incy, 
float c, float s) -{ - csrot_(&N, x, &incx, y, &incy, &c, &s); -} - -void zdrot(int N, doublecomplex *cx, int incx, doublecomplex *cy, int incy, double c, double s) -{ - zdrot_(&N, cx, &incx, cy, &incy, &c, &s); -} - -void srotm(int N, float *X, int incx, float *Y, int incy, float* PARAM) -{ - srotm_(&N, X, &incx, Y, &incy, PARAM); -} - -void drotm(int N, double *X, int incx, double *Y, int incy, double* PARAM) -{ - drotm_(&N, X, &incx, Y, &incy, PARAM); -} - -int isamax( int n, float *x, int incx) -{ - return isamax_(&n, x, &incx); -} - -int idamax( int n, double *x, int incx) -{ - return idamax_(&n, x, &incx); -} - -int icamax( int n, complex *x, int incx) -{ - return icamax_(&n, x, &incx); -} - -int izamax( int n, doublecomplex *x, int incx) -{ - return izamax_(&n, x, &incx); -} - -float snrm2( int n, float *x, int incx) -{ -#ifdef __APPLE__ - //On OSX passing negative values for incx can lead to a - //a crash, so we catch it here (cf. Github issue #37). - if (n < 1 || incx < 1) { - return 0; - } - return cblas_snrm2(n, x, incx); -#else - return snrm2_(&n, x, &incx); -#endif -} - -double dnrm2( int n, double *x, int incx) -{ -#ifdef __APPLE__ - //On OSX passing negative values for incx can lead to a - //a crash, so we catch it here (cf. Github issue #37). - if (n < 1 || incx < 1) { - return 0; - } - return cblas_dnrm2(n, x, incx); -#else - return dnrm2_(&n, x, &incx); -#endif -} - -float scnrm2( int n, complex *x, int incx) -{ -#ifdef __APPLE__ - //On OSX passing negative values for incx can lead to a - //a crash, so we catch it here (cf. Github issue #37). - if (n < 1 || incx < 1) { - return 0; - } - return cblas_scnrm2(n, x, incx); -#else - return scnrm2_(&n, x, &incx); -#endif -} - -double dznrm2( int n, doublecomplex *x, int incx) -{ -#ifdef __APPLE__ - //On OSX passing negative values for incx can lead to a - //a crash, so we catch it here (cf. Github issue #37). 
- if (n < 1 || incx < 1) { - return 0; - } - return cblas_dznrm2(n, x, incx); -#else - return dznrm2_(&n, x, &incx); -#endif -} - -float sasum( int n, float *x, int incx) -{ -#ifdef __APPLE__ - return cblas_sasum(n, x, incx); -#else - return sasum_(&n, x, &incx); -#endif -} - -double dasum( int n, double *x, int incx) -{ -#ifdef __APPLE__ - return cblas_dasum(n, x, incx); -#else - return dasum_(&n, x, &incx); -#endif -} - -float scasum( int n, complex *x, int incx) -{ -#ifdef __APPLE__ - return cblas_scasum(n, x, incx); -#else - return scasum_(&n, x, &incx); -#endif -} - -double dzasum( int n, doublecomplex *x, int incx) -{ -#ifdef __APPLE__ - return cblas_dzasum(n, x, incx); -#else - return dzasum_(&n, x, &incx); -#endif -} - -#endif diff --git a/external/clBLAS/src/tests/correctness/blas-lapack.h b/external/clBLAS/src/tests/correctness/blas-lapack.h deleted file mode 100644 index d2db1aa3..00000000 --- a/external/clBLAS/src/tests/correctness/blas-lapack.h +++ /dev/null @@ -1,1225 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#ifndef BLAS_LAPACK_H_ -#define BLAS_LAPACK_H_ - -#ifdef __cplusplus -extern "C" { -#endif - -/* BLAS-2 functions */ - -void sgemv_( - const char *transA, - const int *M, - const int *N, - const float *alpha, - const float *A, - const int *lda, - const float *X, - const int *incx, - const float *beta, - float *Y, - const int *incy); - -void dgemv_( - const char *transA, - const int *M, - const int *N, - const double *alpha, - const double *A, - const int *lda, - const double *X, - const int *incx, - const double *beta, - double *Y, - const int *incy); - -void cgemv_( - const char *transA, - const int *M, - const int *N, - const complex *alpha, - const complex *A, - const int *lda, - const complex *X, - const int *incx, - const complex *beta, - complex *Y, - const int *incy); - -void zgemv_( - const char *transA, - const int *M, - const int *N, - const doublecomplex *alpha, - const doublecomplex *A, - const int *lda, - const doublecomplex *X, - const int *incx, - const doublecomplex *beta, - doublecomplex *Y, - const int *incy); - -void ssymv_( - const char *uplo, - const int *N, - const float *alpha, - const float *A, - const int *lda, - const float *X, - const int *incx, - const float *beta, - float *Y, - int *incy); - -void dsymv_( - const char *uplo, - const int *N, - const double *alpha, - const double *A, - const int *lda, - const double *X, - const int *incx, - const double *beta, - double *Y, - const int *incy); - -/* BLAS-3 functions */ - -void sgemm_( - const char *transA, - const char *transB, - const int *M, - const int *N, - const int *K, - const float *alpha, - const float *A, - const int *lda, - const float *B, - const int *ldb, - const float *beta, - float *C, - const int *ldc); - -void dgemm_( - const char *transA, - const char *transB, - const int *M, - const int *N, - const int *K, - const double *alpha, - const double *A, - const int *lda, - const double *B, - 
const int *ldb, - const double *beta, - double *C, - const int *ldc); - -void cgemm_( - const char *transA, - const char *transB, - const int *M, - const int *N, - const int *K, - const complex *alpha, - const complex *A, - const int *lda, - const complex *B, - const int *ldb, - const complex *beta, - complex *C, - const int *ldc); - -void zgemm_( - const char *transA, - const char *transB, - const int *M, - const int *N, - const int *K, - const doublecomplex *alpha, - const doublecomplex *A, - const int *lda, - const doublecomplex *B, - const int *ldb, - const doublecomplex *beta, - doublecomplex *C, - const int *ldc); - -void strmm_( - const char *side, - const char *uplo, - const char *transA, - const char *diag, - const int *M, - const int *N, - const float *alpha, - const float *A, - const int *lda, - float *B, - const int *ldb); - -void dtrmm_( - const char *side, - const char *uplo, - const char *transA, - const char *diag, - const int *M, - const int *N, - const double *alpha, - const double *A, - const int *lda, - double *B, - const int *ldb); - -void ctrmm_( - const char *side, - const char *uplo, - const char *transA, - const char *diag, - const int *M, - const int *N, - const complex *alpha, - const complex *A, - const int *lda, - complex *B, - const int *ldb); - -void ztrmm_( - const char *side, - const char *uplo, - const char *transA, - const char *diag, - const int *M, - const int *N, - const doublecomplex *alpha, - const doublecomplex *A, - const int *lda, - doublecomplex *B, - const int *ldb); - -void strsm_( - const char *side, - const char *uplo, - const char *transA, - const char *diag, - const int *M, - const int *N, - const float *aplha, - const float *A, - const int *lda, - float *B, - const int *ldb); - -void dtrsm_( - const char *side, - const char *uplo, - const char *transA, - const char *diag, - const int *M, - const int *N, - const double *alpha, - const double *A, - const int *lda, - double *B, - const int *ldb); - -void ctrsm_( - 
const char *side, - const char *uplo, - const char *transA, - const char *diag, - const int *M, - const int *N, - const complex *alpha, - const complex *A, - const int *lda, - complex *B, - const int *ldb); - -void ztrsm_( - const char *side, - const char *uplo, - const char *transA, - const char *diag, - const int *M, - const int *N, - const doublecomplex *alpha, - const doublecomplex *A, - const int *lda, - doublecomplex *B, - const int *ldb); - -void ssyr2k_( - const char *uplo, - const char *transA, - const int *N, - const int *K, - const float *alpha, - const float *A, - const int *lda, - const float *B, - const int *ldb, - const float *beta, - float *C, - const int *ldc); - -void dsyr2k_( - const char *uplo, - const char *transA, - const int *N, - const int *K, - const double *alpha, - const double *A, - const int *lda, - const double *B, - const int *ldb, - const double *beta, - double *C, - const int *ldc); - -void csyr2k_( - const char *uplo, - const char *transA, - const int *N, - const int *K, - const complex *alpha, - const complex *A, - const int *lda, - const complex *B, - const int *ldb, - const complex *beta, - complex *C, - const int *ldc); - -void zsyr2k_( - const char *uplo, - const char *transA, - const int *N, - const int *K, - const doublecomplex *alpha, - const doublecomplex *A, - const int *lda, - const doublecomplex *B, - const int *ldb, - const doublecomplex *beta, - doublecomplex *C, - const int *ldc); - -void ssyrk_( - const char *uplo, - const char *transA, - const int *N, - const int *K, - const float *alpha, - const float *A, - const int *lda, - const float *beta, - float *C, - const int *ldc); - -void dsyrk_( - const char *uplo, - const char *transA, - const int *N, - const int *K, - const double *alpha, - const double *A, - const int *lda, - const double *beta, - double *C, - const int *ldc); - -void csyrk_( - const char *uplo, - const char *transA, - const int *N, - const int *K, - const complex *alpha, - const complex *A, - const 
int *lda, - const complex *beta, - complex *C, - const int *ldc); - -void zsyrk_( - const char *uplo, - const char *transA, - const int *N, - const int *K, - const doublecomplex *alpha, - const doublecomplex *A, - const int *lda, - const doublecomplex *beta, - doublecomplex *C, - const int *ldc); - -void strmv_( - const char *uplo, - const char *transa, - const char *diag, - const int *n, - const float *a, - const int *lda, - float *x, - const int *incx); - -void dtrmv_( - const char *uplo, - const char *transa, - const char *diag, - const int *n, - const double *a, - const int *lda, - double *x, - const int *incx); - -void ctrmv_( - const char *uplo, - const char *transa, - const char *diag, - const int *n, - const complex *a, - const int *lda, - complex *x, - const int *incx); - -void ztrmv_( - const char *uplo, - const char *transa, - const char *diag, - const int *n, - const doublecomplex *a, - const int *lda, - doublecomplex *x, - const int *incx); - -void strsv_( - const char *uplo, - const char *transa, - const char *diag, - const int *n, - const float *a, - const int *lda, - float *x, - const int *incx); - -void dtrsv_( - const char *uplo, - const char *transa, - const char *diag, - const int *n, - const double *a, - const int *lda, - double *x, - const int *incx); - -void ctrsv_( - const char *uplo, - const char *transa, - const char *diag, - const int *n, - const complex *a, - const int *lda, - complex *x, - const int *incx); - -void ztrsv_( - const char *uplo, - const char *transa, - const char *diag, - const int *n, - const doublecomplex *a, - const int *lda, - doublecomplex *x, - const int *incx); - -void ssymm_( - const char *side, - const char *uplo, - const int *m, - const int *n, - const float *alpha, - const float *a, - const int *lda, - const float *b, - const int *ldb, - const float *beta, - float *c, - const int *ldc); - -void dsymm_( - const char *side, - const char *uplo, - const int *m, - const int *n, - const double *alpha, - const double 
*a, - const int *lda, - const double *b, - const int *ldb, - const double *beta, - double *c, - const int *ldc); - -void csymm_( - const char *side, - const char *uplo, - const int *m, - const int *n, - const complex *alpha, - const complex *a, - const int *lda, - const complex *b, - const int *ldb, - const complex *beta, - complex *c, - const int *ldc); - -void zsymm_( - const char *side, - const char *uplo, - const int *m, - const int *n, - const doublecomplex *alpha, - const doublecomplex *a, - const int *lda, - const doublecomplex *b, - const int *ldb, - const doublecomplex *beta, - doublecomplex *c, - const int *ldc); - -void sger_( - const int *m, - const int *n, - const float *alpha, - const float *x, - const int *incx, - const float *y, - const int *incy, - float *a, - const int *lda); - -void dger_( - const int *m, - const int *n, - const double *alpha, - const double *x, - const int *incx, - const double *y, - const int *incy, - double *a, - const int *lda); - -void cgeru_( - const int *m, - const int *n, - const complex *alpha, - const complex *x, - const int *incx, - const complex *y, - const int *incy, - complex *a, - const int *lda); - -void zgeru_( - const int *m, - const int *n, - const doublecomplex *alpha, - const doublecomplex *x, - const int *incx, - const doublecomplex *y, - const int *incy, - doublecomplex *a, - const int *lda); - -void cgerc_( - const int *m, - const int *n, - const complex *alpha, - const complex *x, - const int *incx, - const complex *y, - const int *incy, - complex *a, - const int *lda); - -void zgerc_( - const int *m, - const int *n, - const doublecomplex *alpha, - const doublecomplex *x, - const int *incx, - const doublecomplex *y, - const int *incy, - doublecomplex *a, - const int *lda); - -void ssyr_( - const char *uplo, - const int *n, - const float *alpha, - const float *x, - const int *incx, - float *a, - const int *lda); - -void dsyr_( - const char *uplo, - const int *n, - const double *alpha, - const double *x, - 
const int *incx, - double *a, - const int *lda); - -void ssyr2_( - const char *uplo, - const int *n, - const float *alpha, - const float *x, - const int *incx, - const float *y, - const int *incy, - float *a, - const int *lda); - -void dsyr2_( - const char *uplo, - const int *n, - const double *alpha, - const double *x, - const int *incx, - const double *y, - const int *incy, - double *a, - const int *lda); - -void cher_( - const char *uplo, - const int *n, - const float *alpha, - const complex *x, - const int *incx, - complex *a, - const int *lda); - -void zher_( - const char *uplo, - const int *n, - const double *alpha, - const doublecomplex *x, - const int *incx, - doublecomplex *a, - const int *lda); - -void cher2_( - const char *uplo, - const int *n, - const complex *alpha, - const complex *x, - const int *incx, - const complex *y, - const int *incy, - complex *a, - const int *lda); - -void zher2_( - const char *uplo, - const int *n, - const doublecomplex *alpha, - const doublecomplex *x, - const int *incx, - const doublecomplex *y, - const int *incy, - doublecomplex *a, - const int *lda); - -void chemv_( - const char *uplo, - const int *n, - const complex *alpha, - const complex *a, - const int *lda, - const complex *x, - const int *incx, - const complex *beta, - complex *y, - const int *incy); - -void zhemv_( - const char *uplo, - const int *n, - const doublecomplex *alpha, - const doublecomplex *a, - const int *lda, - const doublecomplex *x, - const int *incx, - const doublecomplex *beta, - doublecomplex *y, - const int *incy); - -void stpmv_( - const char *uplo, - const char *transa, - const char *diag, - const int *n, - const float *ap, - float *x, - const int *incx); - -void dtpmv_( - const char *uplo, - const char *transa, - const char *diag, - const int *n, - const double *ap, - double *x, - const int *incx); - -void ctpmv_( - const char *uplo, - const char *transa, - const char *diag, - const int *n, - const complex *ap, - complex *x, - const int 
*incx); - -void ztpmv_( - const char *uplo, - const char *transa, - const char *diag, - const int *n, - const doublecomplex *ap, - doublecomplex *x, - const int *incx); - -void stpsv_( - const char *uplo, - const char *transa, - const char *diag, - const int *n, - const float *ap, - float *x, - const int *incx); - -void dtpsv_( - const char *uplo, - const char *transa, - const char *diag, - const int *n, - const double *ap, - double *x, - const int *incx); - -void ctpsv_( - const char *uplo, - const char *transa, - const char *diag, - const int *n, - const complex *ap, - complex *x, - const int *incx); - -void ztpsv_( - const char *uplo, - const char *transa, - const char *diag, - const int *n, - const doublecomplex *ap, - doublecomplex *x, - const int *incx); - -void sspr_( - const char *uplo, - const int *n, - const float *alpha, - const float *x, - const int *incx, - float *ap); - -void dspr_( - const char *uplo, - const int *n, - const double *alpha, - const double *x, - const int *incx, - double *ap); - -void -sspmv_( - const char *uplo, - const int *n, - const float *alpha, - const float *ap, - const float *x, - const int *incx, - const float *beta, - float *y, - const int *incy); - -void -dspmv_( - const char *uplo, - const int *n, - const double *alpha, - const double *ap, - const double *x, - const int *incx, - const double *beta, - double *y, - const int *incy); - -void -chpmv_( - const char *uplo, - const int *n, - const complex *alpha, - const complex *ap, - const complex *x, - const int *incx, - const complex *beta, - complex *y, - const int *incy); - -void -zhpmv_( - const char *uplo, - const int *n, - const doublecomplex *alpha, - const doublecomplex *ap, - const doublecomplex *x, - const int *incx, - const doublecomplex *beta, - doublecomplex *y, - const int *incy); - -void chpr_( - const char *uplo, - const int *n, - const float *alpha, - const complex *x, - const int *incx, - complex *ap); - -void zhpr_( - const char *uplo, - const int *n, - const 
double *alpha, - const doublecomplex *x, - const int *incx, - doublecomplex *ap); - -void sspr2_( - const char *uplo, - const int *n, - const float *alpha, - const float *x, - const int *incx, - const float *y, - const int *incy, - float *a ); - -void dspr2_( - const char *uplo, - const int *n, - const double *alpha, - const double *x, - const int *incx, - const double *y, - const int *incy, - double *a ); - -void chpr2_( - const char *uplo, - const int *n, - const complex *alpha, - const complex *x, - const int *incx, - const complex *y, - const int *incy, - complex *a ); - -void zhpr2_( - const char *uplo, - const int *n, - const doublecomplex *alpha, - const doublecomplex *x, - const int *incx, - const doublecomplex *y, - const int *incy, - doublecomplex *a ); - -void sgbmv_( - const char *trans, - const int *m, - const int *n, - const int *kl, - const int *ku, - const float *alpha, - const float *a, - const int *inca, - const float *x, - const int *incx, - const float *beta, - float *y, - const int *incy ); - -void dgbmv_( - const char *trans, - const int *m, - const int *n, - const int *kl, - const int *ku, - const double *alpha, - const double *a, - const int *inca, - const double *x, - const int *incx, - const double *beta, - double *y, - const int *incy ); - -void cgbmv_( - const char *trans, - const int *m, - const int *n, - const int *kl, - const int *ku, - const complex *alpha, - const complex *a, - const int *inca, - const complex *x, - const int *incx, - const complex *beta, - complex *y, - const int *incy ); - -void zgbmv_( - const char *trans, - const int *m, - const int *n, - const int *kl, - const int *ku, - const doublecomplex *alpha, - const doublecomplex *a, - const int *inca, - const doublecomplex *x, - const int *incx, - const doublecomplex *beta, - doublecomplex *y, - const int *incy ); - -void stbmv_( - const char *uplo, - const char *trans, - const char *diag, - const int *n, - const int *k, - const float *a, - const int *lda, - float *x, - 
const int *incx ); - -void dtbmv_( - const char *uplo, - const char *trans, - const char *diag, - const int *n, - const int *k, - const double *a, - const int *lda, - double *x, - const int *incx ); - -void ctbmv_( - const char *uplo, - const char *trans, - const char *diag, - const int *n, - const int *k, - const complex *a, - const int *lda, - complex *x, - const int *incx ); - -void ztbmv_( - const char *uplo, - const char *trans, - const char *diag, - const int *n, - const int *k, - const doublecomplex *a, - const int *lda, - doublecomplex *x, - const int *incx ); - -void ssbmv_( - const char *uplo, - const int *n, - const int *k, - const float *alpha, - const float *a, - const int *lda, - const float *x, - const int *incx, - const float *beta, - float *y, - const int *incy ); - -void dsbmv_( - const char *uplo, - const int *n, - const int *k, - const double *alpha, - const double *a, - const int *lda, - const double *x, - const int *incx, - const double *beta, - double *y, - const int *incy ); - -void chbmv_( - const char *uplo, - const int *n, - const int *k, - const complex *alpha, - const complex *a, - const int *lda, - const complex *x, - const int *incx, - const complex *beta, - complex *y, - const int *incy ); - -void zhbmv_( - const char *uplo, - const int *n, - const int *k, - const doublecomplex *alpha, - const doublecomplex *a, - const int *lda, - const doublecomplex *x, - const int *incx, - const doublecomplex *beta, - doublecomplex *y, - const int *incy ); - -void stbsv_( - const char *uplo, - const char *trans, - const char *diag, - const int *n, - const int *k, - const float *a, - const int *lda, - float *x, - const int *incx ); - -void dtbsv_( - const char *uplo, - const char *trans, - const char *diag, - const int *n, - const int *k, - const double *a, - const int *lda, - double *x, - const int *incx ); - -void ctbsv_( - const char *uplo, - const char *trans, - const char *diag, - const int *n, - const int *k, - const complex *a, - const int 
*lda, - complex *x, - const int *incx ); - -void ztbsv_( - const char *uplo, - const char *trans, - const char *diag, - const int *n, - const int *k, - const doublecomplex *a, - const int *lda, - doublecomplex *x, - const int *incx ); - -void chemm_( - const char *side, - const char *uplo, - const int *m, - const int *n, - const complex *alpha, - const complex *a, - const int *lda, - const complex *b, - const int *ldb, - const complex *beta, - complex *c, - const int *ldc); - -void zhemm_( - const char *side, - const char *uplo, - const int *m, - const int *n, - const doublecomplex *alpha, - const doublecomplex *a, - const int *lda, - const doublecomplex *b, - const int *ldb, - const doublecomplex *beta, - doublecomplex *c, - const int *ldc); - -void cherk_( - const char *uplo, - const char *transa, - const int *n, - const int *k, - const float *alpha, - const complex *a, - const int *lda, - const float *beta, - complex *c, - const int *ldc); - -void zherk_( - const char *uplo, - const char *transa, - const int *n, - const int *k, - const double *alpha, - const doublecomplex *a, - const int *lda, - const double *beta, - doublecomplex *c, - const int *ldc); - -void cher2k_( - const char *uplo, - const char *transa, - const int *n, - const int *k, - const complex *alpha, - const complex *a, - const int *lda, - const complex *b, - const int *ldb, - const float *beta, - complex *c, - const int *ldc); - -void zher2k_( - const char *uplo, - const char *transa, - const int *n, - const int *k, - const doublecomplex *alpha, - const doublecomplex *a, - const int *lda, - const doublecomplex *b, - const int *ldb, - const double *beta, - doublecomplex *c, - const int *ldc); - -void sscal_(int *n, float *alpha, float *x, int *incx); -void dscal_(int *n, double *alpha, double *x, int *incx); -void cscal_(int *n, complex *alpha, complex *x, int *incx); -void zscal_(int *n, doublecomplex *alpha, doublecomplex *x, int *incx); - -void csscal_(int *n, float *alpha, complex *x, int 
*incx); -void zdscal_(int *n, double *alpha, doublecomplex *x, int *incx); - -void scopy_(int *n, float *x, int *incx, float* y, int *incy); -void dcopy_(int *n, double *x, int *incx, double* y, int *incy); -void ccopy_(int *n, complex *x, int *incx, complex *y, int *incy); -void zcopy_(int *n, doublecomplex *x, int *incx, doublecomplex *y, int *incy); - -float sdot_(int *n, float *x, int *incx, float* y, int *incy); -double ddot_(int *n, double *x, int *incx, double* y, int *incy); - -#if defined( _WIN32 ) || defined( _WIN64 ) || defined( __APPLE__) - complex cdotu_(int *n, complex *x, int *incx, complex* y, int *incy); - doublecomplex zdotu_(int *n, doublecomplex *x, int *incx, doublecomplex* y, int *incy); - complex cdotc_(int *n, complex *x, int *incx, complex* y, int *incy); - doublecomplex zdotc_(int *n, doublecomplex *x, int *incx, doublecomplex* y, int *incy); -#else - void cdotusub_(int *n, complex *x, int *incx, complex* y, int *incy, complex *ans); - void zdotusub_(int *n, doublecomplex *x, int *incx, doublecomplex* y, int *incy, doublecomplex *ans); - void cdotcsub_(int *n, complex *x, int *incx, complex* y, int *incy, complex *ans); - void zdotcsub_(int *n, doublecomplex *x, int *incx, doublecomplex* y, int *incy, doublecomplex *ans); -#endif - -void sswap_(int *n, float *x, int *incx, float* y, int *incy); -void dswap_(int *n, double *x, int *incx, double* y, int *incy); -void cswap_(int *n, complex *x, int *incx, complex *y, int *incy); -void zswap_(int *n, doublecomplex *x, int *incx, doublecomplex *y, int *incy); - -void saxpy_(int *n, float *alpha, float *x, int *incx, float* y, int *incy); -void daxpy_(int *n, double *alpha, double *x, int *incx, double* y, int *incy); -void caxpy_(int *n, complex *alpha, complex *x, int *incx, complex *y, int *incy); -void zaxpy_(int *n, doublecomplex *alpha, doublecomplex *x, int *incx, doublecomplex *y, int *incy); - - -void srotg_(float *A, float *B, float *C, float *S); -void drotg_(double *A, double *B, 
double *C, double *S); -void crotg_(complex *A, complex *B, float *C, complex *S); -void zrotg_(doublecomplex *A, doublecomplex *B, double *C, doublecomplex *S); - -void srotmg_(float *D1, float *D2, float *X1, float *Y1, float *PARAM); -void drotmg_(double *D1, double *D2, double *X1, double *Y1, double *PARAM); - -void srot_(int *n, float *x, int *incx, float *y, int *incy, float *c, float *s); -void drot_(int *n, double *x, int *incx, double *y, int *incy, double *c, double *s); -void csrot_(int *n, complex *x, int *incx, complex *y, int *incy, float *c, float *s); -void zdrot_(int *n, doublecomplex *x, int *incx, doublecomplex *y, int *incy, double *c, double *s); - -void srotm_(int* N, float *X, int* incx, float *Y, int* incy, float* PARAM); -void drotm_(int* N, double *X, int* incx, double *Y, int* incy, double* PARAM); - -float sasum_(int *n, float *x, int *incx); -double dasum_(int *n, double *x, int *incx); -float scasum_(int *n, complex *x, int *incx); -double dzasum_(int *n, doublecomplex *x, int *incx); - -int isamax_(int *n, float *x, int *incx); -int idamax_(int *n, double *x, int *incx); -int icamax_(int *n, complex *x, int *incx); -int izamax_(int *n, doublecomplex *x, int *incx); - -float snrm2_(int *n, float *x, int *incx); -double dnrm2_(int *n, double *x, int *incx); -float scnrm2_(int *n, complex *x, int *incx); -double dznrm2_(int *n, doublecomplex *x, int *incx); - -#ifdef __cplusplus -} -#endif - -#endif /* BLAS_LAPACK_H */ diff --git a/external/clBLAS/src/tests/correctness/corr-asum.cpp b/external/clBLAS/src/tests/correctness/corr-asum.cpp deleted file mode 100644 index 81da8e06..00000000 --- a/external/clBLAS/src/tests/correctness/corr-asum.cpp +++ /dev/null @@ -1,212 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <asum.h> - -static void -releaseMemObjects(cl_mem objX, cl_mem objAsum, cl_mem objScratch) -{ - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objAsum != NULL) - { - clReleaseMemObject(objAsum); - } - if(objScratch != NULL) - { - clReleaseMemObject(objScratch); - } -} - -template <typename T> static void -deleteBuffers(T *blasX, T *blasAsum=NULL, T *clblasAsum=NULL) -{ - if(blasX != NULL) - { - delete[] blasX; - } - if(clblasAsum != NULL) - { - delete[] clblasAsum; - } - if(blasAsum != NULL) - { - delete(blasAsum); - } -} - -template <typename T1, typename T2> -void -asumCorrectnessTest(TestParams *params) -{ - cl_int err; - T1 *blasX; - T2 *clblasAsum, *blasAsum; - cl_mem bufX, bufAsum, scratchBuff; - clMath::BlasBase *base; - cl_event *events; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T1) == typeid(cl_double) || - typeid(T1) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - 
events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - - blasX = new T1[lengthX + params->offBX ]; - blasAsum = new T2[1]; - clblasAsum = new T2[1 + params->offa]; - - if((blasX == NULL) || (clblasAsum == NULL) || (blasAsum == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T1>(blasX); - deleteBuffers<T2>(blasAsum, clblasAsum); - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - ::std::cerr << "Generating input data... "; - - randomVectors<T1>(params->N, (blasX + params->offBX), params->incx, (T1*)NULL, 0, true); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufX = base->createEnqueueBuffer(blasX, (lengthX + params->offBX)* sizeof(*blasX), 0, CL_MEM_READ_WRITE); - bufAsum = base->createEnqueueBuffer(NULL, (1 + params->offa) * sizeof(T2), 0, CL_MEM_READ_WRITE); - scratchBuff = base->createEnqueueBuffer(NULL, (lengthX * sizeof(T1)), 0, CL_MEM_READ_WRITE); - - ::std::cerr << "Calling reference xASUM routine... "; - - *blasAsum = ::clMath::blas::asum( params->N, blasX, params->offBX, params->incx); - ::std::cerr << "Done" << ::std::endl; - - if ((bufX == NULL) || (bufAsum == NULL) || (scratchBuff == NULL)) { - releaseMemObjects(bufX, bufAsum, scratchBuff); - deleteBuffers<T1>(blasX); - deleteBuffers<T2>(blasAsum, clblasAsum); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xASUM routine... "; - - DataType type; - type = ( typeid(T1) == typeid(cl_float))? TYPE_FLOAT : ( typeid(T1) == typeid(cl_double))? TYPE_DOUBLE: ( typeid(T1) == typeid(cl_float2))? 
TYPE_COMPLEX_FLOAT:TYPE_COMPLEX_DOUBLE; - - // Should use bufXTemp as well - err = (cl_int)::clMath::clblas::asum( type, params->N, bufAsum, params->offa, bufX, - params->offBX, params->incx, scratchBuff, params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufX, bufAsum, scratchBuff); - deleteBuffers<T1>(blasX ); - deleteBuffers<T2>(blasAsum, clblasAsum); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::ASUM() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufX, bufAsum, scratchBuff); - deleteBuffers<T1>(blasX ); - deleteBuffers<T2>(blasAsum, clblasAsum); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufAsum, CL_TRUE, 0, - (1 + params->offa) * sizeof(*clblasAsum), clblasAsum, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "ASUM: Reading results failed...." 
<< std::endl; - } - releaseMemObjects(bufX, bufAsum, scratchBuff); - - compareMatrices<T2>(clblasColumnMajor, 1 , 1, (blasAsum), (clblasAsum+params->offa), 1); - deleteBuffers<T1>(blasX); - deleteBuffers<T2>(blasAsum, clblasAsum); - delete[] events; -} - -// Instantiate the test - -TEST_P(ASUM, sasum) { - TestParams params; - - getParams(¶ms); - asumCorrectnessTest<cl_float, cl_float>(¶ms); -} - -TEST_P(ASUM, dasum) { - TestParams params; - - getParams(¶ms); - asumCorrectnessTest<cl_double, cl_double>(¶ms); -} - -TEST_P(ASUM, scasum) { - TestParams params; - - getParams(¶ms); - asumCorrectnessTest<FloatComplex, cl_float>(¶ms); -} - -TEST_P(ASUM, dzasum) { - TestParams params; - - getParams(¶ms); - asumCorrectnessTest<DoubleComplex, cl_double>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-axpy.cpp b/external/clBLAS/src/tests/correctness/corr-axpy.cpp deleted file mode 100644 index c5816bc7..00000000 --- a/external/clBLAS/src/tests/correctness/corr-axpy.cpp +++ /dev/null @@ -1,217 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <axpy.h> - -static void -releaseMemObjects(cl_mem objX, cl_mem objY) -{ - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objY != NULL) - { - clReleaseMemObject(objY); - } -} - -template <typename T> static void -deleteBuffers(T *X, T *Y, T *blasX, T *blasY) -{ - if(X != NULL) - { - delete[] X; - } - if(blasX != NULL) - { - delete[] blasX; - } - if(Y != NULL) - { - delete[] Y; - } - if(blasY != NULL) - { - delete[] blasY; - } -} - -template <typename T> -void -axpyCorrectnessTest(TestParams *params) -{ - cl_int err; - T *X, *Y; //For OpenCL implementation - T *blasX, *blasY;// For reference implementation - cl_mem bufX, bufY; - clMath::BlasBase *base; - cl_event *events; - T alpha; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - size_t lengthY = (1 + ((params->N -1) * abs(params->incy))); - - X = new T[lengthX + params->offBX ]; - Y = new T[lengthY + params->offCY ]; - blasX = new T[lengthX + params->offBX ]; - blasY = new T[lengthY + params->offCY ]; - - if((X == NULL) || (blasX == NULL) || (Y == NULL) || (blasY == NULL)) - { - 
::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T>(X, Y, blasX, blasY); - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - // Populate X and Y - randomVectors(params->N, (X+params->offBX), params->incx, (Y+params->offCY), params->incy); - - memcpy(blasX, X, (lengthX + params->offBX) * sizeof(T)); - memcpy(blasY, Y, (lengthY + params->offCY) * sizeof(T)); - alpha = convertMultiplier<T>(params->alpha); - - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX)* sizeof(T), 0, CL_MEM_READ_ONLY); - bufY = base->createEnqueueBuffer(Y, (lengthY + params->offCY)* sizeof(T), 0, CL_MEM_READ_WRITE); - - if ((bufX == NULL) || (bufY == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufX, bufY); - deleteBuffers<T>(X, Y, blasX, blasY); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling reference xAXPY routine... "; - - ::clMath::blas::axpy((size_t)params->N, alpha, blasX, (size_t)params->offBX, params->incx, - blasY, (size_t)params->offCY, params->incy); - ::std::cerr << "Done" << ::std::endl; - - - ::std::cerr << "Calling clblas xAXPY routine... 
"; - - err = (cl_int)::clMath::clblas::axpy(params->N, alpha, bufX, params->offBX, params->incx, bufY, params->offCY, params->incy, - params->numCommandQueues, base->commandQueues(), 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufX, bufY); - deleteBuffers<T>(X, Y, blasX, blasY); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::AXPY() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufX, bufY); - deleteBuffers<T>(X, Y, blasX, blasY); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufY, CL_TRUE, 0, - (lengthY + params->offCY) * sizeof(T), Y, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "AXPY: Reading results failed...." << std::endl; - } - - releaseMemObjects(bufX, bufY); - - compareMatrices<T>(clblasRowMajor, lengthY , 1, (blasY + params->offCY), (Y + params->offCY), 1); - deleteBuffers<T>(X, Y, blasX, blasY); - delete[] events; -} - -// Instantiate the test - -TEST_P(AXPY, saxpy) { - TestParams params; - - getParams(¶ms); - axpyCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(AXPY, daxpy) { - TestParams params; - - getParams(¶ms); - axpyCorrectnessTest<cl_double>(¶ms); -} - -TEST_P(AXPY, caxpy) { - TestParams params; - - getParams(¶ms); - axpyCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(AXPY, zaxpy) { - TestParams params; - - getParams(¶ms); - axpyCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-copy.cpp b/external/clBLAS/src/tests/correctness/corr-copy.cpp deleted file mode 100644 index 2ee46c08..00000000 --- a/external/clBLAS/src/tests/correctness/corr-copy.cpp +++ /dev/null @@ -1,211 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <copy.h> - -static void -releaseMemObjects(cl_mem objX, cl_mem objY) -{ - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objY != NULL) - { - clReleaseMemObject(objY); - } -} - -template <typename T> static void -deleteBuffers(T *blasX, T *blasY, T *clblasY) -{ - if(blasX != NULL) - { - delete[] blasX; - } - if(blasY != NULL) - { - delete[] blasY; - } - if(clblasY != NULL) - { - delete[] clblasY; - } -} - -template <typename T> -void -copyCorrectnessTest(TestParams *params) -{ - cl_int err; - T *blasX, *blasY, *clblasY; - cl_mem bufX, bufY; - clMath::BlasBase *base; - cl_event *events; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new 
cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - size_t lengthY = (1 + ((params->N -1) * abs(params->incy))); - - blasX = new T[lengthX + params->offBX ]; - blasY = new T[lengthY + params->offCY ]; - clblasY = new T[lengthY + params->offCY ]; - - if((blasX == NULL) || (blasY == NULL) || (clblasY == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T>(blasX, blasY, clblasY); - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - // Populate A and blasX - randomVectors( params->N, (blasX+params->offBX), params->incx, (blasY+params->offCY), params->incy ); - memcpy(clblasY, blasY, (lengthY + params->offCY)* sizeof(*blasY)); - - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufX = base->createEnqueueBuffer(blasX, (lengthX + params->offBX)* sizeof(*blasX), 0, CL_MEM_READ_WRITE); - bufY = base->createEnqueueBuffer(blasY, (lengthY + params->offCY)* sizeof(*blasY), 0, CL_MEM_READ_WRITE); - - ::std::cerr << "Calling reference xCOPY routine... "; - - ::clMath::blas::copy( params->N, blasX, params->offBX, params->incx, blasY, params->offCY, params->incy); - ::std::cerr << "Done" << ::std::endl; - - if ((bufX == NULL) || (bufY == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufX, bufY); - deleteBuffers<T>(blasX, blasY, clblasY); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xCOPY routine... "; - - DataType type; - type = ( typeid(T) == typeid(cl_float))? 
TYPE_FLOAT : ( typeid(T) == typeid(cl_double))? TYPE_DOUBLE: ( typeid(T) == typeid(cl_float2))? TYPE_COMPLEX_FLOAT:TYPE_COMPLEX_DOUBLE; - - // Should use bufXTemp as well - err = (cl_int)::clMath::clblas::copy(type, params->N, bufX, - params->offBX, params->incx, bufY, params->offCY, params->incy, params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufX, bufY); - deleteBuffers<T>(blasX, blasY, clblasY); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::COPY() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufX, bufY); - deleteBuffers<T>(blasX, blasY, clblasY); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufY, CL_TRUE, 0, - ((lengthY + params->offCY) * sizeof(*blasY)), clblasY, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "COPY: Reading results failed...." 
<< std::endl; - } - - releaseMemObjects(bufX, bufY); - - compareMatrices<T>(clblasColumnMajor, lengthY , 1, (blasY + params->offCY), (clblasY + params->offCY), lengthY, NULL); - deleteBuffers<T>(blasX, blasY, clblasY); - delete[] events; -} - -// Instantiate the test - -TEST_P(COPY, scopy) { - TestParams params; - - getParams(¶ms); - copyCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(COPY, dcopy) { - TestParams params; - - getParams(¶ms); - copyCorrectnessTest<cl_double>(¶ms); -} - -TEST_P(COPY, ccopy) { - TestParams params; - - getParams(¶ms); - copyCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(COPY, zcopy) { - TestParams params; - - getParams(¶ms); - copyCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-dot.cpp b/external/clBLAS/src/tests/correctness/corr-dot.cpp deleted file mode 100644 index c4969252..00000000 --- a/external/clBLAS/src/tests/correctness/corr-dot.cpp +++ /dev/null @@ -1,217 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <dot.h> - -static void -releaseMemObjects(cl_mem objX, cl_mem objY, cl_mem objDP, cl_mem objScratch) -{ - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objY != NULL) - { - clReleaseMemObject(objY); - } - if(objDP != NULL) - { - clReleaseMemObject(objDP); - } - if(objScratch != NULL) - { - clReleaseMemObject(objScratch); - } -} - -template <typename T> static void -deleteBuffers(T *blasX, T *blasY, T *blasDP, T *clblasDP) -{ - if(blasX != NULL) - { - delete[] blasX; - } - if(blasY != NULL) - { - delete[] blasY; - } - if(clblasDP != NULL) - { - delete[] clblasDP; - } - if(blasDP != NULL) - { - delete(blasDP); - } -} - -template <typename T> -void -dotCorrectnessTest(TestParams *params) -{ - cl_int err; - T *blasX, *blasY, *clblasDP, *blasDP; - cl_mem bufX, bufY, bufDP, scratchBuff; - clMath::BlasBase *base; - cl_event *events; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - size_t lengthY = (1 + ((params->N -1) * abs(params->incy))); - - blasX = new T[lengthX + params->offBX ]; - blasY = new T[lengthY + params->offCY ]; - blasDP = new 
T[1]; - clblasDP = new T[1 + params->offa]; - - if((blasX == NULL) || (blasY == NULL) || (clblasDP == NULL) || (blasDP == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T>(blasX, blasY, blasDP, clblasDP); - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - ::std::cerr << "Generating input data... "; - - randomVectors(params->N, (blasX + params->offBX), params->incx, (blasY + params->offCY), params->incy, true); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufX = base->createEnqueueBuffer(blasX, (lengthX + params->offBX)* sizeof(*blasX), 0, CL_MEM_READ_WRITE); - bufY = base->createEnqueueBuffer(blasY, (lengthY + params->offCY)* sizeof(*blasY), 0, CL_MEM_READ_WRITE); - bufDP = base->createEnqueueBuffer(NULL, (1 + params->offa) * sizeof(T), 0, CL_MEM_READ_WRITE); - scratchBuff = base->createEnqueueBuffer(NULL, (lengthX * sizeof(T)), 0, CL_MEM_READ_WRITE); - - ::std::cerr << "Calling reference xDOT routine... "; - - *blasDP = ::clMath::blas::dot( params->N, blasX, params->offBX, params->incx, blasY, params->offCY, params->incy); - ::std::cerr << "Done" << ::std::endl; - - if ((bufX == NULL) || (bufY == NULL) || (bufDP == NULL) || (scratchBuff == NULL)) { - releaseMemObjects(bufX, bufY, bufDP, scratchBuff); - deleteBuffers<T>(blasX, blasY, blasDP, clblasDP); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xDOT routine... "; - - DataType type; - type = ( typeid(T) == typeid(cl_float))? TYPE_FLOAT : ( typeid(T) == typeid(cl_double))? TYPE_DOUBLE: ( typeid(T) == typeid(cl_float2))? 
TYPE_COMPLEX_FLOAT:TYPE_COMPLEX_DOUBLE; - - // Should use bufXTemp as well - err = (cl_int)::clMath::clblas::dot( type, params->N, bufDP, params->offa, bufX, - params->offBX, params->incx, bufY, params->offCY, params->incy, scratchBuff, params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufX, bufY, bufDP, scratchBuff); - deleteBuffers<T>(blasX, blasY, blasDP, clblasDP); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::DOT() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufX, bufY, bufDP, scratchBuff); - deleteBuffers<T>(blasX, blasY, blasDP, clblasDP); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufDP, CL_TRUE, 0, - (1 + params->offa) * sizeof(*clblasDP), clblasDP, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "DOT: Reading results failed...." 
<< std::endl; - } - releaseMemObjects(bufX, bufY, bufDP, scratchBuff); - - compareMatrices<T>(clblasColumnMajor, 1 , 1, (blasDP), (clblasDP+params->offa), 1); - deleteBuffers<T>(blasX, blasY, blasDP, clblasDP); - delete[] events; -} - -// Instantiate the test - -TEST_P(DOT, sdot) { - TestParams params; - - getParams(¶ms); - dotCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(DOT, ddot) { - TestParams params; - - getParams(¶ms); - dotCorrectnessTest<cl_double>(¶ms); -} - -TEST_P(DOT, cdotu) { - TestParams params; - - getParams(¶ms); - dotCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(DOT, zdotu) { - TestParams params; - - getParams(¶ms); - dotCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-dotc.cpp b/external/clBLAS/src/tests/correctness/corr-dotc.cpp deleted file mode 100644 index d4a68b1d..00000000 --- a/external/clBLAS/src/tests/correctness/corr-dotc.cpp +++ /dev/null @@ -1,204 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <dotc.h> - -static void -releaseMemObjects(cl_mem objX, cl_mem objY, cl_mem objDP, cl_mem objScratch) -{ - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objY != NULL) - { - clReleaseMemObject(objY); - } - if(objDP != NULL) - { - clReleaseMemObject(objDP); - } - if(objScratch != NULL) - { - clReleaseMemObject(objScratch); - } -} - -template <typename T> static void -deleteBuffers(T *blasX, T *blasY, T *blasDP, T *clblasDP) -{ - if(blasX != NULL) - { - delete[] blasX; - } - if(blasY != NULL) - { - delete[] blasY; - } - if(clblasDP != NULL) - { - delete[] clblasDP; - } - if(blasDP != NULL) - { - delete(blasDP); - } -} - -template <typename T> -void -dotcCorrectnessTest(TestParams *params) -{ - cl_int err; - T *blasX, *blasY, *clblasDP, *blasDP; - cl_mem bufX, bufY, bufDP, scratchBuff; - clMath::BlasBase *base; - cl_event *events; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - size_t lengthY = (1 + ((params->N -1) * abs(params->incy))); - - blasX = new T[lengthX + params->offBX ]; - blasY = new T[lengthY + params->offCY ]; - blasDP = 
new T[1]; - clblasDP = new T[1 + params->offa]; - - if((blasX == NULL) || (blasY == NULL) || (clblasDP == NULL) || (blasDP == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T>(blasX, blasY, blasDP, clblasDP); - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - ::std::cerr << "Generating input data... "; - - randomVectors(params->N, (blasX + params->offBX), params->incx, (blasY + params->offCY), params->incy, true); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufX = base->createEnqueueBuffer(blasX, (lengthX + params->offBX)* sizeof(*blasX), 0, CL_MEM_READ_WRITE); - bufY = base->createEnqueueBuffer(blasY, (lengthY + params->offCY)* sizeof(*blasY), 0, CL_MEM_READ_WRITE); - bufDP = base->createEnqueueBuffer(NULL, (1 + params->offa) * sizeof(T), 0, CL_MEM_READ_WRITE); - scratchBuff = base->createEnqueueBuffer(NULL, (lengthX * sizeof(T)), 0, CL_MEM_READ_WRITE); - - ::std::cerr << "Calling reference xDOTC routine... "; - - *blasDP = ::clMath::blas::dotc( params->N, blasX, params->offBX, params->incx, blasY, params->offCY, params->incy); - ::std::cerr << "Done" << ::std::endl; - - if ((bufX == NULL) || (bufY == NULL) || (bufDP == NULL) || (scratchBuff == NULL)) { - releaseMemObjects(bufX, bufY, bufDP, scratchBuff); - deleteBuffers<T>(blasX, blasY, blasDP, clblasDP); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xDOTC routine... "; - - DataType type; - type = ( typeid(T) == typeid(cl_float))? TYPE_FLOAT : ( typeid(T) == typeid(cl_double))? TYPE_DOUBLE: ( typeid(T) == typeid(cl_float2))? 
TYPE_COMPLEX_FLOAT:TYPE_COMPLEX_DOUBLE; - - // Should use bufXTemp as well - err = (cl_int)::clMath::clblas::dotc( type, params->N, bufDP, params->offa, bufX, - params->offBX, params->incx, bufY, params->offCY, params->incy, scratchBuff, params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufX, bufY, bufDP, scratchBuff); - deleteBuffers<T>(blasX, blasY, blasDP, clblasDP); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::DOTC() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufX, bufY, bufDP, scratchBuff); - deleteBuffers<T>(blasX, blasY, blasDP, clblasDP); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufDP, CL_TRUE, 0, - (1 + params->offa) * sizeof(*clblasDP), clblasDP, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "DOTC: Reading results failed...." << std::endl; - } - releaseMemObjects(bufX, bufY, bufDP, scratchBuff); - - compareMatrices<T>(clblasColumnMajor, 1 , 1, (blasDP), (clblasDP+params->offa), 1); - deleteBuffers<T>(blasX, blasY, blasDP, clblasDP); - delete[] events; -} - -// Instantiate the test - - -TEST_P(DOTC, cdotc) { - TestParams params; - - getParams(¶ms); - dotcCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(DOTC, zdotc) { - TestParams params; - - getParams(¶ms); - dotcCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-gbmv.cpp b/external/clBLAS/src/tests/correctness/corr-gbmv.cpp deleted file mode 100644 index efa8b4b1..00000000 --- a/external/clBLAS/src/tests/correctness/corr-gbmv.cpp +++ /dev/null @@ -1,248 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <gbmv.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objX, cl_mem objY) -{ - if(objA != NULL) - { - clReleaseMemObject(objA); - } - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objY != NULL) - { - clReleaseMemObject(objY); - } -} - -template <typename T> static void -deleteBuffers(T *A, T *X, T *blasY, T *clblasY) -{ - if(A != NULL) - { - delete[] A; - } - if(X != NULL) - { - delete[] X; - } - if(blasY != NULL) - { - delete[] blasY; - } - if(clblasY != NULL) - { - delete[] clblasY; // To hold clblas GBMV call results - } -} - -template <typename T> -void -gbmvCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *X, *blasY, *clblasY; - cl_mem bufA, bufX, bufY; - clMath::BlasBase *base; - cl_event *events; - T alpha, beta; - size_t lengthX, lengthY, lengthA; - - base = clMath::BlasBase::getInstance(); - - if (( (typeid(T) == typeid(DoubleComplex)) || (typeid(T) == typeid(cl_double)) ) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating 
point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - lengthA = ((params->order == clblasColumnMajor)? params->N : params->M) * params->lda; - - if (params->transA == clblasNoTrans) { - lengthX = (params->N - 1)*abs(params->incx) + 1; - lengthY = (params->M - 1)*abs(params->incy) + 1; - } - else { - lengthX = (params->M - 1)*abs(params->incx) + 1; - lengthY = (params->N - 1)*abs(params->incy) + 1; - } - - A = new T[lengthA + params->offA ]; - X = new T[lengthX + params->offBX ]; - blasY = new T[lengthY + params->offCY ]; - clblasY = new T[lengthY + params->offCY ]; - - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - if((A == NULL) || (X == NULL) || (blasY == NULL) || (clblasY == NULL)) - { - deleteBuffers<T>(A, X, blasY, clblasY); - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - delete[] events; - SUCCEED(); - return; - } - - alpha = convertMultiplier<T>(params->alpha); - beta = convertMultiplier<T>(params->beta); - - randomGbmvMatrices(params->order, params->transA, params->M, params->N, &alpha, &beta, - (A + params->offA), params->lda, (X+params->offBX), params->incx, (blasY+params->offCY), params->incy ); - // Copy blasY to clblasY - memcpy(clblasY, blasY, (lengthY + params->offCY)* sizeof(*blasY)); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufA = base->createEnqueueBuffer(A, (lengthA + params->offA)* sizeof(*A), 0, CL_MEM_READ_ONLY); - bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX)* sizeof(*X), 0, CL_MEM_READ_ONLY); - bufY = base->createEnqueueBuffer(clblasY, (lengthY + params->offCY) * sizeof(*clblasY), 0, CL_MEM_READ_WRITE); - - ::std::cerr << "Calling reference xGBMV routine... 
"; - - clblasOrder fOrder; - clblasTranspose fTrans; - fOrder = params->order; - fTrans = params->transA; - size_t fM = params->M, fN = params->N, fKL = params->KL, fKU = params->KU; - - if (fOrder != clblasColumnMajor) - { - fOrder = clblasColumnMajor; - fTrans = (params->transA == clblasNoTrans)? clblasTrans : clblasNoTrans; - fM = params->N; - fN = params->M; - fKL = params->KU; - fKU = params->KL; - - if( params->transA == clblasConjTrans ) - doConjugate( (A+params->offa), 1, lengthA, params->lda ); - } - clMath::blas::gbmv(fOrder, fTrans, fM, fN, fKL, fKU, alpha, A, params->offA, params->lda, - X, params->offBX, params->incx, beta, blasY, params->offCY, params->incy); - ::std::cerr << "Done" << ::std::endl; - - if ((bufA == NULL) || (bufX == NULL) || (bufY == NULL)) { - // Skip the test, the most probable reason is - // matrix too big for a device. - - releaseMemObjects(bufA, bufX, bufY); - deleteBuffers<T>(A, X, blasY, clblasY); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xGBMV routine... 
"; - - err = (cl_int)clMath::clblas::gbmv(params->order, params->transA, params->M, params->N, params->KL, params->KU, - alpha, bufA, params->offA, params->lda, bufX, params->offBX, params->incx, - beta, bufY, params->offCY, params->incy, - params->numCommandQueues, base->commandQueues(), 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX, bufY); - deleteBuffers<T>(A, X, blasY, clblasY); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::GBMV() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX, bufY); - deleteBuffers<T>(A, X, blasY, clblasY); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufY, CL_TRUE, 0, - (lengthY + params->offCY) * sizeof(*clblasY), clblasY, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "GBMV: Reading results failed...." 
<< std::endl; - } - - releaseMemObjects(bufA, bufX, bufY); - compareMatrices<T>(clblasColumnMajor, lengthY , 1, (blasY + params->offCY), (clblasY + params->offCY), - lengthY); - deleteBuffers<T>(A, X, blasY, clblasY); - delete[] events; -} - -// Instantiate the test - -TEST_P(GBMV, sgbmv) { - TestParams params; - - getParams(¶ms); - gbmvCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(GBMV, dgbmv) { - TestParams params; - - getParams(¶ms); - gbmvCorrectnessTest<cl_double>(¶ms); -} - -TEST_P(GBMV, cgbmv) { - TestParams params; - - getParams(¶ms); - gbmvCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(GBMV, zgbmv) { - TestParams params; - - getParams(¶ms); - gbmvCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-gemm.cpp b/external/clBLAS/src/tests/correctness/corr-gemm.cpp deleted file mode 100644 index 5837bed9..00000000 --- a/external/clBLAS/src/tests/correctness/corr-gemm.cpp +++ /dev/null @@ -1,233 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <gemm.h> - -#include "tcase-filter.h" - -static void -releaseMemObjects(cl_mem objA, cl_mem objB, cl_mem objC) -{ - clReleaseMemObject(objA); - clReleaseMemObject(objB); - clReleaseMemObject(objC); -} - -template <typename T> static void -deleteBuffers(T *A, T *B, T *blasC, T *clblasC) -{ - delete[] A; - delete[] B; - delete[] blasC; - delete[] clblasC; -} - -template <typename T> -void -gemmCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *B, *blasC, *clblasC; - T alpha, beta; - cl_mem bufA, bufB, bufC; - clMath::BlasBase *base; - bool useAlpha; - bool useBeta; - cl_event *events; - bool isComplex; - - base = clMath::BlasBase::getInstance(); - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - isComplex = ((typeid(T) == typeid(FloatComplex)) || - (typeid(T) == typeid(DoubleComplex))); - - if (canCaseBeSkipped(params, isComplex)) { - std::cerr << ">> Test is skipped because it has no importance for this " - "level of coverage" << std::endl; - SUCCEED(); - return; - } - - useAlpha = base->useAlpha(); - useBeta = base->useBeta(); - alpha = ZERO<T>(); - beta = ZERO<T>(); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - A = new T[params->rowsA * params->columnsA]; - B = new T[params->rowsB * params->columnsB]; - blasC = new T[params->rowsC * params->columnsC]; - 
clblasC = new T[params->rowsC * params->columnsC]; - - srand(params->seed); - if (useAlpha) { - alpha = convertMultiplier<T>(params->alpha); - } - if (useBeta) { - beta = convertMultiplier<T>(params->beta); - } - - ::std::cerr << "Generating input data... "; - randomGemmMatrices<T>(params->order, params->transA, params->transB, - params->M, params->N, params->K, useAlpha, &alpha, A, params->lda, - B, params->ldb, useBeta, &beta, blasC, params->ldc); - memcpy(clblasC, blasC, params->rowsC * params->columnsC * sizeof(*blasC)); - ::std::cerr << "Done" << ::std::endl; - - ::std::cerr << "Calling reference xGEMM routine... "; - if (params->order == clblasColumnMajor) { - ::clMath::blas::gemm(clblasColumnMajor, params->transA, params->transB, - params->M, params->N, params->K, alpha, A, - params->lda, B, params->ldb, beta, blasC, params->ldc); - } - else { - T *reorderedA = new T[params->rowsA * params->columnsA]; - T *reorderedB = new T[params->rowsB * params->columnsB]; - T *reorderedC = new T[params->rowsC * params->columnsC]; - - reorderMatrix<T>(clblasRowMajor, params->rowsA, params->columnsA, - A, reorderedA); - reorderMatrix<T>(clblasRowMajor, params->rowsB, params->columnsB, - B, reorderedB); - reorderMatrix<T>(clblasRowMajor, params->rowsC, params->columnsC, - blasC, reorderedC); - ::clMath::blas::gemm(clblasColumnMajor, params->transA, params->transB, - params->M, params->N, params->K, alpha, reorderedA, - params->rowsA, reorderedB, params->rowsB, - beta, reorderedC, params->rowsC); - reorderMatrix<T>(clblasColumnMajor, params->rowsC, params->columnsC, - reorderedC, blasC); - - delete[] reorderedC; - delete[] reorderedB; - delete[] reorderedA; - } - ::std::cerr << "Done" << ::std::endl; - - bufA = base->createEnqueueBuffer(A, params->rowsA * params->columnsA * - sizeof(*A), params->offA * sizeof(*A), - CL_MEM_READ_ONLY); - bufB = base->createEnqueueBuffer(B, params->rowsB * params->columnsB * - sizeof(*B), params->offBX * sizeof(*B), - CL_MEM_READ_ONLY); - bufC 
= base->createEnqueueBuffer(clblasC, params->rowsC * params->columnsC * - sizeof(*clblasC), - params->offCY * sizeof(*clblasC), - CL_MEM_READ_WRITE); - if ((bufA == NULL) || (bufB == NULL) || (bufC == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xGEMM routine... "; - err = (cl_int)::clMath::clblas::gemm(params->order, params->transA, - params->transB, params->M, params->N, params->K, alpha, bufA, - params->offA, params->lda, bufB, params->offBX, params->ldb, beta, - bufC, params->offCY, params->ldc, params->numCommandQueues, - base->commandQueues(), 0, NULL, events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::GEMM() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - clEnqueueReadBuffer(base->commandQueues()[0], bufC, CL_TRUE, - params->offCY * sizeof(*clblasC), - params->rowsC * params->columnsC * sizeof(*clblasC), - clblasC, 0, NULL, NULL); - - releaseMemObjects(bufA, bufB, bufC); - compareMatrices<T>(params->order, params->M, params->N, blasC, clblasC, - params->ldc); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; -} - -// Instantiate the test - -TEST_P(GEMM, sgemm) { - TestParams params; - - getParams(¶ms); - 
gemmCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(GEMM, dgemm) { - TestParams params; - - getParams(¶ms); - gemmCorrectnessTest<cl_double>(¶ms); -} - -TEST_P(GEMM, cgemm) { - TestParams params; - - getParams(¶ms); - gemmCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(GEMM, zgemm) { - TestParams params; - - getParams(¶ms); - gemmCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-gemm2.cpp b/external/clBLAS/src/tests/correctness/corr-gemm2.cpp deleted file mode 100644 index 2730d425..00000000 --- a/external/clBLAS/src/tests/correctness/corr-gemm2.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <gemm-2.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objB, cl_mem objC) -{ - if(objA != NULL) - { - clReleaseMemObject(objA); - } - if(objB != NULL) - { - clReleaseMemObject(objB); - } - if(objC != NULL) - { - clReleaseMemObject(objC); -} -} - -template <typename T> static void -deleteBuffers(T *A, T *B, T *blasC, T *clblasC) -{ - if(A != NULL) - { - delete[] A; - } - if(B != NULL) - { - delete[] B; - } - if(blasC != NULL) - { - delete[] blasC; - } - if(clblasC != NULL) - { - delete[] clblasC; -} -} - -template <typename T> -void -gemm2CorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *B, *blasC, *clblasC; - T alpha, beta; - cl_mem bufA, bufB, bufC; - clMath::BlasBase *base; - bool useAlpha; - bool useBeta; - cl_event *events; - - base = clMath::BlasBase::getInstance(); - useAlpha = base->useAlpha(); - useBeta = base->useBeta(); - alpha = ZERO<T>(); - beta = ZERO<T>(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - A = new T[params->rowsA * params->columnsA]; - B = new T[params->rowsB * params->columnsB]; - blasC = new T[params->rowsC * params->columnsC]; - clblasC = new T[params->rowsC * params->columnsC]; - - if((A == NULL) || (B == NULL) || (blasC == NULL) || (clblasC == NULL)) - { - ::std::cerr 
<< "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers(A, B, blasC, clblasC); - SUCCEED(); - return; - } - - srand(params->seed); - if (useAlpha) { - alpha = convertMultiplier<T>(params->alpha); - } - if (useBeta) { - beta = convertMultiplier<T>(params->beta); - } - - ::std::cerr << "Generating input data... "; - randomGemmMatrices<T>(params->order, params->transA, params->transB, - params->M, params->N, params->K, useAlpha, &alpha, A, params->lda, - B, params->ldb, useBeta, &beta, blasC, params->ldc); - memcpy(clblasC, blasC, params->rowsC * params->columnsC * sizeof(*blasC)); - ::std::cerr << "Done" << ::std::endl; - - ::std::cerr << "Calling reference xGEMM routine... "; - if (params->order == clblasColumnMajor) { - ::clMath::blas::gemm(clblasColumnMajor, params->transA, params->transB, - params->M, params->N, params->K, alpha, A, - params->lda, B, params->ldb, beta, blasC, params->ldc); - } - else { - T *reorderedA = new T[params->rowsA * params->columnsA]; - T *reorderedB = new T[params->rowsB * params->columnsB]; - T *reorderedC = new T[params->rowsC * params->columnsC]; - - if((reorderedA == NULL) || (reorderedB == NULL) || (reorderedC == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" 
<< ::std::endl; - SUCCEED(); - return; - } - - reorderMatrix<T>(clblasRowMajor, params->rowsA, params->columnsA, - A, reorderedA); - reorderMatrix<T>(clblasRowMajor, params->rowsB, params->columnsB, - B, reorderedB); - reorderMatrix<T>(clblasRowMajor, params->rowsC, params->columnsC, - blasC, reorderedC); - ::clMath::blas::gemm(clblasColumnMajor, params->transA, params->transB, - params->M, params->N, params->K, alpha, reorderedA, - params->rowsA, reorderedB, params->rowsB, - beta, reorderedC, params->rowsC); - reorderMatrix<T>(clblasColumnMajor, params->rowsC, params->columnsC, - reorderedC, blasC); - - delete[] reorderedC; - delete[] reorderedB; - delete[] reorderedA; - } - ::std::cerr << "Done" << ::std::endl; - - bufA = base->createEnqueueBuffer(A, params->rowsA * params->columnsA * - sizeof(*A), params->offA * sizeof(*A), - CL_MEM_READ_ONLY); - bufB = base->createEnqueueBuffer(B, params->rowsB * params->columnsB * - sizeof(*B), params->offBX * sizeof(*B), - CL_MEM_READ_ONLY); - bufC = base->createEnqueueBuffer(clblasC, params->rowsC * params->columnsC * - sizeof(*clblasC), - params->offCY * sizeof(*clblasC), - CL_MEM_READ_WRITE); - if ((bufA == NULL) || (bufB == NULL) || (bufC == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xGEMM routine... 
"; - err = (cl_int)::clMath::clblas::gemm2(params->order, params->transA, - params->transB, params->M, params->N, params->K, alpha, bufA, - params->offA, params->lda, bufB, params->offBX, params->ldb, beta, - bufC, params->offCY, params->ldc, params->numCommandQueues, - base->commandQueues(), 0, NULL, events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::GEMM() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - clEnqueueReadBuffer(base->commandQueues()[0], bufC, CL_TRUE, - params->offCY * sizeof(*clblasC), - params->rowsC * params->columnsC * sizeof(*clblasC), - clblasC, 0, NULL, NULL); - - releaseMemObjects(bufA, bufB, bufC); - compareMatrices<T>(params->order, params->M, params->N, blasC, clblasC, - params->ldc); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; -} - -// Instantiate the test - -TEST_P(GEMM2, sgemm2) { - TestParams params; - - getParams(¶ms); - gemm2CorrectnessTest<cl_float>(¶ms); -} - -TEST_P(GEMM2, dgemm2) { - TestParams params; - - getParams(¶ms); - gemm2CorrectnessTest<cl_double>(¶ms); -} - -TEST_P(GEMM2, cgemm2) { - TestParams params; - - getParams(¶ms); - gemm2CorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(GEMM2, zgemm2) { - TestParams params; - - getParams(¶ms); - gemm2CorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-gemv.cpp b/external/clBLAS/src/tests/correctness/corr-gemv.cpp deleted file mode 100644 index c1a564c3..00000000 --- a/external/clBLAS/src/tests/correctness/corr-gemv.cpp +++ /dev/null @@ -1,246 +0,0 @@ -/* 
************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <gemv.h> - -#include "tcase-filter.h" - -static void -releaseMemObjects(cl_mem objA, cl_mem objX, cl_mem objY) -{ - clReleaseMemObject(objA); - clReleaseMemObject(objX); - clReleaseMemObject(objY); -} - -template <typename T> static void -deleteBuffers(T *A, T *X, T *blasY, T *clblasY) -{ - delete[] A; - delete[] X; - delete[] blasY; - delete[] clblasY; -} - -template <typename T> -void -gemvCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *B, *blasC, *clblasC, *X, *Y; - T alpha, beta; - cl_mem bufA, bufB, bufC; - clMath::BlasBase *base; - bool useAlpha, useBeta; - cl_event *events; - size_t lenY, lenX; - bool isComplex; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> 
Test skipped" << std::endl; - SUCCEED(); - return; - } - - isComplex = ((typeid(T) == typeid(FloatComplex)) || - (typeid(T) == typeid(DoubleComplex))); - if (canCaseBeSkipped(params, isComplex)) { - std::cerr << ">> Test is skipped because it has no importance for this " - "level of coverage" << std::endl; - SUCCEED(); - return; - } - - useAlpha = base->useAlpha(); - useBeta = base->useBeta(); - beta = ZERO<T>(); - alpha = ZERO<T>(); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - A = new T[params->rowsA * params->columnsA]; - B = new T[params->rowsB * params->columnsB]; - blasC = new T[params->rowsC * params->columnsC]; - clblasC = new T[params->rowsC * params->columnsC]; - X = &B[params->offBX]; - Y = &blasC[params->offCY]; - - srand(params->seed); - if (useAlpha) { - alpha = convertMultiplier<T>(params->alpha); - } - if (useBeta) { - beta = convertMultiplier<T>(params->beta); - } - - if (params->transA == clblasNoTrans) { - lenX = params->N; - lenY = params->M; - } - else { - lenX = params->M; - lenY = params->N; - } - - ::std::cerr << "Generating input data... 
"; - setNans<T>(params->rowsA * params->columnsA, A); - setNans<T>(params->rowsB * params->columnsB, B); - setNans<T>(params->rowsC * params->columnsC, blasC); - - randomGemmxMatrices<T>(params->order, params->transA, params->transB, - params->transC, lenY, params->K, lenX, useAlpha, - &alpha, A, params->lda, B, params->ldb, useBeta, - &beta, blasC, params->ldc); - - // set to NAN elements which must not be accessed - // in matrix B containing vector X - setVectorNans<T>(params->offBX, abs(params->incx), B, lenX, - params->columnsB * params->rowsB); - // in matrix C containing vector Y - setVectorNans<T>(params->offCY, abs(params->incy), blasC, lenY, - params->columnsC * params->rowsC); - memcpy(clblasC, blasC, params->rowsC * params->columnsC * sizeof(*clblasC)); - - ::std::cerr << "Done" << ::std::endl; - - ::std::cerr << "Calling reference xGEMV routine... "; - if (params->order == clblasColumnMajor) { - ::clMath::blas::gemv(clblasColumnMajor, params->transA, - params->M, params->N, alpha, A, params->lda, - X, params->incx, beta, Y, params->incy); - } - else { - T *reorderedA = new T[params->rowsA * params->columnsA]; - - reorderMatrix<T>(clblasRowMajor, params->rowsA, params->columnsA, - A, reorderedA); - ::clMath::blas::gemv(clblasColumnMajor, params->transA, - params->M, params->N, alpha, reorderedA, params->rowsA, - X, params->incx, beta, Y, params->incy); - - delete[] reorderedA; - } - ::std::cerr << "Done" << ::std::endl; - - bufA = base->createEnqueueBuffer(A, params->rowsA * params->columnsA * - sizeof(*A), params->offA * sizeof(*A), - CL_MEM_READ_ONLY); - bufB = base->createEnqueueBuffer(B, params->rowsB * params->columnsB * - sizeof(*B), 0, CL_MEM_READ_ONLY); - bufC = base->createEnqueueBuffer(clblasC, params->rowsC * params->columnsC * - sizeof(*clblasC), 0, CL_MEM_READ_WRITE); - if ((bufA == NULL) || (bufB == NULL) || (bufC == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. 
- */ - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xGEMV routine... "; - err = (cl_int)::clMath::clblas::gemv(params->order, params->transA, - params->M, params->N, alpha, bufA, params->offA, params->lda, - bufB, params->offBX, params->incx, beta, bufC, params->offCY, - params->incy, params->numCommandQueues, base->commandQueues(), 0, - NULL, events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::GEMV() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - clEnqueueReadBuffer(base->commandQueues()[0], bufC, CL_TRUE, 0, - params->rowsC * params->columnsC * sizeof(*clblasC), - clblasC, 0, NULL, NULL); - - releaseMemObjects(bufA, bufB, bufC); - - compareVectors(params->offCY, lenY, abs(params->incy), - params->columnsC * params->rowsC, blasC, clblasC); - - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; -} - -// Instantiate the test - -TEST_P(GEMV, sgemv) { - TestParams params; - - getParams(¶ms); - gemvCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(GEMV, dgemv) { - TestParams params; - - getParams(¶ms); - gemvCorrectnessTest<cl_double>(¶ms); -} - - -TEST_P(GEMV, cgemv) { - TestParams params; - - getParams(¶ms); - gemvCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(GEMV, zgemv) { - TestParams params; - - getParams(¶ms); 
- gemvCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-ger.cpp b/external/clBLAS/src/tests/correctness/corr-ger.cpp deleted file mode 100644 index 1ffe440a..00000000 --- a/external/clBLAS/src/tests/correctness/corr-ger.cpp +++ /dev/null @@ -1,265 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <ger.h> -#include<cltypes.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objx, cl_mem objy) -{ - if( objA!=NULL) - clReleaseMemObject(objA); - if( objx!=NULL) - clReleaseMemObject(objx); - if( objy!=NULL) - clReleaseMemObject(objy); -} - -template <typename T> static void -deleteBuffers(T *A, T *x, T *y, T *backA) -{ - - if(A != NULL) - { - delete[] A; - } - if(backA != NULL) - { - delete[] backA; - } - if(x != NULL) - { - delete[] x; - } - if(y != NULL) - { - delete[] y; -} -} - -template <typename T> -void -gerCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *x, *y, *backA; - //size_t N, M; - - T alpha_; - cl_mem 
bufA, bufx, bufy; - clMath::BlasBase *base; - cl_event *events; -// int ka, kxy; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthA; - if( params->order == clblasColumnMajor ) - lengthA = params->N * params->lda; - else lengthA = params->M * params->lda; - - size_t lengthx = (1 + (((params->M)-1) * abs(params->incx))); - size_t lengthy = (1 + (((params->N)-1) * abs(params->incy))); - - bool useAlpha = base->useAlpha(); - - if (useAlpha) { - alpha_ = convertMultiplier<T>(params->alpha); - } - - - A = new T[lengthA + params->offa]; - x = new T[lengthx + params->offBX]; - y = new T[lengthy + params->offCY]; - backA = new T[lengthA + params->offa]; - - if((A == NULL) || (backA == NULL) || (x == NULL) || (y == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T>(A, backA, x, y); - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - int creationFlags = 0; - creationFlags = creationFlags | RANDOM_INIT; - creationFlags = ( (params-> order) == clblasRowMajor)? 
(creationFlags | ROW_MAJOR_ORDER) : (creationFlags); - BlasRoutineID BlasFn = CLBLAS_GER; - - populate( (A + params->offa), params->M, params->N, params-> lda, BlasFn, creationFlags); - populate( (x + params->offBX), lengthx, 1, lengthx, BlasFn ); - populate( (y + params->offCY), lengthy, 1, lengthy, BlasFn ); - - // Copy C to backX - memcpy(backA, A, (lengthA + params->offa) * sizeof(T)); - - // Allocate buffers - bufA = base->createEnqueueBuffer(A, (lengthA + params->offa) * sizeof(*A), 0, CL_MEM_READ_WRITE); - bufx = base->createEnqueueBuffer(x, (lengthx + params->offBX) * sizeof(*x), 0, CL_MEM_READ_ONLY); - bufy = base->createEnqueueBuffer(y, (lengthy + params->offCY) * sizeof(*y), 0, CL_MEM_READ_ONLY); - - - ::std::cerr << "Done" << ::std::endl; - ::std::cerr << "Calling reference xGER routine... "; - - - clblasOrder fOrder; - size_t fN, fM; - size_t fOffx, fOffy; - int fIncx, fIncy; - T *fX, *fY; - fOrder = params->order; - fM = params->M; - fN = params->N; - fIncx = params->incx; - fIncy = params->incy; - fX = x; - fY = y; - fOffx = params->offBX; - fOffy = params->offCY; - - if (fOrder != clblasColumnMajor) { - - fOrder = clblasColumnMajor; - fM = params->N; - fN = params->M; - fX = y; - fY = x; - fIncx = params->incy; - fIncy = params->incx; - fOffx = params->offCY; - fOffy = params->offBX; - } - - // Call reference blas routine - clMath::blas::ger(fOrder, fM, fN, alpha_, fX , fOffx, fIncx, fY, fOffy, fIncy, A, params->offa, params->lda); - ::std::cerr << "Done" << ::std::endl; - - if ((bufA == NULL) || (bufx == NULL) || (bufy == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufA, bufx, bufy); - deleteBuffers<T>(A, x, y, backA); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." 
<< ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xGER routine... "; - - err = (cl_int)::clMath::clblas::ger( params->order, params->M, params->N, alpha_, - bufx, params->offBX, params->incx, bufy, params->offCY, params->incy,bufA, params->offa, params->lda, - params->numCommandQueues, base->commandQueues(), 0, NULL, events ); - - if (err != CL_SUCCESS) { - - releaseMemObjects(bufA, bufx, bufy); - deleteBuffers<T>(A, x, y, backA); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::GER() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - - releaseMemObjects(bufA, bufx, bufy); - deleteBuffers<T>(A, x, y, backA); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - clEnqueueReadBuffer(base->commandQueues()[0], bufA, CL_TRUE, 0, - (lengthA + params->offa)* sizeof(*backA), backA, 0, - NULL, NULL); - - releaseMemObjects(bufA, bufx, bufy); - - // handle lda correctly based on row-major/col-major.. 
- compareMatrices<T>(params->order, params->M , params->N, A+ params->offa, backA + params->offa, params->lda); - deleteBuffers<T>(A, x, y, backA); - delete[] events; -} - -// Instantiate the test - - -TEST_P(GER, sger) { - TestParams params; - - getParams(¶ms); - gerCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(GER, dger) { - TestParams params; - - getParams(¶ms); - gerCorrectnessTest<cl_double>(¶ms); -} - - -TEST_P(GER, cgeru) { - TestParams params; - - getParams(¶ms); - gerCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(GER, zgeru) { - TestParams params; - - getParams(¶ms); - gerCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-gerc.cpp b/external/clBLAS/src/tests/correctness/corr-gerc.cpp deleted file mode 100644 index 0070a778..00000000 --- a/external/clBLAS/src/tests/correctness/corr-gerc.cpp +++ /dev/null @@ -1,252 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <gerc.h> -#include<cltypes.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objx, cl_mem objy) -{ - if( objA!=NULL) - clReleaseMemObject(objA); - if( objx!=NULL) - clReleaseMemObject(objx); - if( objy!=NULL) - clReleaseMemObject(objy); -} - -template <typename T> static void -deleteBuffers(T *A, T *x, T *y, T *backA) -{ - - if(A != NULL) - { - delete[] A; - } - if(backA != NULL) - { - delete[] backA; - } - if(x != NULL) - { - delete[] x; - } - if(y != NULL) - { - delete[] y; -} -} - -template <typename T> -void -gercCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *x, *y, *backA; - //size_t N, M; - - T alpha_; - cl_mem bufA, bufx, bufy; - clMath::BlasBase *base; - cl_event *events; -// int ka, kxy; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthA; - if( params->order == clblasColumnMajor ) - lengthA = params->N * params->lda; - else - lengthA = params->M * params->lda; - - size_t lengthx = (1 + (((params->M)-1) * abs(params->incx))); - size_t lengthy = (1 + (((params->N)-1) * abs(params->incy))); - - bool useAlpha = base->useAlpha(); - - if (useAlpha) { - alpha_ = convertMultiplier<T>(params->alpha); - } - - - A = new T[lengthA + params->offa]; 
- x = new T[lengthx + params->offBX]; - y = new T[lengthy + params->offCY]; - backA = new T[lengthA + params->offa]; - - if((A == NULL) || (backA == NULL) || (x == NULL) || (y == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T>(A, x, y, backA); - delete[] events; - SUCCEED(); - return; - } - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - int creationFlags = 0; - creationFlags = creationFlags | RANDOM_INIT; - creationFlags = ( (params-> order) == clblasRowMajor)? (creationFlags | ROW_MAJOR_ORDER) : (creationFlags); - BlasRoutineID BlasFn = CLBLAS_GER; - - populate( (A + params->offa), params->M, params->N, params-> lda, BlasFn, creationFlags); - populate( (x + params->offBX), lengthx, 1, lengthx, BlasFn ); - populate( (y + params->offCY), lengthy, 1, lengthy, BlasFn ); - - // Copy C to backX - memcpy(backA, A, (lengthA + params->offa) * sizeof(T)); - - // Allocate buffers - bufA = base->createEnqueueBuffer(A, (lengthA + params->offa) * sizeof(*A), 0, CL_MEM_READ_WRITE); - bufx = base->createEnqueueBuffer(x, (lengthx + params->offBX) * sizeof(*x), 0, CL_MEM_READ_ONLY); - bufy = base->createEnqueueBuffer(y, (lengthy + params->offCY) * sizeof(*y), 0, CL_MEM_READ_ONLY); - - - ::std::cerr << "Done" << ::std::endl; - ::std::cerr << "Calling reference xGER routine... 
"; - - - clblasOrder fOrder; - size_t fN, fM; - size_t fOffx, fOffy; - int fIncx, fIncy; - T *fX, *fY; - fOrder = params->order; - fM = params->M; - fN = params->N; - fIncx = params->incx; - fIncy = params->incy; - fX = x; - fY = y; - fOffx = params->offBX; - fOffy = params->offCY; - - if (fOrder != clblasColumnMajor) { - - doConjugate( (y + params->offCY), (1 + (params->N-1) * abs(params->incy)), 1, 1 ); - fOrder = clblasColumnMajor; - fM = params->N; - fN = params->M; - fX = y; - fY = x; - fIncx = params->incy; - fIncy = params->incx; - fOffx = params->offCY; - fOffy = params->offBX; - // Note this according to the Legacy guide - clMath::blas::ger(fOrder, fM, fN, alpha_, fX , fOffx, fIncx, fY, fOffy, fIncy, A, params->offa, params->lda); - } - else { - clMath::blas::gerc(fOrder, fM, fN, alpha_, fX , fOffx, fIncx, fY, fOffy, fIncy, A, params->offa, params->lda); - } - ::std::cerr << "Done" << ::std::endl; - - if ((bufA == NULL) || (bufx == NULL) || (bufy == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufA, bufx, bufy); - deleteBuffers<T>(A, x, y, backA); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xGER routine... 
"; - - err = (cl_int)::clMath::clblas::gerc( params->order, params->M, params->N, alpha_, - bufx, params->offBX, params->incx, bufy, params->offCY, params->incy,bufA, params->offa, params->lda, - params->numCommandQueues, base->commandQueues(), 0, NULL, events ); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufx, bufy); - deleteBuffers<T>(A, x, y, backA); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::GER() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - - releaseMemObjects(bufA, bufx, bufy); - deleteBuffers<T>(A, x, y, backA); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - clEnqueueReadBuffer(base->commandQueues()[0], bufA, CL_TRUE, 0, - (lengthA + params->offa)* sizeof(*backA), backA, 0, - NULL, NULL); - - releaseMemObjects(bufA, bufx, bufy); - - // handle lda correctly based on row-major/col-major.. - compareMatrices<T>(params->order, params->M , params->N, A+ params->offa, backA + params->offa, params->lda); - deleteBuffers<T>(A, x, y, backA); - delete[] events; -} - -// Instantiate the test - - -TEST_P(GERC, cgerc) { - TestParams params; - - getParams(¶ms); - gercCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(GERC, zgerc) { - TestParams params; - - getParams(¶ms); - gercCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-hbmv.cpp b/external/clBLAS/src/tests/correctness/corr-hbmv.cpp deleted file mode 100644 index 9b7ff8e1..00000000 --- a/external/clBLAS/src/tests/correctness/corr-hbmv.cpp +++ /dev/null @@ -1,223 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <hbmv.h> -#include <gbmv.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objX, cl_mem objY) -{ - if(objA != NULL) - { - clReleaseMemObject(objA); - } - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objY != NULL) - { - clReleaseMemObject(objY); - } -} - -template <typename T> static void -deleteBuffers(T *A, T *X, T *blasY, T *clblasY) -{ - if(A != NULL) - { - delete[] A; - } - if(X != NULL) - { - delete[] X; - } - if(blasY != NULL) - { - delete[] blasY; - } - if(clblasY != NULL) - { - delete[] clblasY; // To hold clblas GBMV call results - } -} - -template <typename T> -void -hbmvCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *X, *blasY, *clblasY; - cl_mem bufA, bufX, bufY; - clMath::BlasBase *base; - cl_event *events; - T alpha, beta; - size_t lengthX, lengthY, lengthA; - - base = clMath::BlasBase::getInstance(); - - if (( (typeid(T) == typeid(DoubleComplex)) || (typeid(T) == typeid(cl_double)) ) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of 
command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - lengthA = params->N * params->lda; - lengthX = (params->N - 1)*abs(params->incx) + 1; - lengthY = (params->N - 1)*abs(params->incy) + 1; - - - A = new T[lengthA + params->offA ]; - X = new T[lengthX + params->offBX ]; - blasY = new T[lengthY + params->offCY ]; - clblasY = new T[lengthY + params->offCY ]; - - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - if((A == NULL) || (X == NULL) || (blasY == NULL) || (clblasY == NULL)) - { - deleteBuffers<T>(A, X, blasY, clblasY); - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - delete[] events; - SUCCEED(); - return; - } - - alpha = convertMultiplier<T>(params->alpha); - beta = convertMultiplier<T>(params->beta); - - randomGbmvMatrices(params->order, clblasNoTrans, params->N, params->N, &alpha, &beta, - (A + params->offA), params->lda, (X+params->offBX), params->incx, (blasY+params->offCY), params->incy ); - // Copy blasY to clblasY - memcpy(clblasY, blasY, (lengthY + params->offCY)* sizeof(*blasY)); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufA = base->createEnqueueBuffer(A, (lengthA + params->offA)* sizeof(*A), 0, CL_MEM_READ_ONLY); - bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX)* sizeof(*X), 0, CL_MEM_READ_ONLY); - bufY = base->createEnqueueBuffer(clblasY, (lengthY + params->offCY) * sizeof(*clblasY), 0, CL_MEM_READ_WRITE); - - ::std::cerr << "Calling reference xHBMV routine... "; - - clblasOrder fOrder; - clblasUplo fUplo; - fOrder = params->order; - fUplo = params->uplo; - size_t fN = params->N, fK = params->K; - - if (fOrder != clblasColumnMajor) - { - fOrder = clblasColumnMajor; - fUplo = (params->uplo == clblasLower)? 
clblasUpper : clblasLower; - doConjugate( (A + params->offA), params->N, params->lda, params->lda ); - } - - clMath::blas::hbmv(fOrder, fUplo, fN, fK, alpha, A, params->offA, params->lda, - X, params->offBX, params->incx, beta, blasY, params->offCY, params->incy); - ::std::cerr << "Done" << ::std::endl; - - if ((bufA == NULL) || (bufX == NULL) || (bufY == NULL)) { - // Skip the test, the most probable reason is - // matrix too big for a device. - - releaseMemObjects(bufA, bufX, bufY); - deleteBuffers<T>(A, X, blasY, clblasY); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xHBMV routine... "; - - err = (cl_int)clMath::clblas::hbmv(params->order, params->uplo, params->N, params->K, - alpha, bufA, params->offA, params->lda, bufX, params->offBX, params->incx, - beta, bufY, params->offCY, params->incy, - params->numCommandQueues, base->commandQueues(), 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX, bufY); - deleteBuffers<T>(A, X, blasY, clblasY); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::GBMV() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX, bufY); - deleteBuffers<T>(A, X, blasY, clblasY); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufY, CL_TRUE, 0, - (lengthY + params->offCY) * sizeof(*clblasY), clblasY, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "GBMV: Reading results failed...." 
<< std::endl; - } - - releaseMemObjects(bufA, bufX, bufY); - compareMatrices<T>(clblasColumnMajor, lengthY , 1, (blasY + params->offCY), (clblasY + params->offCY), - lengthY); - deleteBuffers<T>(A, X, blasY, clblasY); - delete[] events; -} - -// Instantiate the test -TEST_P(HBMV, chbmv) { - TestParams params; - - getParams(¶ms); - hbmvCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(HBMV, zhbmv) { - TestParams params; - - getParams(¶ms); - hbmvCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-hemm.cpp b/external/clBLAS/src/tests/correctness/corr-hemm.cpp deleted file mode 100644 index feb5f2ab..00000000 --- a/external/clBLAS/src/tests/correctness/corr-hemm.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <hemm.h> -#include<cltypes.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objB, cl_mem objC) -{ - if(objA != NULL) - { - clReleaseMemObject(objA); - } - if(objB != NULL) - { - clReleaseMemObject(objB); - } - if(objC != NULL) - { - clReleaseMemObject(objC); -} -} - -template <typename T> static void -deleteBuffers(T *A, T *B, T *C, T *backC) -{ - if(A != NULL) - { - delete[] A; - } - if(B != NULL) - { - delete[] B; - } - if(C != NULL) - { - delete[] C; - } - if(backC != NULL) - { - delete[] backC;// To hold the original C -} -} - -template <typename T> -void -hemmCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *B, *C, *backC; - T alpha_, beta_; - cl_mem bufA, bufB, bufC; - clMath::BlasBase *base; - cl_event *events; - size_t ka, kbc; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - events = new cl_event[params->numCommandQueues]; - if (events == NULL) - { - std::cerr << ">> WARNING: Unable to allocate memory for events" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - if( params->side == clblasLeft ) - ka = params->M; - else ka = params->N; - - if( params->order == clblasColumnMajor ) - kbc = params->N; - else kbc = params->M; - - size_t lengthA = ka * params->lda; - size_t lengthB = kbc * params->ldb; - size_t 
lengthC = kbc * params->ldc; - - alpha_ = convertMultiplier<T>(params->alpha); - beta_ = convertMultiplier<T>(params->beta); - - - A = new T[ lengthA + params->offA ]; - B = new T[ lengthB + params->offBX ]; - C = new T[ lengthC + params->offCY ]; - backC = new T[ lengthC + params->offCY ]; - - if((A == NULL) || (B == NULL) || (C == NULL) || (backC == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T>(A, B, C, backC); - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - - ::std::cerr << "Generating input data... " << std::endl; - - int creationFlags = 0, AcreationFlags; - creationFlags = creationFlags | RANDOM_INIT; - creationFlags = ( (params-> order) == clblasRowMajor)? (creationFlags | ROW_MAJOR_ORDER) : (creationFlags); - AcreationFlags = ( (params-> uplo) == clblasLower)? (creationFlags | LOWER_HALF_ONLY) : (creationFlags | UPPER_HALF_ONLY); - BlasRoutineID BlasFn = CLBLAS_HEMM; - - populate( A + params->offA , ka, ka, params-> lda, BlasFn, AcreationFlags); - populate( B + params->offBX , params-> M, params-> N, params-> ldb, BlasFn, creationFlags); - populate( C + params->offCY , params-> M, params-> N, params-> ldc, BlasFn, creationFlags); - - memcpy(backC, C, (lengthC + params->offCY) * sizeof(T)); - //printMatrixBlock( params->order, 0, 0, params->M, params->N, params->ldc, backC); - - // Allocate buffers - bufA = base->createEnqueueBuffer(A, (lengthA + params->offA) * sizeof(T), 0, CL_MEM_READ_ONLY); - bufB = base->createEnqueueBuffer(B, (lengthB + params->offBX) * sizeof(T), 0, CL_MEM_READ_ONLY); - bufC = base->createEnqueueBuffer(backC, (lengthC + params->offCY) * sizeof(T), 0, CL_MEM_READ_WRITE); - - ::std::cerr << "Done" << ::std::endl; - ::std::cerr << "Calling reference xHEMM routine... 
"; - - clblasOrder fOrder; - clblasUplo fUplo; - clblasSide fSide; - size_t fN, fM; - - fOrder = params->order; - fUplo = params->uplo; - fSide = params->side; - fM = params->M; - fN = params->N; - - if (fOrder != clblasColumnMajor) { - - fOrder = clblasColumnMajor; - fM = params->N; - fN = params->M; - fSide = (params->side == clblasLeft)? clblasRight: clblasLeft; - fUplo = (params->uplo == clblasUpper)? clblasLower: clblasUpper; - } - - // Call reference blas routine - clMath::blas::hemm(fOrder, fSide, fUplo, fM, fN, alpha_, - A, params->offA, params->lda, B, params->offBX, params->ldb, beta_, C, params->offCY, params->ldc); - ::std::cerr << "Done" << ::std::endl; - - if ((bufA == NULL) || (bufB == NULL) || (bufC == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, C, backC); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xHEMM routine... 
"; - - err = (cl_int)::clMath::clblas::hemm( params->order, params->side, params->uplo, params->M, params->N, alpha_, - bufA, params->offA, params->lda, bufB, params->offBX, params->ldb, beta_, bufC, params->offCY, params->ldc, - params->numCommandQueues, base->commandQueues(), 0, NULL, events ); - - if (err != CL_SUCCESS) { - - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, C, backC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::HEMM() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, C, backC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufC, CL_TRUE, 0, - (lengthC + params->offCY) * sizeof(T), backC, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "WARNING: corr-hemm: Erorr reading buffer..." << err << ::std::endl; - } - //printMatrixBlock( params->order, 0, 0, params->M, params->N, params->ldc, backC); - - releaseMemObjects(bufA, bufB, bufC); - - // handle lda correctly based on row-major/col-major.. 
- compareMatrices<T>(params->order, params->M , params->N, (C + params->offCY), (backC + params->offCY), params->ldc); - deleteBuffers<T>(A, B, C, backC); - delete[] events; -} - -// Instantiate the test - -TEST_P(HEMM, chemm) { - TestParams params; - - getParams(¶ms); - hemmCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(HEMM, zhemm) { - TestParams params; - - getParams(¶ms); - hemmCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-hemv.cpp b/external/clBLAS/src/tests/correctness/corr-hemv.cpp deleted file mode 100644 index 41bcb62b..00000000 --- a/external/clBLAS/src/tests/correctness/corr-hemv.cpp +++ /dev/null @@ -1,256 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <hemv.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objX, cl_mem objY) -{ - if(objA != NULL) - { - clReleaseMemObject(objA); - } - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objY != NULL) - { - clReleaseMemObject(objY); -} -} - -template <typename T> static void -deleteBuffers(T *A, T *X, T *blasY, T *clblasY) -{ - if(A != NULL) - { - delete[] A; - } - if(X != NULL) - { - delete[] X; - } - if(blasY != NULL) - { - delete[] blasY; - } - if(clblasY != NULL) - { - delete[] clblasY; // To hold clblas HEMV call results -} -} -/* -template <typename T> static -void printVector(T *data, size_t length) -{ - for(int i =0; i < length; i ++) - { - printf("(%20f, %20f)\n", data[i].s[0], data[i].s[1]); - } -} -*/ -template <typename T> -void -hemvCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *X, *blasY, *clblasY; - cl_mem bufA, bufX, bufY; - clMath::BlasBase *base; - cl_event *events; - T alpha, beta; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthA = params->N * params->lda; - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - size_t lengthY = (1 + ((params->N -1) * abs(params->incy))); 
- - A = new T[lengthA + params->offA ]; - X = new T[lengthX + params->offBX ]; - blasY = new T[lengthY + params->offCY ]; - clblasY = new T[lengthY + params->offCY ]; - - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - if((A == NULL) || (X == NULL) || (blasY == NULL) || (clblasY == NULL)) - { - deleteBuffers<T>(A, X, blasY, clblasY); - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - delete[] events; - SUCCEED(); - return; - } - - alpha = convertMultiplier<T>(params->alpha); - beta = convertMultiplier<T>(params->beta); -// beta.s[0] = 0.0f; -// beta.s[1] = 0.0f; - - randomHemvMatrices(params->order, params->uplo, params->N, true, &alpha, (A + params->offA), params->lda, - (X + params->offBX), params->incx, true, &beta, (blasY + params->offCY), params->incy); - // Copy blasY to clblasY - memcpy(clblasY, blasY, (lengthY + params->offCY)* sizeof(*blasY)); - ::std::cerr << "Done" << ::std::endl; - /* - printf("\n\n before acml call\nA\n"); - printMatrixBlock( params->order, 0, 0, params->N, params->N, params->lda, A+params->offA); - printf("\nX\n"); - printMatrixBlock( clblasColumnMajor, 0, 0, lengthX, 1, lengthX, X+params->offBX); - printf("\nY\n"); - printMatrixBlock( clblasColumnMajor, 0, 0, lengthY, 1, lengthY, blasY+params->offCY); - printf("\nY\n"); - printMatrixBlock( clblasColumnMajor, 0, 0, lengthY, 1, lengthY, clblasY + params->offCY); - */ - // Allocate buffers - bufA = base->createEnqueueBuffer(A, (lengthA + params->offA)* sizeof(*A), 0, CL_MEM_READ_ONLY); - bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX)* sizeof(*X), 0, CL_MEM_READ_ONLY); - bufY = base->createEnqueueBuffer(clblasY, (lengthY + params->offCY) * sizeof(*clblasY), 0, CL_MEM_READ_WRITE); - - //printData( "bufX", blasX, lengthX, 1, lengthX); - //printData( "clblasX", clblasX, lengthX, 1, lengthX); - - ::std::cerr << "Calling reference xHEMV routine... 
"; - - clblasOrder order; - clblasUplo fUplo; - - order = params->order; - fUplo = params->uplo; - - if (order != clblasColumnMajor) - { - order = clblasColumnMajor; - fUplo = (params->uplo == clblasUpper)? clblasLower : clblasUpper; - doConjugate( (A + params->offA), params->N, params->N, params->lda ); - } - ::clMath::blas::hemv( order, fUplo, params->N, alpha, A, params->offA, params->lda, X, params->offBX, params->incx, - beta, blasY, params->offCY, params->incy); - ::std::cerr << "Done" << ::std::endl; - /* - printf("\n\n after acml call\n"); - printf("\nY\n"); - printMatrixBlock( clblasColumnMajor, 0, 0, lengthY, 1, lengthY, blasY+params->offCY); - printf("Y in different format\n"); - printVector(blasY+params->offCY, lengthY); - */ - if ((bufA == NULL) || (bufX == NULL) || (bufY == NULL)) { - // Skip the test, the most probable reason is - // matrix too big for a device. - - releaseMemObjects(bufA, bufX, bufY); - deleteBuffers<T>(A, X, blasY, clblasY); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xHEMV routine... 
"; - - err = (cl_int)::clMath::clblas::hemv(params->order, params->uplo, params->N, alpha, bufA, - params->offA, params->lda, bufX, params->offBX, params->incx, beta, bufY, params->offCY, params->incy, - params->numCommandQueues, base->commandQueues(), 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX, bufY); - deleteBuffers<T>(A, X, blasY, clblasY); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::HEMV() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX, bufY); - deleteBuffers<T>(A, X, blasY, clblasY); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufY, CL_TRUE, 0, - (lengthY + params->offCY) * sizeof(*clblasY), clblasY, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "HEMV: Reading results failed...." 
<< std::endl; - } - - releaseMemObjects(bufA, bufX, bufY); - /* - printf("\n\n after our call\n"); - printf("\nY\n"); - printMatrixBlock( clblasColumnMajor, 0, 0, lengthY, 1, lengthY, clblasY+params->offCY); - printf("Y in different format\n"); - printVector(clblasY+params->offCY, lengthY); - */ - compareMatrices<T>(clblasColumnMajor, lengthY , 1, (blasY + params->offCY), (clblasY + params->offCY), - lengthY); - deleteBuffers<T>(A, X, blasY, clblasY); - delete[] events; -} - -// Instantiate the test - -TEST_P(HEMV, chemv) { - TestParams params; - - getParams(¶ms); - hemvCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(HEMV, zhemv) { - TestParams params; - - getParams(¶ms); - hemvCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-her.cpp b/external/clBLAS/src/tests/correctness/corr-her.cpp deleted file mode 100644 index 34b57994..00000000 --- a/external/clBLAS/src/tests/correctness/corr-her.cpp +++ /dev/null @@ -1,210 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <her.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objX) -{ - - if( objA!=NULL) - clReleaseMemObject(objA); - if( objX!=NULL) - clReleaseMemObject(objX); -} - -template <typename T> static void -deleteBuffers(T *A, T *X, T *backA) -{ - if(A != NULL) - { - delete[] A; - } - if(X != NULL) - { - delete[] X; - } - if(backA != NULL) - { - delete[] backA; -} -} - -template <typename T> -void -herCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *X, *backA; - T alpha_; - cl_mem bufA, bufX; - clMath::BlasBase *base; - cl_event *events; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthA = params->N * params->lda; - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - alpha_ = convertMultiplier<T>(params->alpha); - - A = new T[lengthA + params->offa ]; - backA = new T[lengthA + params->offa ]; - X = new T[lengthX + params->offBX ]; - - if((A == NULL) || (backA == NULL) || (X == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" 
<< ::std::endl; - deleteBuffers<T>(A, X, backA); - delete[] events; - SUCCEED(); - return; - } - srand(params->seed); - - ::std::cerr << "Generating input data... "; - randomHerMatrices( params->order, params->uplo, params->N, &alpha_, (A + params->offa), params->lda, (X + params->offBX), params->incx ); - memcpy(backA, A, (lengthA + params->offa)* sizeof(*A)); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufA = base->createEnqueueBuffer(A, (lengthA + params->offa) * sizeof(*A), 0, CL_MEM_READ_WRITE); - bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX) * sizeof(*X), 0, CL_MEM_READ_ONLY); - - ::std::cerr << "Calling reference xHER routine... "; - - clblasOrder fOrder; - clblasUplo fUplo; - fOrder = params->order; - fUplo = params->uplo; - - if (fOrder != clblasColumnMajor) { - - doConjugate( (X + params->offBX), (1 + (params->N-1) * abs(params->incx)), 1, 1 ); - fOrder = clblasColumnMajor; - fUplo = (fUplo == clblasLower)? clblasUpper : clblasLower; - } - clMath::blas::her( fOrder, fUplo, params->N, CREAL(alpha_), X , params->offBX, params->incx, A, params->offa, params->lda ); - ::std::cerr << "Done" << ::std::endl; - - if ((bufA == NULL) || (bufX == NULL) ) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufA, bufX); - deleteBuffers<T>(backA, A, X); - delete[] events; - if(bufA == NULL) - { - ::std::cerr << "BufA is null, lengthA is " << lengthA << ::std::endl; - } - if(bufX == NULL) - { - ::std::cerr << "BufX is null, lengthX is " << lengthX << ::std::endl; - } - - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xHER routine... 
"; - - err = (cl_int)::clMath::clblas::her( params->order, params->uplo, params->N, CREAL(alpha_), - bufX, params->offBX, params->incx, bufA, params->offa, params->lda, - params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX); - deleteBuffers<T>(backA, A, X); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::HER() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX); - deleteBuffers<T>(backA, A, X); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufA, CL_TRUE, 0, - (lengthA + params->offa) * sizeof(*A), backA, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "HER: Reading results failed...." << std::endl; - } - - releaseMemObjects(bufA, bufX); - - printf("Comparing the results\n"); - compareMatrices<T>(params->order, params->N , params->N, (A + params->offa), (backA + params->offa), - params->lda); - - deleteBuffers<T>( A, backA, X); - delete[] events; -} - -// Instantiate the test - -TEST_P(HER, cher) { - TestParams params; - - getParams(¶ms); - herCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(HER, zher) { - TestParams params; - - getParams(¶ms); - herCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-her2.cpp b/external/clBLAS/src/tests/correctness/corr-her2.cpp deleted file mode 100644 index 5d18e1d7..00000000 --- a/external/clBLAS/src/tests/correctness/corr-her2.cpp +++ /dev/null @@ -1,224 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <her2.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objX, cl_mem objY) -{ - if(objA != NULL) - { - clReleaseMemObject(objA); - } - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objY != NULL) - { - clReleaseMemObject(objY); -} -} - -template <typename T> static void -deleteBuffers(T *blasA, T *clblasA, T *X, T *Y) -{ - if(blasA != NULL) - { - delete[] blasA; - } - if(clblasA != NULL) - { - delete[] clblasA; - } - if(X != NULL) - { - delete[] X; - } - if(Y != NULL) - { - delete[] Y; -} -} - -template <typename T> -void -her2CorrectnessTest(TestParams *params) -{ - cl_int err; - T *blasA, *clblasA, *X, *Y; - cl_mem bufA, bufX, bufY; - clMath::BlasBase *base; - cl_event *events; - T alpha; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double2)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, 
params->numCommandQueues * sizeof(cl_event)); - - size_t lengthA = params->N * params->lda; - size_t lengthX = (1 + ((params->N - 1) * abs(params->incx))); - size_t lengthY = (1 + ((params->N - 1) * abs(params->incy))); - - blasA = new T[lengthA + params->offa ]; - clblasA = new T[lengthA + params->offa ]; - X = new T[lengthX + params->offBX ]; - Y = new T[lengthY + params->offCY ]; - - srand(params->seed); - - if((blasA == NULL) || (clblasA == NULL) || (X == NULL) || (Y == NULL)) - { - deleteBuffers<T>(blasA, clblasA, X, Y); - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - delete[] events; - SUCCEED(); - return; - } - alpha = convertMultiplier<T>(params->alpha); - - ::std::cerr << "Generating input data... "; - - randomHer2Matrices<T>(params->order, params->uplo, params->N, &alpha, (blasA + params->offa), params->lda, - (X + params->offBX), params->incx, (Y + params->offCY), params->incy); - - // Copy blasA to clblasA - memcpy(clblasA, blasA, (lengthA + params->offa)* sizeof(*blasA)); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufA = base->createEnqueueBuffer(clblasA, (lengthA + params->offa)* sizeof(*clblasA), 0,CL_MEM_READ_WRITE); - bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX)* sizeof(*X), 0, CL_MEM_READ_ONLY); - bufY = base->createEnqueueBuffer(Y, (lengthY + params->offCY)* sizeof(*Y), 0, CL_MEM_READ_ONLY); - - ::std::cerr << "Calling reference xHER2 routine... "; - - clblasOrder order; - clblasUplo fUplo; - order = params->order; - fUplo = params->uplo; - - if (order != clblasColumnMajor) - { - doConjugate( (X + params->offBX), 1, (1 + (params->N-1) * abs(params->incx)), (1 + (params->N-1) * abs(params->incx)) ); - doConjugate( (Y + params->offCY), 1, (1 + (params->N-1) * abs(params->incy)), (1 + (params->N-1) * abs(params->incy)) ); - order = clblasColumnMajor; - fUplo = (fUplo == clblasLower)? 
clblasUpper : clblasLower; - ::clMath::blas::her2( order, fUplo, params->N, alpha, Y, params->offCY, params->incy, X, params->offBX, params->incx, blasA, params->offa, params->lda); - } - else { - ::clMath::blas::her2( order, fUplo, params->N, alpha, X, params->offBX, params->incx, Y, params->offCY, params->incy, blasA, params->offa, params->lda); - } - ::std::cerr << "Done" << ::std::endl; - - if ((bufA == NULL) || (bufX == NULL) || (bufY == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufA, bufX, bufY); - deleteBuffers<T>(blasA, clblasA, X, Y); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xHER2 routine... "; - - err = (cl_int)::clMath::clblas::her2( params->order, params->uplo, params->N, alpha, - bufX, params->offBX, params->incx, bufY, params->offCY, params->incy, bufA, params->offa, params->lda, - params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX, bufY); - deleteBuffers<T>(blasA, clblasA, X, Y); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::HER2() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX, bufY); - deleteBuffers<T>(blasA, clblasA, X, Y); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufA, CL_TRUE, 0, - (lengthA + params->offa) * sizeof(*clblasA), clblasA, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "HER2: Reading results failed...." 
<< std::endl; - } - - releaseMemObjects(bufA, bufX, bufY); - - compareMatrices<T>(params->order, params->N , params->N, (blasA + params->offa), (clblasA + params->offa), - params->lda); - - deleteBuffers<T>(blasA, clblasA, X, Y); - delete[] events; -} - -// Instantiate the test - -TEST_P(HER2, cher2) { - TestParams params; - - getParams(¶ms); - her2CorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(HER2, zher2) { - TestParams params; - - getParams(¶ms); - her2CorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-her2k.cpp b/external/clBLAS/src/tests/correctness/corr-her2k.cpp deleted file mode 100644 index d7db83af..00000000 --- a/external/clBLAS/src/tests/correctness/corr-her2k.cpp +++ /dev/null @@ -1,212 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <her2k.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objC, cl_mem objB) -{ - if(objA != NULL) - clReleaseMemObject(objA); - if(objC != NULL) - clReleaseMemObject(objC); - if(objB != NULL) - clReleaseMemObject(objB); -} - -template <typename T> static void -deleteBuffers(T *A, T *B, T *blasC, T *clblasC) -{ - if(A != NULL) - delete[] A; - if(B != NULL) - delete[] B; - if(blasC != NULL) - delete[] blasC; - if(clblasC != NULL) - delete[] clblasC; -} - -template <typename T> -void -her2kCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *B, *blasC, *clblasC; - T alpha, beta; - cl_mem bufA, bufC, bufB; - clMath::BlasBase *base; - cl_event *events; - - if (params->transA == clblasTrans) { - ::std::cerr << ">> her2k(TRANSPOSE) for complex numbers " - "is not allowed." << ::std::endl << - ">> Test skipped." 
<< ::std::endl; - SUCCEED(); - return; - } - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - A = new T[params->rowsA * params->columnsA]; - B = new T[params->rowsB * params->columnsB]; - blasC = new T[params->rowsC * params->columnsC]; - clblasC = new T[params->rowsC * params->columnsC]; - - if((A == NULL) || (B == NULL) || (blasC == NULL) || (clblasC == NULL)) - { - deleteBuffers<T>(A, B, blasC, clblasC); - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - - alpha = convertMultiplier<T>(params->alpha); - beta = convertMultiplier<T>(params->beta); - - ::std::cerr << "Generating input data... "; - - clblasTranspose ftransB = (params->transA==clblasNoTrans)? 
clblasConjTrans: clblasNoTrans; - - randomGemmMatrices<T>(params->order, params->transA, ftransB, - params->N, params->N, params->K, true, &alpha, A, params->lda, - B, params->ldb, true, &beta, blasC, params->ldc); - - memcpy(clblasC, blasC, params->rowsC * params->columnsC * sizeof(*blasC)); - ::std::cerr << "Done" << ::std::endl; - - bufA = base->createEnqueueBuffer(A, params->rowsA * params->columnsA * sizeof(*A), params->offA * sizeof(*A), - CL_MEM_READ_ONLY); - bufB = base->createEnqueueBuffer(B, params->rowsB * params->columnsB * sizeof(*B), params->offBX * sizeof(*B), - CL_MEM_READ_ONLY); - bufC = base->createEnqueueBuffer(clblasC, params->rowsC * params->columnsC * sizeof(*clblasC), - params->offCY * sizeof(*clblasC), - CL_MEM_READ_WRITE); - - if ((bufA == NULL) || (bufB == NULL)|| (bufC == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling reference xHER2K routine... "; - T fAlpha = alpha; - if (params->order == clblasColumnMajor) { - ::clMath::blas::her2k(clblasColumnMajor, params->uplo, params->transA, - params->N, params->K, fAlpha, A, 0, params->lda, B, 0, params->ldb, - CREAL(beta), blasC, 0, params->ldc); - } - else { - - CIMAG( fAlpha ) *= -1.0; // According to netlib C- interface - clblasTranspose fTransA = (params->transA == clblasNoTrans) ? clblasConjTrans : clblasNoTrans; - clblasUplo fUplo = (params->uplo == clblasUpper) ? 
clblasLower : clblasUpper; - - ::clMath::blas::her2k(clblasColumnMajor, fUplo, fTransA, params->N, params->K, fAlpha, - A, 0, params->lda, B, 0, params->ldb, CREAL(beta), blasC, 0, params->ldc); - - } - ::std::cerr << "Done" << ::std::endl; - - ::std::cerr << "Calling clblas xHER2K routine... "; - err = (cl_int)::clMath::clblas::her2k(params->order, params->uplo, - params->transA, params->N, params->K, - alpha, bufA, params->offA, params->lda, bufB, params->offBX, params->ldb, - CREAL(beta), bufC, params->offCY, - params->ldc, params->numCommandQueues, - base->commandQueues(), 0, NULL, - events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::HER2K() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - clEnqueueReadBuffer(base->commandQueues()[0], bufC, CL_TRUE, params->offCY * sizeof(*clblasC), - params->rowsC * params->columnsC * sizeof(*clblasC), clblasC, 0, NULL, NULL); - - releaseMemObjects(bufA, bufB, bufC); - compareMatrices<T>(params->order, params->N, params->N, blasC, clblasC, params->ldc); - - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; -} - -// Instantiate the test - -TEST_P(HER2K, cher2k) { - TestParams params; - - getParams(¶ms); - her2kCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(HER2K, zher2k) { - TestParams params; - - getParams(¶ms); - her2kCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-herk.cpp b/external/clBLAS/src/tests/correctness/corr-herk.cpp deleted file mode 100644 index 2b5d8ab0..00000000 --- a/external/clBLAS/src/tests/correctness/corr-herk.cpp +++ /dev/null 
@@ -1,240 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <herk.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objC) -{ - if(objA != NULL) - { - clReleaseMemObject(objA); - } - if(objC != NULL) - { - clReleaseMemObject(objC); - } -} - -template <typename T> static void -deleteBuffers(T *A, T *blasC, T *clblasC) -{ - if(A != NULL) - { - delete[] A; - } - if(blasC != NULL) - { - delete[] blasC; - } - if(clblasC != NULL) - { - delete[] clblasC; - } -} - -template <typename T> -void -herkCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *blasC, *clblasC; - T alpha, beta; - cl_mem bufA, bufC; - clMath::BlasBase *base; - bool useAlpha; - bool useBeta; - cl_event *events; - - if (params->transA == clblasTrans) { - ::std::cerr << ">> herk(TRANSPOSE) for complex numbers " - "is not allowed." << ::std::endl << - ">> Test skipped." 
<< ::std::endl; - SUCCEED(); - return; - } - - base = clMath::BlasBase::getInstance(); - alpha = ZERO<T>(); - beta = ZERO<T>(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - A = new T[params->rowsA * params->columnsA]; - blasC = new T[params->rowsC * params->columnsC]; - clblasC = new T[params->rowsC * params->columnsC]; - - if((A == NULL) || (blasC == NULL) || (clblasC == NULL)) - { - deleteBuffers<T>(A, blasC, clblasC); - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - - useAlpha = true; - useBeta = true; - - alpha = convertMultiplier<T>(params->alpha); - beta = convertMultiplier<T>(params->beta); - - ::std::cerr << "Generating input data... "; - - randomGemmMatrices<T>(params->order, params->transA, clblasNoTrans, - params->N, params->N, params->K, useAlpha, &alpha, A, params->lda, - NULL, 0, useBeta, &beta, blasC, params->ldc); - memcpy(clblasC, blasC, params->rowsC * params->columnsC * sizeof(*blasC)); - ::std::cerr << "Done" << ::std::endl; - - ::std::cerr << "Calling reference xHERK routine... 
"; - if (params->order == clblasColumnMajor) { - ::clMath::blas::herk(clblasColumnMajor, params->uplo, params->transA, - params->N, params->K, CREAL(alpha), A, params->lda, - CREAL(beta), blasC, params->ldc); - } - else { - /* - T *reorderedA = new T[params->rowsA * params->columnsA]; - T *reorderedC = new T[params->rowsC * params->columnsC]; - - reorderMatrix<T>(clblasRowMajor, params->rowsA, params->columnsA, - A, reorderedA); - reorderMatrix<T>(clblasRowMajor, params->rowsC, params->columnsC, - blasC, reorderedC); - ::clMath::blas::herk(clblasColumnMajor, params->uplo, params->transA, - params->N, params->K, CREAL(alpha), reorderedA, - params->rowsA, - CREAL(beta), reorderedC, params->rowsC); - reorderMatrix<T>(clblasColumnMajor, params->rowsC, params->columnsC, - reorderedC, blasC); - - delete[] reorderedC; - delete[] reorderedA; - */ - clblasTranspose fTransA = (params->transA == clblasNoTrans) ? clblasConjTrans : clblasNoTrans; - clblasUplo fUplo = (params->uplo == clblasUpper) ? clblasLower : clblasUpper; - - ::clMath::blas::herk(clblasColumnMajor, fUplo, fTransA, params->N, params->K, CREAL(alpha), - A, params->lda, CREAL(beta), blasC, params->ldc); - - } - ::std::cerr << "Done" << ::std::endl; - - bufA = base->createEnqueueBuffer(A, params->rowsA * params->columnsA * - sizeof(*A), params->offA * sizeof(*A), - CL_MEM_READ_ONLY); - bufC = base->createEnqueueBuffer(clblasC, params->rowsC * params->columnsC * - sizeof(*clblasC), - params->offCY * sizeof(*clblasC), - CL_MEM_READ_WRITE); - - if ((bufA == NULL) || (bufC == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufA, bufC); - deleteBuffers<T>(A, blasC, clblasC); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." 
<< ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xHERK routine... "; - err = (cl_int)::clMath::clblas::herk(params->order, params->uplo, - params->transA, params->N, params->K, - CREAL(alpha), bufA, params->offA, params->lda, - CREAL(beta), bufC, params->offCY, - params->ldc, params->numCommandQueues, - base->commandQueues(), 0, NULL, - events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufC); - deleteBuffers<T>(A, blasC, clblasC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::HERK() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufC); - deleteBuffers<T>(A, blasC, clblasC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - clEnqueueReadBuffer(base->commandQueues()[0], bufC, CL_TRUE, - params->offCY * sizeof(*clblasC), - params->rowsC * params->columnsC * sizeof(*clblasC), - clblasC, 0, NULL, NULL); - - releaseMemObjects(bufA, bufC); - compareMatrices<T>(params->order, params->N, params->N, blasC, clblasC, - params->ldc); - - deleteBuffers<T>(A, blasC, clblasC); - delete[] events; -} - -// Instantiate the test - -TEST_P(HERK, cherk) { - TestParams params; - - getParams(¶ms); - herkCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(HERK, zherk) { - TestParams params; - - getParams(¶ms); - herkCorrectnessTest<DoubleComplex>(¶ms); -} - diff --git a/external/clBLAS/src/tests/correctness/corr-hpmv.cpp b/external/clBLAS/src/tests/correctness/corr-hpmv.cpp deleted file mode 100644 index 95317741..00000000 --- a/external/clBLAS/src/tests/correctness/corr-hpmv.cpp +++ /dev/null @@ -1,221 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <hpmv.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objX, cl_mem objY) -{ - if(objA != NULL) - { - clReleaseMemObject(objA); - } - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objY != NULL) - { - clReleaseMemObject(objY); - } -} - -template <typename T> static void -deleteBuffers(T *A, T *X, T *blasY, T *clblasY) -{ - if(A != NULL) - { - delete[] A; - } - if(X != NULL) - { - delete[] X; - } - if(blasY != NULL) - { - delete[] blasY; - } - if(clblasY != NULL) - { - delete[] clblasY; // To hold clblas HPMV call results - } -} - -template <typename T> -void -hpmvCorrectnessTest(TestParams *params) -{ - cl_int err; - T *AP, *X, *blasY, *clblasY; - cl_mem bufAP, bufX, bufY; - clMath::BlasBase *base; - cl_event *events; - T alpha, beta; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - 
SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthA = (params->N * (params->N + 1)) / 2; - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - size_t lengthY = (1 + ((params->N -1) * abs(params->incy))); - - AP = new T[lengthA + params->offA ]; - X = new T[lengthX + params->offBX ]; - blasY = new T[lengthY + params->offCY ]; - clblasY = new T[lengthY + params->offCY ]; - - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - if((AP == NULL) || (X == NULL) || (blasY == NULL) || (clblasY == NULL)) - { - deleteBuffers<T>(AP, X, blasY, clblasY); - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - delete[] events; - SUCCEED(); - return; - } - - alpha = convertMultiplier<T>(params->alpha); - beta = convertMultiplier<T>(params->beta); - - randomHemvMatrices(params->order, params->uplo, params->N, true, &alpha, (AP + params->offA), params->lda, - (X + params->offBX), params->incx, true, &beta, (blasY + params->offCY), params->incy); - // Copy blasY to clblasY - memcpy(clblasY, blasY, (lengthY + params->offCY)* sizeof(*blasY)); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufAP = base->createEnqueueBuffer(AP, (lengthA + params->offA)* sizeof(*AP), 0, CL_MEM_READ_ONLY); - bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX)* sizeof(*X), 0, CL_MEM_READ_ONLY); - bufY = base->createEnqueueBuffer(clblasY, (lengthY + params->offCY) * sizeof(*clblasY), 0, CL_MEM_READ_WRITE); - - ::std::cerr << "Calling reference xHPMV routine... "; - - clblasOrder order; - clblasUplo fUplo; - - order = params->order; - fUplo = params->uplo; - - if (order != clblasColumnMajor) - { - order = clblasColumnMajor; - fUplo = (params->uplo == clblasUpper)? 
clblasLower : clblasUpper; - doConjugate( (AP + params->offA), lengthA, 1, 1 ); - } - ::clMath::blas::hpmv( order, fUplo, params->N, alpha, AP, params->offA, X, params->offBX, params->incx, - beta, blasY, params->offCY, params->incy); - ::std::cerr << "Done" << ::std::endl; - - if ((bufAP == NULL) || (bufX == NULL) || (bufY == NULL)) { - // Skip the test, the most probable reason is - // matrix too big for a device. - - releaseMemObjects(bufAP, bufX, bufY); - deleteBuffers<T>(AP, X, blasY, clblasY); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xHPMV routine... "; - - err = (cl_int)::clMath::clblas::hpmv(params->order, params->uplo, params->N, alpha, bufAP, - params->offA, bufX, params->offBX, params->incx, beta, bufY, params->offCY, params->incy, - params->numCommandQueues, base->commandQueues(), 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufAP, bufX, bufY); - deleteBuffers<T>(AP, X, blasY, clblasY); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::HPMV() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufAP, bufX, bufY); - deleteBuffers<T>(AP, X, blasY, clblasY); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufY, CL_TRUE, 0, - (lengthY + params->offCY) * sizeof(*clblasY), clblasY, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "HPMV: Reading results failed...." 
<< std::endl; - } - - releaseMemObjects(bufAP, bufX, bufY); - - compareMatrices<T>(clblasColumnMajor, lengthY , 1, (blasY + params->offCY), (clblasY + params->offCY), - lengthY); - deleteBuffers<T>(AP, X, blasY, clblasY); - delete[] events; -} - -// Instantiate the test - -TEST_P(HPMV, chpmv) { - TestParams params; - - getParams(¶ms); - hpmvCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(HPMV, zhpmv) { - TestParams params; - - getParams(¶ms); - hpmvCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-hpr.cpp b/external/clBLAS/src/tests/correctness/corr-hpr.cpp deleted file mode 100644 index 7a513c30..00000000 --- a/external/clBLAS/src/tests/correctness/corr-hpr.cpp +++ /dev/null @@ -1,209 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <hpr.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objX) -{ - if( objA!=NULL) - clReleaseMemObject(objA); - if( objX!=NULL) - clReleaseMemObject(objX); -} - -template <typename T> static void -deleteBuffers(T *A, T *X, T *backA) -{ - if(A != NULL) - { - delete[] A; - } - if(X != NULL) - { - delete[] X; - } - if(backA != NULL) - { - delete[] backA; - } -} - -template <typename T> -void -hprCorrectnessTest(TestParams *params) -{ - cl_int err; - T *AP, *X, *backA; - T alpha_; - cl_mem bufAP, bufX; - clMath::BlasBase *base; - cl_event *events; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double2)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthAP = (params->N *( params->N + 1 ))/2 ; - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - alpha_ = convertMultiplier<T>(params->alpha); - - AP = new T[lengthAP + params->offa ]; - backA = new T[lengthAP + params->offa ]; - X = new T[lengthX + params->offBX ]; - - if((AP == NULL) || (backA == NULL) || (X == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" 
<< ::std::endl; - deleteBuffers<T>(AP, X, backA); - delete[] events; - SUCCEED(); - return; - } - srand(params->seed); - - ::std::cerr << "Generating input data... "; - randomHerMatrices( params->order, params->uplo, params->N, &alpha_, (AP + params->offa), params->lda, (X + params->offBX), params->incx ); - memcpy(backA, AP, (lengthAP + params->offa)* sizeof(T)); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufAP = base->createEnqueueBuffer(AP, (lengthAP + params->offa) * sizeof(T), 0, CL_MEM_READ_WRITE); - bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX) * sizeof(*X), 0, CL_MEM_READ_ONLY); - - ::std::cerr << "Calling reference xHPR routine... "; - - clblasOrder fOrder; - clblasUplo fUplo; - fOrder = params->order; - fUplo = params->uplo; - - if (fOrder != clblasColumnMajor) { - - doConjugate( (X + params->offBX), (1 + (params->N-1) * abs(params->incx)), 1, 1 ); - fOrder = clblasColumnMajor; - fUplo = (fUplo == clblasLower)? clblasUpper : clblasLower; - } - clMath::blas::hpr( fOrder, fUplo, params->N, CREAL(alpha_), X , params->offBX, params->incx, AP, params->offa); - ::std::cerr << "Done" << ::std::endl; - - if ((bufAP == NULL) || (bufX == NULL) ) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufAP, bufX); - deleteBuffers<T>(backA, AP, X); - delete[] events; - if(bufAP == NULL) - { - ::std::cerr << "BufA is null, lengthA is " << lengthAP << ::std::endl; - } - if(bufX == NULL) - { - ::std::cerr << "BufX is null, lengthX is " << lengthX << ::std::endl; - } - - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xHPR routine... 
"; - - err = (cl_int)::clMath::clblas::hpr( params->order, params->uplo, params->N, CREAL(alpha_), - bufX, params->offBX, params->incx, bufAP, params->offa, - params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufAP, bufX); - deleteBuffers<T>(backA, AP, X); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::HPR() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufAP, bufX); - deleteBuffers<T>(backA, AP, X); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufAP, CL_TRUE, 0, - (lengthAP + params->offa) * sizeof(T), backA, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "HPR: Reading results failed...." << std::endl; - } - - releaseMemObjects(bufAP, bufX); - - printf("Comparing the results\n"); - - compareMatrices<T>(clblasColumnMajor, lengthAP, 1, (AP + params->offa), (backA + params->offa), lengthAP); - - deleteBuffers<T>( AP, backA, X); - delete[] events; -} - -// Instantiate the test - -TEST_P(HPR, chpr) { - TestParams params; - - getParams(¶ms); - hprCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(HPR, zhpr) { - TestParams params; - - getParams(¶ms); - hprCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-hpr2.cpp b/external/clBLAS/src/tests/correctness/corr-hpr2.cpp deleted file mode 100644 index 313f167d..00000000 --- a/external/clBLAS/src/tests/correctness/corr-hpr2.cpp +++ /dev/null @@ -1,222 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <hpr2.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objX, cl_mem objY) -{ - if(objA != NULL) - { - clReleaseMemObject(objA); - } - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objY != NULL) - { - clReleaseMemObject(objY); -} -} - -template <typename T> static void -deleteBuffers(T *blasA, T *clblasA, T *X, T *Y) -{ - if(blasA != NULL) - { - delete[] blasA; - } - if(clblasA != NULL) - { - delete[] clblasA; - } - if(X != NULL) - { - delete[] X; - } - if(Y != NULL) - { - delete[] Y; -} -} - -template <typename T> -void -hpr2CorrectnessTest(TestParams *params) -{ - cl_int err; - T *blasAP, *clblasAP, *X, *Y; - cl_mem bufAP, bufX, bufY; - clMath::BlasBase *base; - cl_event *events; - T alpha; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double2)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, 
params->numCommandQueues * sizeof(cl_event)); - - size_t lengthAP = (params->N *( params->N + 1 ))/2 ; - size_t lengthX = (1 + ((params->N - 1) * abs(params->incx))); - size_t lengthY = (1 + ((params->N - 1) * abs(params->incy))); - - blasAP = new T[lengthAP + params->offa ]; - clblasAP = new T[lengthAP + params->offa ]; - X = new T[lengthX + params->offBX ]; - Y = new T[lengthY + params->offCY ]; - - srand(params->seed); - - if((blasAP == NULL) || (clblasAP == NULL) || (X == NULL) || (Y == NULL)) - { - deleteBuffers<T>(blasAP, clblasAP, X, Y); - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - delete[] events; - SUCCEED(); - return; - } - alpha = convertMultiplier<T>(params->alpha); - - ::std::cerr << "Generating input data... "; - randomHer2Matrices<T>(params->order, params->uplo, params->N, &alpha, (blasAP + params->offa), params->lda, - (X + params->offBX), params->incx, (Y + params->offCY), params->incy); - - // Copy blasA to clblasA - memcpy(clblasAP, blasAP, (lengthAP + params->offa)* sizeof(*blasAP)); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufAP = base->createEnqueueBuffer(clblasAP, (lengthAP + params->offa)* sizeof(*clblasAP), 0,CL_MEM_READ_WRITE); - bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX)* sizeof(*X), 0, CL_MEM_READ_ONLY); - bufY = base->createEnqueueBuffer(Y, (lengthY + params->offCY)* sizeof(*Y), 0, CL_MEM_READ_ONLY); - - ::std::cerr << "Calling reference xHPR2 routine... "; - - clblasOrder order; - clblasUplo fUplo; - order = params->order; - fUplo = params->uplo; - - if (order != clblasColumnMajor) - { - doConjugate( (X + params->offBX), 1, (1 + (params->N-1) * abs(params->incx)), (1 + (params->N-1) * abs(params->incx)) ); - doConjugate( (Y + params->offCY), 1, (1 + (params->N-1) * abs(params->incy)), (1 + (params->N-1) * abs(params->incy)) ); - order = clblasColumnMajor; - fUplo = (fUplo == clblasLower)? 
clblasUpper : clblasLower; - ::clMath::blas::hpr2( order, fUplo, params->N, alpha, Y, params->offCY, params->incy, X, params->offBX, params->incx, blasAP, params->offa); - } - else { - ::clMath::blas::hpr2( order, fUplo, params->N, alpha, X, params->offBX, params->incx, Y, params->offCY, params->incy, blasAP, params->offa); - } - ::std::cerr << "Done" << ::std::endl; - - if ((bufAP == NULL) || (bufX == NULL) || (bufY == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufAP, bufX, bufY); - deleteBuffers<T>(blasAP, clblasAP, X, Y); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xHPR2 routine... "; - - err = (cl_int)::clMath::clblas::hpr2( params->order, params->uplo, params->N, alpha, - bufX, params->offBX, params->incx, bufY, params->offCY, params->incy, bufAP, params->offa, - params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufAP, bufX, bufY); - deleteBuffers<T>(blasAP, clblasAP, X, Y); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::HPR2() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufAP, bufX, bufY); - deleteBuffers<T>(blasAP, clblasAP, X, Y); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufAP, CL_TRUE, 0, - (lengthAP + params->offa) * sizeof(*clblasAP), clblasAP, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "HPR2: Reading results failed...." 
<< std::endl; - } - - releaseMemObjects(bufAP, bufX, bufY); - - compareMatrices<T>(clblasColumnMajor, lengthAP, 1, (blasAP + params->offa), (clblasAP + params->offa), lengthAP); - - deleteBuffers<T>(blasAP, clblasAP, X, Y); - delete[] events; -} - -// Instantiate the test - -TEST_P(HPR2, chpr2) { - TestParams params; - - getParams(¶ms); - hpr2CorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(HPR2, zhpr2) { - TestParams params; - - getParams(¶ms); - hpr2CorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-iamax.cpp b/external/clBLAS/src/tests/correctness/corr-iamax.cpp deleted file mode 100644 index 81f2bd32..00000000 --- a/external/clBLAS/src/tests/correctness/corr-iamax.cpp +++ /dev/null @@ -1,206 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <iamax.h> - -static void -releaseMemObjects(cl_mem objX, cl_mem objiAmax, cl_mem objScratch) -{ - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objiAmax != NULL) - { - clReleaseMemObject(objiAmax); - } - if(objScratch != NULL) - { - clReleaseMemObject(objScratch); - } -} - -template <typename T> static void -deleteBuffers(T *blasX, int *blasiAmax=NULL, int *clblasiAmax=NULL) -{ - if(blasX != NULL) - { - delete[] blasX; - } - if(clblasiAmax != NULL) - { - delete[] clblasiAmax; - } - if(blasiAmax != NULL) - { - delete(blasiAmax); - } -} - -template <typename T> -void -iamaxCorrectnessTest(TestParams *params) -{ - cl_int err; - T *blasX; - int *clblasiAmax, *blasiAmax; - cl_mem bufX, bufiAmax, scratchBuff; - clMath::BlasBase *base; - cl_event *events; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - - blasX = new T[lengthX + params->offBX ]; - blasiAmax = new int[1]; - clblasiAmax = new int[1 + params->offa]; - - if((blasX == NULL) || (clblasiAmax == NULL) || (blasiAmax == NULL)) - { - ::std::cerr << "Cannot allocate memory on host 
side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T>(blasX, blasiAmax, clblasiAmax); - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - ::std::cerr << "Generating input data... "; - - randomVectors<T>(params->N, (blasX + params->offBX), params->incx, NULL, 0); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufX = base->createEnqueueBuffer(blasX, (lengthX + params->offBX)* sizeof(T), 0, CL_MEM_READ_ONLY); - bufiAmax = base->createEnqueueBuffer(NULL, (1 + params->offa) * sizeof(int), 0, CL_MEM_READ_WRITE); - scratchBuff = base->createEnqueueBuffer(NULL, (2 * lengthX * sizeof(T)), 0, CL_MEM_READ_WRITE); - - ::std::cerr << "Calling reference xiAMAX routine... "; - - *blasiAmax = ::clMath::blas::iamax( params->N, blasX, params->offBX, params->incx); - ::std::cerr << "Done" << ::std::endl; - - if ((bufX == NULL) || (bufiAmax == NULL) || (scratchBuff == NULL)) { - releaseMemObjects(bufX, bufiAmax, scratchBuff); - deleteBuffers<T>(blasX, blasiAmax, clblasiAmax); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xiAMAX routine... "; - - DataType type; - type = ( typeid(T) == typeid(cl_float))? TYPE_FLOAT : ( typeid(T) == typeid(cl_double))? TYPE_DOUBLE: ( typeid(T) == typeid(cl_float2))? 
TYPE_COMPLEX_FLOAT:TYPE_COMPLEX_DOUBLE; - - // Should use bufXTemp as well - err = (cl_int)::clMath::clblas::iamax( type, params->N, bufiAmax, params->offa, - bufX, params->offBX, params->incx, scratchBuff, - params->numCommandQueues, base->commandQueues(), 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufX, bufiAmax, scratchBuff); - deleteBuffers<T>(blasX, blasiAmax, clblasiAmax); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::iAMAX() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufX, bufiAmax, scratchBuff); - deleteBuffers<T>(blasX, blasiAmax, clblasiAmax); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufiAmax, CL_TRUE, 0, - (1 + params->offa) * sizeof(*clblasiAmax), clblasiAmax, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "iAMAX: Reading results failed...." 
<< std::endl; - } - - compareValues<int>((blasiAmax), (clblasiAmax+params->offa), 0); - releaseMemObjects(bufX, bufiAmax, scratchBuff); - deleteBuffers<T>(blasX, blasiAmax, clblasiAmax); - delete[] events; -} - -// Instantiate the test - -TEST_P(iAMAX, isamax) { - TestParams params; - - getParams(¶ms); - iamaxCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(iAMAX, idamax) { - TestParams params; - - getParams(¶ms); - iamaxCorrectnessTest<cl_double>(¶ms); -} - -TEST_P(iAMAX, icamax) { - TestParams params; - - getParams(¶ms); - iamaxCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(iAMAX, izamax) { - TestParams params; - - getParams(¶ms); - iamaxCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-nrm2.cpp b/external/clBLAS/src/tests/correctness/corr-nrm2.cpp deleted file mode 100644 index 588ee825..00000000 --- a/external/clBLAS/src/tests/correctness/corr-nrm2.cpp +++ /dev/null @@ -1,218 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <nrm2.h> -#include "delta.h" - -static void -releaseMemObjects(cl_mem objX, cl_mem objNrm2, cl_mem objScratch) -{ - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objNrm2 != NULL) - { - clReleaseMemObject(objNrm2); - } - if(objScratch != NULL) - { - clReleaseMemObject(objScratch); - } -} - -template <typename T> static void -deleteBuffers(T *blasX, T *blasNRM2=NULL, T *clblasNRM2=NULL) -{ - if(blasX != NULL) - { - delete[] blasX; - } - if(clblasNRM2 != NULL) - { - delete[] clblasNRM2; - } - if(blasNRM2 != NULL) - { - delete(blasNRM2); - } -} - -template <typename T1, typename T2> -void -nrm2CorrectnessTest(TestParams *params) -{ - cl_int err; - T1 *blasX; - T2 *clblasNRM2, *blasNRM2; - cl_mem bufX, bufNRM2, scratchBuff; - clMath::BlasBase *base; - cl_event *events; - cl_double deltaForType = 0.0; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T1) == typeid(cl_double) || - typeid(T1) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - - blasX = new T1[lengthX + params->offBX ]; - blasNRM2 = new T2[1]; - clblasNRM2 = new T2[1 + params->offa]; - - if((blasX == NULL) || (clblasNRM2 == NULL) || (blasNRM2 == NULL)) - { - 
::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T1>(blasX); - deleteBuffers<T2>(blasNRM2, clblasNRM2); - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - ::std::cerr << "Generating input data... "; - - randomVectors<T1>(params->N, (blasX + params->offBX), params->incx, (T1*)NULL, 0, true); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufX = base->createEnqueueBuffer(blasX, (lengthX + params->offBX)* sizeof(*blasX), 0, CL_MEM_READ_WRITE); - bufNRM2 = base->createEnqueueBuffer(NULL, (1 + params->offa) * sizeof(T2), 0, CL_MEM_READ_WRITE); - scratchBuff = base->createEnqueueBuffer(NULL, (lengthX * 2 * sizeof(T1)), 0, CL_MEM_READ_WRITE); - - ::std::cerr << "Calling reference xNRM2 routine... "; - - *blasNRM2 = ::clMath::blas::nrm2( params->N, blasX, params->offBX, params->incx); - ::std::cerr << "Done" << ::std::endl; - - if ((bufX == NULL) || (bufNRM2 == NULL) || (scratchBuff == NULL)) { - releaseMemObjects(bufX, bufNRM2, scratchBuff); - deleteBuffers<T1>(blasX); - deleteBuffers<T2>(blasNRM2, clblasNRM2); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xNRM2 routine... "; - - DataType type; - type = ( typeid(T1) == typeid(cl_float))? TYPE_FLOAT : ( typeid(T1) == typeid(cl_double))? TYPE_DOUBLE: ( typeid(T1) == typeid(cl_float2))? 
TYPE_COMPLEX_FLOAT:TYPE_COMPLEX_DOUBLE; - - err = (cl_int)::clMath::clblas::nrm2( type, params->N, bufNRM2, params->offa, bufX, - params->offBX, params->incx, scratchBuff, params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufX, bufNRM2, scratchBuff); - deleteBuffers<T1>(blasX); - deleteBuffers<T2>(blasNRM2, clblasNRM2); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::NRM2() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufX, bufNRM2, scratchBuff); - deleteBuffers<T1>(blasX); - deleteBuffers<T2>(blasNRM2, clblasNRM2); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufNRM2, CL_TRUE, 0, - (1 + params->offa) * sizeof(*clblasNRM2), clblasNRM2, 0, NULL, NULL); - if (err != CL_SUCCESS) { - ::std::cerr << "NRM2: Reading results failed...." 
<< std::endl; - } - releaseMemObjects(bufX, bufNRM2, scratchBuff); - - deltaForType = DELTA_0<T1>(); - - // Since every element of X encounters a division, delta would be sum of deltas for every element in X - cl_double delta = 0; - for(unsigned int i=0; i<(params->N); i++) { - delta += deltaForType * returnMax<T1>(blasX[params->offBX + i]); - } - compareValues<T2>( (blasNRM2), (clblasNRM2+params->offa), delta); - - deleteBuffers<T1>(blasX); - deleteBuffers<T2>(blasNRM2, clblasNRM2); - delete[] events; -} - -// Instantiate the test - -TEST_P(NRM2, snrm2) { - TestParams params; - - getParams(¶ms); - nrm2CorrectnessTest<cl_float, cl_float>(¶ms); -} - -TEST_P(NRM2, dnrm2) { - TestParams params; - - getParams(¶ms); - nrm2CorrectnessTest<cl_double, cl_double>(¶ms); -} - -TEST_P(NRM2, scnrm2) { - TestParams params; - - getParams(¶ms); - nrm2CorrectnessTest<FloatComplex, cl_float>(¶ms); -} - -TEST_P(NRM2, dznrm2) { - TestParams params; - - getParams(¶ms); - nrm2CorrectnessTest<DoubleComplex, cl_double>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-rot.cpp b/external/clBLAS/src/tests/correctness/corr-rot.cpp deleted file mode 100644 index c9df97ac..00000000 --- a/external/clBLAS/src/tests/correctness/corr-rot.cpp +++ /dev/null @@ -1,234 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <rot.h> -#include <matrix.h> -//#include "delta.h" - -static void -releaseMemObjects(cl_mem bufX, cl_mem bufY) -{ - if(bufX != NULL) - { - clReleaseMemObject(bufX); - } - if(bufY != NULL) - { - clReleaseMemObject(bufY); - } -} - -template <typename T> static void -deleteBuffers(T *X, T *Y, T *back_X, T *back_Y) -{ - if(X != NULL) - { - delete[] X; - } - if(Y != NULL) - { - delete[] Y; - } - if(back_X != NULL) - { - delete[] back_X; - } - if(back_Y != NULL) - { - delete[] back_Y; - } -} - -template <typename T> -void -rotCorrectnessTest(TestParams *params) -{ - cl_int err; - T *X, *Y, *back_X, *back_Y; - T alpha, beta; - cl_mem bufX, bufY; - clMath::BlasBase *base; - cl_event *events; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double) || typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) - { - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthx = 1 + (params->N - 1) * abs(params->incx); - size_t lengthy = 1 + (params->N - 1) * abs(params->incy); - - X = new T[lengthx + params->offa]; - Y = new T[lengthy + params->offb]; - - back_X = new T[lengthx + params->offa]; - back_Y = new T[lengthy + params->offb]; - - if((X == NULL) || (Y == NULL) || - (back_X == NULL) || (back_Y == NULL)) - { - ::std::cerr << "Cannot 
allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T>(X, Y, back_X, back_Y); - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - //Filling random values for SA and SB. C & S are only for output sake - randomVectors(params->N, (X + params->offa), params->incx, (Y+params->offb), params->incy); - - alpha = convertMultiplier<T>(params->alpha); - beta = convertMultiplier<T>(params->beta); - - memcpy(back_X, X, (lengthx + params->offa) * sizeof(T)); - memcpy(back_Y, Y, (lengthy + params->offb) * sizeof(T)); - - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufX = base->createEnqueueBuffer(X, (lengthx + params->offa) * sizeof(T), 0, CL_MEM_READ_WRITE); - bufY = base->createEnqueueBuffer(Y, (lengthy + params->offb) * sizeof(T), 0, CL_MEM_READ_WRITE); - - ::std::cerr << "Calling reference xROT routine... "; - - ::clMath::blas::rot(params->N, back_X, params->offa, params->incx, back_Y, params->offb, params->incy, - alpha, beta); - ::std::cerr << "Done" << ::std::endl; - - // Hold X vector - - if ((bufX == NULL) || (bufY == NULL)) - { - releaseMemObjects(bufX, bufY); - deleteBuffers(X, Y, back_X, back_Y); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xROT routine... 
"; - - - err = (cl_int)::clMath::clblas::rot( params->N, bufX, params->offa, params->incx, bufY, params->offb, params->incy, - alpha, beta, params->numCommandQueues, base->commandQueues(), 0, NULL, events); - - if (err != CL_SUCCESS) - { - releaseMemObjects(bufX, bufY); - deleteBuffers(X, Y, back_X, back_Y); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::ROT() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) - { - releaseMemObjects(bufX, bufY); - deleteBuffers(X, Y, back_X, back_Y ); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufX, CL_TRUE, 0, - (lengthx + params->offa) * sizeof(T), X, 0, NULL, NULL); - - err |= clEnqueueReadBuffer(base->commandQueues()[0], bufY, CL_TRUE, 0, - (lengthy + params->offb) * sizeof(T), Y, 0, NULL, NULL); - - if (err != CL_SUCCESS) - { - ::std::cerr << "ROT: Reading results failed...." 
<< std::endl; - } - - releaseMemObjects(bufX, bufY); - - - compareMatrices<T>(clblasRowMajor, lengthx , 1, (back_X + params->offa), (X + params->offa), 1); - compareMatrices<T>(clblasRowMajor, lengthy , 1, (back_Y + params->offb), (Y + params->offb), 1); - - deleteBuffers<T>(X, Y, back_X, back_Y); - delete[] events; -} - -// Instantiate the test -TEST_P(ROT, srot) -{ - TestParams params; - - getParams(¶ms); - rotCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(ROT, drot) -{ - TestParams params; - - getParams(¶ms); - rotCorrectnessTest<cl_double>(¶ms); -} - -TEST_P(ROT, csrot) -{ - TestParams params; - - getParams(¶ms); - rotCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(ROT, zdrot) -{ - TestParams params; - - getParams(¶ms); - rotCorrectnessTest<DoubleComplex>(¶ms); -} - - diff --git a/external/clBLAS/src/tests/correctness/corr-rotg.cpp b/external/clBLAS/src/tests/correctness/corr-rotg.cpp deleted file mode 100644 index 21ef905b..00000000 --- a/external/clBLAS/src/tests/correctness/corr-rotg.cpp +++ /dev/null @@ -1,288 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <rotg.h> -#include <matrix.h> -#include "delta.h" - -static void -releaseMemObjects(cl_mem bufSA, cl_mem bufSB, cl_mem bufC, cl_mem bufS) -{ - if(bufSA != NULL) - { - clReleaseMemObject(bufSA); - } - if(bufSB != NULL) - { - clReleaseMemObject(bufSB); - } - if(bufC != NULL) - { - clReleaseMemObject(bufC); - } - if(bufS != NULL) - { - clReleaseMemObject(bufS); - } -} - -template <typename T> static void -deleteBuffers(T *A, T *B, T *C=NULL, T *D=NULL, T *E=NULL, T *F=NULL) -{ - if(A != NULL) - { - delete[] A; - } - if(B != NULL) - { - delete[] B; - } - if(C != NULL) - { - delete[] C; - } - if(D != NULL) - { - delete[] D; - } - if(E != NULL) - { - delete[] E; - } - if(F != NULL) - { - delete[] F; - } -} - -// type T1 indicates the basic type, -// while T2 indicates type of buffer C. 
C is not complex for complex types -template <typename T1, typename T2> -void -rotgCorrectnessTest(TestParams *params) -{ - cl_int err; - T1 *SA, *SB, *S, *back_SA, *back_SB, *back_S; - T2 *C, *back_C; - cl_mem bufSA, bufSB, bufC, bufS; - clMath::BlasBase *base; - cl_event *events; - cl_double deltaForType = 0.0; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T1) == typeid(cl_double) || - typeid(T1) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) - { - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t length = 1;//only one element need to be accessed always - - SA = new T1[length + params->offBX ]; - SB = new T1[length + params->offCY ]; - C = new T2[length + params->offa ]; - S = new T1[length + params->offb ]; - - back_SA = new T1[length + params->offBX ]; - back_SB = new T1[length + params->offCY ]; - back_C = new T2[length + params->offa ]; - back_S = new T1[length + params->offb ]; - - if((SA == NULL) || (SB == NULL) || (C == NULL) || (S == NULL) || - (back_SA == NULL) || (back_SB == NULL) || (back_C == NULL) || (back_S == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T1>(SA, SB, S, back_SA, back_SB, back_S); - deleteBuffers<T2>(C, back_C); - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - //Filling random values for SA and SB. 
C & S are only for output sake - randomVectors(1, (SA+params->offBX), 1, (SB+params->offCY), 1); - S[params->offb] = back_S[params->offb] = ZERO<T1>(); - C[params->offa] = back_C[params->offa] = ZERO<T2>(); - - back_SA[params->offBX] = SA[params->offBX]; - back_SB[params->offCY] = SB[params->offCY]; - ::std::cerr << "Done" << ::std::endl; - - //printing the inputs, as they change after processing - ::std::cerr << "A = "; - printElement<T1>(SA[params->offBX]); - ::std::cerr << "\tB = "; - printElement<T1>(SB[params->offCY]); - ::std::cerr << "\tC = "; - printElement<T2>(C[params->offa]); - ::std::cerr << "\tS = "; - printElement<T1>(S[params->offb]); - ::std::cout << std::endl << std::endl; - - // Allocate buffers - bufSA = base->createEnqueueBuffer(SA, (length + params->offBX) * sizeof(T1), 0, CL_MEM_READ_WRITE); - bufSB = base->createEnqueueBuffer(SB, (length + params->offCY) * sizeof(T1), 0, CL_MEM_READ_WRITE); - bufC = base->createEnqueueBuffer(C, (length + params->offa ) * sizeof(T2), 0, CL_MEM_WRITE_ONLY); - bufS = base->createEnqueueBuffer(S, (length + params->offb ) * sizeof(T1), 0, CL_MEM_WRITE_ONLY); - - ::std::cerr << "Calling reference xROTG routine... "; - - ::clMath::blas::rotg(back_SA, params->offBX, back_SB, params->offCY, back_C, params->offa, back_S, params->offb); - ::std::cerr << "Done" << ::std::endl; - - // Hold X vector - - if ((bufSA == NULL) || (bufSB == NULL) || (bufC == NULL) || (bufS == NULL)) - { - releaseMemObjects(bufSA, bufSB, bufC, bufS); - deleteBuffers<T1>(SA, SB, S, back_SA, back_SB, back_S); - deleteBuffers<T2>(C, back_C); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xROTG routine... "; - - DataType type; - type = ( typeid(T1) == typeid(cl_float)) ? 
TYPE_FLOAT : - ( typeid(T1) == typeid(cl_double)) ? TYPE_DOUBLE: - ( typeid(T1) == typeid(cl_float2)) ? TYPE_COMPLEX_FLOAT: - TYPE_COMPLEX_DOUBLE; - - err = (cl_int)::clMath::clblas::rotg( type, bufSA, params->offBX, bufSB, params->offCY, - bufC, params->offa, bufS, params->offb, - params->numCommandQueues, base->commandQueues(), 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufSA, bufSB, bufC, bufS); - deleteBuffers<T1>(SA, SB, S, back_SA, back_SB, back_S); - deleteBuffers<T2>(C, back_C); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::ROTG() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufSA, bufSB, bufC, bufS); - deleteBuffers<T1>(SA, SB, S, back_SA, back_SB, back_S); - deleteBuffers<T2>(C, back_C); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufSA, CL_TRUE, 0, - (length + params->offBX) * sizeof(T1), SA, 0, NULL, NULL); - - err |= clEnqueueReadBuffer(base->commandQueues()[0], bufSB, CL_TRUE, 0, - (length + params->offCY) * sizeof(T1), SB, 0, NULL, NULL); - - err |= clEnqueueReadBuffer(base->commandQueues()[0], bufC, CL_TRUE, 0, - (length + params->offa) * sizeof(T2), C, 0, NULL, NULL); - - err |= clEnqueueReadBuffer(base->commandQueues()[0], bufS, CL_TRUE, 0, - (length + params->offb) * sizeof(T1), S, 0, NULL, NULL); - - if (err != CL_SUCCESS) - { - ::std::cerr << "ROTG: Reading results failed...." 
<< std::endl; - } - - releaseMemObjects(bufSA, bufSB, bufC, bufS); - - deltaForType = DELTA_0<T1>(); - cl_double delta; - - delta = deltaForType * returnMax<T1>(back_SA[params->offBX]); - compareValues<T1>( (back_SA + params->offBX), (SA + params->offBX), delta); - - delta = deltaForType * returnMax<T1>(back_SB[params->offCY]); - compareValues<T1>( (back_SB + params->offCY), (SB + params->offCY), delta); - - delta = deltaForType * returnMax<T2>(back_C[params->offa]); - compareValues<T2>( (back_C + params->offa), (C + params->offa), delta); - - delta = deltaForType * returnMax<T1>(back_S[params->offb]); - compareValues<T1>( (back_S + params->offb), (S + params->offb), delta); - - deleteBuffers<T1>(SA, SB, S, back_SA, back_SB, back_S); - deleteBuffers<T2>(C, back_C); - delete[] events; -} - -// Instantiate the test - -TEST_P(ROTG, srotg) { - TestParams params; - - getParams(¶ms); - rotgCorrectnessTest<cl_float, cl_float>(¶ms); -} - -TEST_P(ROTG, drotg) { - TestParams params; - - getParams(¶ms); - rotgCorrectnessTest<cl_double, cl_double>(¶ms); -} - -TEST_P(ROTG, crotg) { - TestParams params; - - getParams(¶ms); - rotgCorrectnessTest<FloatComplex, cl_float>(¶ms); -} - -TEST_P(ROTG, zrotg) { - TestParams params; - - getParams(¶ms); - rotgCorrectnessTest<DoubleComplex, cl_double>(¶ms); -} - diff --git a/external/clBLAS/src/tests/correctness/corr-rotm.cpp b/external/clBLAS/src/tests/correctness/corr-rotm.cpp deleted file mode 100644 index 4a1a02e2..00000000 --- a/external/clBLAS/src/tests/correctness/corr-rotm.cpp +++ /dev/null @@ -1,232 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <rotm.h> -#include <matrix.h> - -static void -releaseMemObjects(cl_mem bufX, cl_mem bufY, cl_mem bufParam) -{ - if(bufX != NULL) - { - clReleaseMemObject(bufX); - } - if(bufY != NULL) - { - clReleaseMemObject(bufY); - } - if(bufParam != NULL) - { - clReleaseMemObject(bufParam); - } -} - -template <typename T> static void -deleteBuffers(T *X, T *Y, T *PARAM, T *back_X, T *back_Y, T *back_PARAM) -{ - if(X != NULL) - { - delete[] X; - } - if(Y != NULL) - { - delete[] Y; - } - if(PARAM != NULL) - { - delete[] PARAM; - } - if(back_X != NULL) - { - delete[] back_X; - } - if(back_Y != NULL) - { - delete[] back_Y; - } - if(back_PARAM != NULL) - { - delete[] back_PARAM; - } -} - -template <typename T> -void -rotmCorrectnessTest(TestParams *params) -{ - cl_int err; - T *X, *Y, *back_X, *back_Y; - T *PARAM, *back_PARAM; - T sflagParam; - cl_mem bufX, bufY, bufParam; - clMath::BlasBase *base; - cl_event *events; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double)) && - !base->isDevSupportDoublePrecision()) - { - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - 
SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthx = 1 + (params->N - 1) * abs(params->incx); - size_t lengthy = 1 + (params->N - 1) * abs(params->incy); - - X = new T[lengthx + params->offa]; - Y = new T[lengthy + params->offb]; - PARAM = new T[5 + params->offc]; //params always has 5 elements - - back_X = new T[lengthx + params->offa]; - back_Y = new T[lengthy + params->offb]; - back_PARAM = new T[5 + params->offc]; //params always has 5 elements - - if((X == NULL) || (Y == NULL) || (PARAM == NULL) || - (back_X == NULL) || (back_Y == NULL) || (back_PARAM == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T>(X, Y, PARAM, back_X, back_Y, back_PARAM); - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - randomVectors(params->N, (X + params->offa), params->incx, (Y+params->offb), params->incy); - randomVectors(4, (PARAM + params->offc + 1), 1); //1st element is initialized separately - - sflagParam = convertMultiplier<T>(params->alpha); - PARAM[params->offc] = sflagParam; // initializing first element - - memcpy(back_X, X, (lengthx + params->offa)*sizeof(T)); - memcpy(back_Y, Y, (lengthy + params->offb)*sizeof(T)); - memcpy(back_PARAM, PARAM, (params->offc + 5)*sizeof(T)); - - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufX = base->createEnqueueBuffer(X, (lengthx + params->offa) * sizeof(T), 0, CL_MEM_READ_WRITE); - bufY = base->createEnqueueBuffer(Y, (lengthy + params->offb) * sizeof(T), 0, CL_MEM_READ_WRITE); - bufParam = base->createEnqueueBuffer(PARAM, (5 + params->offc) * sizeof(T), 0, CL_MEM_READ_ONLY); - - ::std::cerr << "Calling reference xROTM routine... 
"; - - ::clMath::blas::rotm(params->N, back_X, params->offa, params->incx, back_Y, params->offb, params->incy, - back_PARAM, params->offc); - ::std::cerr << "Done" << ::std::endl; - - if ((bufX == NULL) || (bufY == NULL) || (bufParam == NULL)) - { - releaseMemObjects(bufX, bufY, bufParam); - deleteBuffers(X, Y, PARAM, back_X, back_Y, back_PARAM); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xROTM routine... "; - - DataType type; - type = ( typeid(T) == typeid(cl_float)) ? TYPE_FLOAT : - TYPE_DOUBLE; - - err = (cl_int)::clMath::clblas::rotm( type, params->N, bufX, params->offa, params->incx, bufY, params->offb, params->incy, - bufParam, params->offc, params->numCommandQueues, base->commandQueues(), 0, NULL, events); - - if (err != CL_SUCCESS) - { - releaseMemObjects(bufX, bufY, bufParam); - deleteBuffers(X, Y, PARAM, back_X, back_Y, back_PARAM); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::ROTM() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) - { - releaseMemObjects(bufX, bufY, bufParam); - deleteBuffers(X, Y, PARAM, back_X, back_Y, back_PARAM); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufX, CL_TRUE, 0, - (lengthx + params->offa) * sizeof(T), X, 0, NULL, NULL); - - err |= clEnqueueReadBuffer(base->commandQueues()[0], bufY, CL_TRUE, 0, - (lengthy + params->offb) * sizeof(T), Y, 0, NULL, NULL); - - if (err != CL_SUCCESS) - { - ::std::cerr << "ROTM: Reading results failed...." 
<< std::endl; - } - - releaseMemObjects(bufX, bufY, bufParam); - - compareMatrices<T>(clblasColumnMajor, lengthx , 1, (back_X + params->offa), (X + params->offa), lengthx); - compareMatrices<T>(clblasColumnMajor, lengthy , 1, (back_Y + params->offb), (Y + params->offb), lengthy); - - deleteBuffers<T>(X, Y, PARAM, back_X, back_Y, back_PARAM); - delete[] events; -} - -// Instantiate the test -TEST_P(ROTM, srotm) -{ - TestParams params; - - getParams(¶ms); - rotmCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(ROTM, drotm) -{ - TestParams params; - - getParams(¶ms); - rotmCorrectnessTest<cl_double>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-rotmg.cpp b/external/clBLAS/src/tests/correctness/corr-rotmg.cpp deleted file mode 100644 index 851310c9..00000000 --- a/external/clBLAS/src/tests/correctness/corr-rotmg.cpp +++ /dev/null @@ -1,283 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <rotmg.h> -#include <matrix.h> -#include "delta.h" - -static void -releaseMemObjects(cl_mem bufD1, cl_mem bufD2, cl_mem bufX, cl_mem bufY, cl_mem bufParam) -{ - if(bufD1 != NULL) - { - clReleaseMemObject(bufD1); - } - if(bufD2 != NULL) - { - clReleaseMemObject(bufD2); - } - if(bufX != NULL) - { - clReleaseMemObject(bufX); - } - if(bufY != NULL) - { - clReleaseMemObject(bufY); - } - if(bufParam != NULL) - { - clReleaseMemObject(bufParam); - } -} - -template <typename T> static void -deleteBuffers(T *D1, T *D2, T *X, T *Y, T *PARAM) -{ - if(D1 != NULL) - { - delete[] D1; - } - if(D2 != NULL) - { - delete[] D2; - } - if(X != NULL) - { - delete[] X; - } - if(Y != NULL) - { - delete[] Y; - } - if(PARAM != NULL) - { - delete[] PARAM; - } -} - -template <typename T> -void -rotmgCorrectnessTest(TestParams *params) -{ - cl_int err; - T *D1, *D2, *X, *Y, *PARAM; - T *back_D1, *back_D2, *back_X, *back_Y, *back_PARAM; - T sflagParam; - cl_mem bufD1, bufD2, bufX, bufY, bufParam; - clMath::BlasBase *base; - cl_event *events; - cl_double deltaForType = 0.0; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double)) && - !base->isDevSupportDoublePrecision()) - { - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - X = new T[1 + params->offBX]; - Y = new T[1 + 
params->offCY]; - D1 = new T[1 + params->offa]; - D2 = new T[1 + params->offb]; - PARAM = new T[5 + params->offc]; //params always has 5 elements - - back_X = new T[1 + params->offBX]; - back_Y = new T[1 + params->offCY]; - back_D1 = new T[1 + params->offa]; - back_D2 = new T[1 + params->offb]; - back_PARAM = new T[5 + params->offc]; //params always has 5 elements - - if((D1 == NULL) || (D2 == NULL) || (X == NULL) || (Y == NULL) || (PARAM == NULL) || - (back_D1 == NULL) || (back_D2 == NULL) ||(back_X == NULL) || (back_Y == NULL) || (back_PARAM == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T>(D1, D2, X, Y, PARAM); - deleteBuffers<T>(back_D1, back_D2, back_X, back_Y, back_PARAM); - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - //Filling random values for SA and SB. C & S are only for output sake - randomRotmg( (D1 + params->offa), (D2 + params->offb), - (X + params->offBX), (Y + params->offCY), (PARAM + params->offc) ); - - sflagParam = convertMultiplier<T>(params->alpha); - PARAM[params->offc] = sflagParam; // initializing first element - - memcpy(back_X, X, (1 + params->offBX)*sizeof(T)); - memcpy(back_Y, Y, (1 + params->offCY)*sizeof(T)); - memcpy(back_D1, D1, (1 + params->offa)*sizeof(T)); - memcpy(back_D2, D2, (1 + params->offb)*sizeof(T)); - memcpy(back_PARAM, PARAM, (params->offc + 5)*sizeof(T)); - - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufD1 = base->createEnqueueBuffer(D1, (1 + params->offa) * sizeof(T), 0, CL_MEM_READ_WRITE); - bufD2 = base->createEnqueueBuffer(D2, (1 + params->offb) * sizeof(T), 0, CL_MEM_READ_WRITE); - bufX = base->createEnqueueBuffer(X, (1 + params->offBX) * sizeof(T), 0, CL_MEM_READ_WRITE); - bufY = base->createEnqueueBuffer(Y, (1 + params->offCY) * sizeof(T), 0, CL_MEM_READ_ONLY); - bufParam = base->createEnqueueBuffer(PARAM, (5 + 
params->offc) * sizeof(T), 0, CL_MEM_READ_WRITE); - - ::std::cerr << "Calling reference xROTMG routine... "; - - ::clMath::blas::rotmg(back_D1, params->offa, back_D2, params->offb, back_X, params->offBX, back_Y, params->offCY, - back_PARAM, params->offc); - ::std::cerr << "Done" << ::std::endl; - - // Hold X vector - - if ((bufD1 == NULL) || (bufD2 == NULL) || (bufX == NULL) || (bufY == NULL) || (bufParam == NULL)) - { - releaseMemObjects(bufD1, bufD2, bufX, bufY, bufParam); - deleteBuffers<T>(D1, D2, X, Y, PARAM); - deleteBuffers<T>(back_D1, back_D2, back_X, back_Y, back_PARAM); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xROTMG routine... "; - - DataType type; - type = ( typeid(T) == typeid(cl_float)) ? TYPE_FLOAT : - TYPE_DOUBLE; - - err = (cl_int)::clMath::clblas::rotmg( type, bufD1, params->offa, bufD2, params->offb, bufX, params->offBX, - bufY, params->offCY, bufParam, params->offc, - params->numCommandQueues, base->commandQueues(), 0, NULL, events); - - if (err != CL_SUCCESS) - { - releaseMemObjects(bufD1, bufD2, bufX, bufY, bufParam); - deleteBuffers<T>(D1, D2, X, Y, PARAM); - deleteBuffers<T>(back_D1, back_D2, back_X, back_Y, back_PARAM); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::ROTMG() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) - { - releaseMemObjects(bufD1, bufD2, bufX, bufY, bufParam); - deleteBuffers<T>(D1, D2, X, Y, PARAM); - deleteBuffers<T>(back_D1, back_D2, back_X, back_Y, back_PARAM); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufD1, CL_TRUE, 0, - (1 
+ params->offa) * sizeof(T), D1, 0, NULL, NULL); - - err |= clEnqueueReadBuffer(base->commandQueues()[0], bufD2, CL_TRUE, 0, - (1 + params->offb) * sizeof(T), D2, 0, NULL, NULL); - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufX, CL_TRUE, 0, - (1 + params->offBX) * sizeof(T), X, 0, NULL, NULL); - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufY, CL_TRUE, 0, - (1 + params->offCY) * sizeof(T), Y, 0, NULL, NULL); - - err |= clEnqueueReadBuffer(base->commandQueues()[0], bufParam, CL_TRUE, 0, - (5 + params->offc) * sizeof(T), PARAM, 0, NULL, NULL); - - if (err != CL_SUCCESS) - { - ::std::cerr << "ROTMG: Reading results failed...." << std::endl; - } - - releaseMemObjects(bufD1, bufD2, bufX, bufY, bufParam); - - deltaForType = DELTA_0<T>(); - - #ifndef CORR_TEST_WITH_ACML - // Acml doesn't store answer in D1, D2 and X1. So skipping those checks - cl_double delta; - delta = deltaForType * returnMax<T>(back_D1[params->offa]); - compareValues<T>( (back_D1 + params->offa), (D1 + params->offa), delta); - - delta = deltaForType * returnMax<T>(back_D2[params->offb]); - compareValues<T>( (back_D2 + params->offb), (D2 + params->offb), delta); - - delta = deltaForType * returnMax<T>(back_X[params->offBX]); - compareValues<T>( (back_X + params->offBX), (X + params->offBX), delta); - - delta = deltaForType * returnMax<T>(back_Y[params->offCY]); - compareValues<T>( (back_Y + params->offCY), (Y + params->offCY), delta); - #endif - - // Creating delta array for PARAM array - cl_double deltaArr[5]; - for(int i=0; i<5; i++) { - deltaArr[i] = deltaForType * returnMax<T>(back_PARAM[i + (params->offc)]); - } - compareMatrices<T>(clblasColumnMajor, 5 , 1, (back_PARAM + params->offc), (PARAM + params->offc), 5, deltaArr); - - deleteBuffers<T>(D1, D2, X, Y, PARAM); - deleteBuffers<T>(back_D1, back_D2, back_X, back_Y, back_PARAM); - - delete[] events; -} - -// Instantiate the test -TEST_P(ROTMG, srotmg) -{ - TestParams params; - - getParams(¶ms); - 
rotmgCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(ROTMG, drotmg) -{ - TestParams params; - - getParams(¶ms); - rotmgCorrectnessTest<cl_double>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-sbmv.cpp b/external/clBLAS/src/tests/correctness/corr-sbmv.cpp deleted file mode 100644 index b17f6699..00000000 --- a/external/clBLAS/src/tests/correctness/corr-sbmv.cpp +++ /dev/null @@ -1,224 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <sbmv.h> -#include <gbmv.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objX, cl_mem objY) -{ - if(objA != NULL) - { - clReleaseMemObject(objA); - } - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objY != NULL) - { - clReleaseMemObject(objY); - } -} - -template <typename T> static void -deleteBuffers(T *A, T *X, T *blasY, T *clblasY) -{ - if(A != NULL) - { - delete[] A; - } - if(X != NULL) - { - delete[] X; - } - if(blasY != NULL) - { - delete[] blasY; - } - if(clblasY != NULL) - { - delete[] clblasY; // To hold clblas GBMV call results - } -} - -template <typename T> -void -sbmvCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *X, *blasY, *clblasY; - cl_mem bufA, bufX, bufY; - clMath::BlasBase *base; - cl_event *events; - T alpha, beta; - size_t lengthX, lengthY, lengthA; - - base = clMath::BlasBase::getInstance(); - - if (((typeid(T) == typeid(cl_double))) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - lengthA = params->N * params->lda; - lengthX = (params->N - 1)*abs(params->incx) + 1; - lengthY = (params->N - 1)*abs(params->incy) + 1; - - A = new T[ lengthA + params->offA ]; - X = new T[ lengthX + params->offBX ]; - blasY = new T[ lengthY + params->offCY ]; - clblasY = new T[ lengthY + 
params->offCY ]; - - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - if((A == NULL) || (X == NULL) || (blasY == NULL) || (clblasY == NULL)) - { - deleteBuffers<T>(A, X, blasY, clblasY); - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - delete[] events; - SUCCEED(); - return; - } - - alpha = convertMultiplier<T>(params->alpha); - beta = convertMultiplier<T>(params->beta); - - randomGbmvMatrices(params->order, clblasNoTrans, params->N, params->N, &alpha, &beta, - (A + params->offA), params->lda, (X+params->offBX), params->incx, (blasY+params->offCY), params->incy ); - // Copy blasY to clblasY - memcpy(clblasY, blasY, (lengthY + params->offCY)* sizeof(*blasY)); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufA = base->createEnqueueBuffer(A, (lengthA + params->offA)* sizeof(*A), 0, CL_MEM_READ_ONLY); - bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX)* sizeof(*X), 0, CL_MEM_READ_ONLY); - bufY = base->createEnqueueBuffer(clblasY, (lengthY + params->offCY) * sizeof(*clblasY), 0, CL_MEM_READ_WRITE); - - ::std::cerr << "Calling reference xSBMV routine... "; - - clblasOrder fOrder; - clblasUplo fUplo; - fOrder = params->order; - fUplo = params->uplo; - size_t fN = params->N, fK = params->K; - - if (fOrder != clblasColumnMajor) - { - fOrder = clblasColumnMajor; - fUplo = (params->uplo == clblasLower)? clblasUpper : clblasLower; - fN = params->N; - } - - clMath::blas::sbmv(fOrder, fUplo, fN, fK, alpha, A, params->offA, params->lda, - X, params->offBX, params->incx, beta, blasY, params->offCY, params->incy); - ::std::cerr << "Done" << ::std::endl; - - if ((bufA == NULL) || (bufX == NULL) || (bufY == NULL)) { - // Skip the test, the most probable reason is - // matrix too big for a device. 
- - releaseMemObjects(bufA, bufX, bufY); - deleteBuffers<T>(A, X, blasY, clblasY); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xSBMV routine... "; - - err = (cl_int)clMath::clblas::sbmv(params->order, params->uplo, params->N, params->K, - alpha, bufA, params->offA, params->lda, bufX, params->offBX, params->incx, - beta, bufY, params->offCY, params->incy, - params->numCommandQueues, base->commandQueues(), 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX, bufY); - deleteBuffers<T>(A, X, blasY, clblasY); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::SBMV() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX, bufY); - deleteBuffers<T>(A, X, blasY, clblasY); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufY, CL_TRUE, 0, - (lengthY + params->offCY) * sizeof(*clblasY), clblasY, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "SBMV: Reading results failed...." 
<< std::endl; - } - - releaseMemObjects(bufA, bufX, bufY); - compareMatrices<T>(clblasColumnMajor, lengthY , 1, (blasY + params->offCY), (clblasY + params->offCY), - lengthY); - deleteBuffers<T>(A, X, blasY, clblasY); - delete[] events; -} - -// Instantiate the test - -TEST_P(SBMV, ssbmv) { - TestParams params; - - getParams(¶ms); - sbmvCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(SBMV, dsbmv) { - TestParams params; - - getParams(¶ms); - sbmvCorrectnessTest<cl_double>(¶ms); -} - diff --git a/external/clBLAS/src/tests/correctness/corr-scal.cpp b/external/clBLAS/src/tests/correctness/corr-scal.cpp deleted file mode 100644 index ad156f63..00000000 --- a/external/clBLAS/src/tests/correctness/corr-scal.cpp +++ /dev/null @@ -1,215 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <scal.h> - -static void -releaseMemObjects(cl_mem objX) -{ - if(objX != NULL) - { - clReleaseMemObject(objX); - } -} - -template <typename T> static void -deleteBuffers(T *blasX, T *clblasX) -{ - if(blasX != NULL) - { - delete[] blasX; - } - if(clblasX != NULL) - { - delete[] clblasX; - } -} - -template <typename T> -void scalCorrectnessTest(TestParams *params) -{ - cl_int err; - T *blasX, *clblasX; - cl_mem bufX; - clMath::BlasBase *base; - cl_event *events; - T alpha; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - bool is_css_zds = (params->K == 1)? true: false; // K indicates csscal/zdscal - - blasX = new T[lengthX + params->offBX ]; - clblasX = new T[lengthX + params->offBX ]; - - if( (blasX == NULL) || (clblasX == NULL) ) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T>(blasX, clblasX); - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - - ::std::cerr << "Generating input data... 
"; - - randomVectors(params->N, (blasX+params->offBX), params->incx); - alpha = convertMultiplier<T>(params->alpha); - memcpy(clblasX, blasX, (lengthX + params->offBX)* sizeof(*blasX)); - ::std::cerr << "Done" << ::std::endl; - bufX = base->createEnqueueBuffer(clblasX, (lengthX + params->offBX)* sizeof(*clblasX), 0, CL_MEM_READ_WRITE); - - ::std::cerr << "Calling reference xSCAL routine... "; - // Both blas and clBlas wrapper functions consider the real part of alpha in case of css/zdscal - // This is to make sure both get the same scalar alpha. check wrapper functions - ::clMath::blas::scal(is_css_zds, params->N, alpha, blasX, params->offBX, params->incx); - ::std::cerr << "Done" << ::std::endl; - - if (bufX == NULL) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufX); - deleteBuffers<T>(blasX, clblasX); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xSCAL routine... "; - // Both blas and clBlas wrapper functions consider the real part of alpha in case of css/zdscal - // This is to make sure both get the same scalar alpha. 
check wrapper functions - err = (cl_int)::clMath::clblas::scal(is_css_zds, params->N, alpha, bufX, params->offBX, - params->incx, params->numCommandQueues, base->commandQueues(), 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufX); - deleteBuffers<T>(blasX, clblasX); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::SCAL() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufX); - deleteBuffers<T>(blasX, clblasX); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufX, CL_TRUE, 0, - (lengthX + params->offBX) * sizeof(*clblasX), clblasX, 0, NULL, NULL); - if (err != CL_SUCCESS) { - ::std::cerr << "SCAL: Reading results failed...." << std::endl; - } - - releaseMemObjects(bufX); - - compareMatrices<T>(clblasColumnMajor, lengthX , 1, (blasX + params->offBX), - (clblasX + params->offBX), lengthX); - deleteBuffers<T>(blasX, clblasX); - delete[] events; -} - -// Instantiate the test - -TEST_P(SCAL, sscal) { - TestParams params; - - getParams(¶ms); - params.K = 0; // K will indicate wheather routine is csscal/zdscal - scalCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(SCAL, dscal) { - TestParams params; - - getParams(¶ms); - params.K = 0; // K will indicate wheather routine is csscal/zdscal - scalCorrectnessTest<cl_double>(¶ms); -} - -TEST_P(SCAL, cscal) { - TestParams params; - - getParams(¶ms); - params.K = 0; // K will indicate wheather routine is csscal/zdscal - scalCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(SCAL, zscal) { - TestParams params; - - getParams(¶ms); - params.K = 0; // K will indicate wheather routine is csscal/zdscal - scalCorrectnessTest<DoubleComplex>(¶ms); -} - - -// For these 2 routines alpha is scalar -TEST_P(SCAL, csscal) { - TestParams params; - - getParams(¶ms); - params.K 
= 1; // K will indicate wheather routine is csscal/zdscal - scalCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(SCAL, zdscal) { - TestParams params; - - getParams(¶ms); - params.K = 1; // K will indicate wheather routine is csscal/zdscal - scalCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-spmv.cpp b/external/clBLAS/src/tests/correctness/corr-spmv.cpp deleted file mode 100644 index dcbad3b5..00000000 --- a/external/clBLAS/src/tests/correctness/corr-spmv.cpp +++ /dev/null @@ -1,220 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <spmv.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objX, cl_mem objY) -{ - if(objA != NULL) - { - clReleaseMemObject(objA); - } - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objY != NULL) - { - clReleaseMemObject(objY); - } -} - -template <typename T> static void -deleteBuffers(T *A, T *X, T *blasY, T *clblasY) -{ - if(A != NULL) - { - delete[] A; - } - if(X != NULL) - { - delete[] X; - } - if(blasY != NULL) - { - delete[] blasY; - } - if(clblasY != NULL) - { - delete[] clblasY; // To hold clblas SPMV call results - } -} - -template <typename T> -void -spmvCorrectnessTest(TestParams *params) -{ - cl_int err; - T *AP, *X, *blasY, *clblasY; - cl_mem bufAP, bufX, bufY; - clMath::BlasBase *base; - cl_event *events; - T alpha, beta; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthA = (params->N * (params->N + 1)) / 2; - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - size_t lengthY = (1 + ((params->N -1) * abs(params->incy))); - - AP = new T[lengthA + params->offA ]; - X = new T[lengthX + params->offBX ]; - blasY = new T[lengthY + params->offCY ]; - clblasY = new T[lengthY + params->offCY ]; - - 
srand(params->seed); - - ::std::cerr << "Generating input data... "; - - if((AP == NULL) || (X == NULL) || (blasY == NULL) || (clblasY == NULL)) - { - deleteBuffers<T>(AP, X, blasY, clblasY); - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - delete[] events; - SUCCEED(); - return; - } - - alpha = convertMultiplier<T>(params->alpha); - beta = convertMultiplier<T>(params->beta); - - randomSpmvMatrices(params->order, params->uplo, params->N, true, &alpha, (AP + params->offA), - (X + params->offBX), params->incx, true, &beta, (blasY + params->offCY), params->incy); - // Copy blasY to clblasY - memcpy(clblasY, blasY, (lengthY + params->offCY)* sizeof(*blasY)); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufAP = base->createEnqueueBuffer(AP, (lengthA + params->offA)* sizeof(*AP), 0, CL_MEM_READ_ONLY); - bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX)* sizeof(*X), 0, CL_MEM_READ_ONLY); - bufY = base->createEnqueueBuffer(clblasY, (lengthY + params->offCY) * sizeof(*clblasY), 0, CL_MEM_READ_WRITE); - - ::std::cerr << "Calling reference xSPMV routine... "; - - clblasOrder order; - clblasUplo fUplo; - - order = params->order; - fUplo = params->uplo; - - if (order != clblasColumnMajor) - { - order = clblasColumnMajor; - fUplo = (params->uplo == clblasUpper)? clblasLower : clblasUpper; - } - ::clMath::blas::spmv( order, fUplo, params->N, alpha, AP, params->offA, X, params->offBX, params->incx, - beta, blasY, params->offCY, params->incy); - ::std::cerr << "Done" << ::std::endl; - - if ((bufAP == NULL) || (bufX == NULL) || (bufY == NULL)) { - // Skip the test, the most probable reason is - // matrix too big for a device. - - releaseMemObjects(bufAP, bufX, bufY); - deleteBuffers<T>(AP, X, blasY, clblasY); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." 
- << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xSPMV routine... "; - - err = (cl_int)::clMath::clblas::spmv(params->order, params->uplo, params->N, alpha, bufAP, - params->offA, bufX, params->offBX, params->incx, beta, bufY, params->offCY, params->incy, - params->numCommandQueues, base->commandQueues(), 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufAP, bufX, bufY); - deleteBuffers<T>(AP, X, blasY, clblasY); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::SPMV() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufAP, bufX, bufY); - deleteBuffers<T>(AP, X, blasY, clblasY); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufY, CL_TRUE, 0, - (lengthY + params->offCY) * sizeof(*clblasY), clblasY, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "SPMV: Reading results failed...." 
<< std::endl; - } - - releaseMemObjects(bufAP, bufX, bufY); - - compareMatrices<T>(clblasColumnMajor, lengthY , 1, (blasY + params->offCY), (clblasY + params->offCY), - lengthY); - deleteBuffers<T>(AP, X, blasY, clblasY); - delete[] events; -} - -// Instantiate the test - -TEST_P(SPMV, sspmv) { - TestParams params; - - getParams(¶ms); - spmvCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(SPMV, dspmv) { - TestParams params; - - getParams(¶ms); - spmvCorrectnessTest<cl_double>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-spr.cpp b/external/clBLAS/src/tests/correctness/corr-spr.cpp deleted file mode 100644 index 4b00a02b..00000000 --- a/external/clBLAS/src/tests/correctness/corr-spr.cpp +++ /dev/null @@ -1,228 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <spr.h> - -static void -releaseMemObjects(cl_mem objAP, cl_mem objX) -{ - if(objAP != NULL) - { - clReleaseMemObject(objAP); - } - if(objX != NULL) - { - clReleaseMemObject(objX); -} - -} - -template <typename T> static void -deleteBuffers(T *blasAP, T *clblasAP, T *X) -{ - if(blasAP != NULL) - { - delete[] blasAP; - } - if(clblasAP != NULL) - { - delete[] clblasAP; - } - if(X != NULL) - { - delete[] X; -} -} - -template <typename T> -void -sprCorrectnessTest(TestParams *params) -{ - cl_int err; - T *blasAP, *clblasAP, *X; -// T *tempA; - cl_mem bufAP, bufX; - clMath::BlasBase *base; - cl_event *events; - bool useAlpha; - T alpha; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthAP = ( ( params->N*( params->N + 1 ) )/2 ); - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - - blasAP = new T[lengthAP + params->offa]; - clblasAP = new T[lengthAP + params->offa]; - X = new T[lengthX + params->offBX]; -// tempA = new T[lengthA + params->offa ]; - - srand(params->seed); - - ::std::cerr << "Generating input data... 
"; - - - memset(blasAP, -1, (lengthAP + params->offa)); - memset(clblasAP, -1, (lengthAP + params->offa)); - memset(X, -1, (lengthX + params->offBX)); - - alpha = convertMultiplier<T>(params->alpha); - useAlpha = true; - - #ifdef DEBUG_SPR - printf("ALPHA in CORR_SPR.CPP %f\n", alpha); - #endif - - if((blasAP == NULL) || (X == NULL) || (clblasAP == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T>(blasAP, clblasAP, X); - delete[] events; - SUCCEED(); - return; - } - - randomSyrMatrices<T>(params->order, params->uplo, params->N, useAlpha, &alpha, - (blasAP + params->offa), 0, (X + params->offBX), params->incx); - - memcpy(clblasAP, blasAP, (lengthAP + params->offa)* sizeof(*blasAP)); - - ::std::cerr << "Done" << ::std::endl; - - bufAP = base->createEnqueueBuffer(clblasAP, (lengthAP + params->offa) * sizeof(*clblasAP), 0, CL_MEM_READ_WRITE); - bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX)* sizeof(*X), 0, CL_MEM_READ_ONLY); - - ::std::cerr << "Calling reference xSPR routine... "; - - clblasOrder order; - clblasUplo fUplo; - order = params->order; - fUplo = params->uplo; - - - if (order != clblasColumnMajor) - { - - order = clblasColumnMajor; - fUplo = (params->uplo == clblasUpper)? clblasLower : clblasUpper; - - if( params->transA == clblasConjTrans ) - doConjugate( (blasAP +params->offa), (( params->N * (params->N + 1)) / 2) , 1, 1 ); - - } - - clMath::blas::spr( clblasColumnMajor, fUplo, params->N, alpha, X, params->offBX, params->incx, blasAP, params->offa); - ::std::cerr << "Done" << ::std::endl; - - if ((bufAP == NULL) || (bufX == NULL) ) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufAP, bufX); - deleteBuffers<T>(blasAP, clblasAP, X); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." 
- << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xSPR routine... "; - - err = (cl_int)::clMath::clblas::spr( params->order, params->uplo, params->N, alpha, - bufX, params->offBX, params->incx, bufAP, params->offa, - params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufAP, bufX); - deleteBuffers<T>(blasAP, clblasAP, X); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::SYR() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufAP, bufX); - deleteBuffers<T>(blasAP, clblasAP, X); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufAP, CL_TRUE, 0, - (lengthAP + params->offa) * sizeof(*clblasAP), clblasAP, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "SPR: Reading results failed...." 
<< std::endl; - } - - releaseMemObjects(bufAP, bufX); - printf("Comparing the results\n"); - compareMatrices<T>(clblasColumnMajor, lengthAP , 1, (blasAP + params->offa), (clblasAP + params->offa), - lengthAP); - - deleteBuffers<T>(blasAP, clblasAP, X); - delete[] events; -} - -// Instantiate the test - -TEST_P(SPR, sspr) { - TestParams params; - - getParams(¶ms); - sprCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(SPR, dspr) { - TestParams params; - - getParams(¶ms); - sprCorrectnessTest<cl_double>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-spr2.cpp b/external/clBLAS/src/tests/correctness/corr-spr2.cpp deleted file mode 100644 index c000e64c..00000000 --- a/external/clBLAS/src/tests/correctness/corr-spr2.cpp +++ /dev/null @@ -1,216 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <spr2.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objX, cl_mem objY) -{ - if(objA != NULL) { - clReleaseMemObject(objA); - } - if(objX != NULL) { - clReleaseMemObject(objX); - } - if(objY != NULL) { - clReleaseMemObject(objY); -} -} - -template <typename T> static void -deleteBuffers(T *blasA, T *clblasA, T *X, T *Y) -{ - if(blasA != NULL) { - delete[] blasA; - } - if(clblasA != NULL) { - delete[] clblasA; - } - if(X != NULL) { - delete[] X; - } - if(Y != NULL) { - delete[] Y; -} -} - -template <typename T> -void -spr2CorrectnessTest(TestParams *params) -{ - cl_int err; - T *blasAP, *clblasAP, *X, *Y; - cl_mem bufAP, bufX, bufY; - clMath::BlasBase *base; - cl_event *events; - bool useAlpha; - T alpha; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthAP = (params->N *( params->N + 1 ))/2 ; - size_t lengthX = (1 + ((params->N - 1) * abs(params->incx))); - size_t lengthY = (1 + ((params->N - 1) * abs(params->incy))); - - blasAP = new T[lengthAP + params->offa ]; - clblasAP = new T[lengthAP + params->offa ]; - X = new T[lengthX + params->offBX ]; - Y = new T[lengthY + params->offCY ]; - - srand(params->seed); - - if((blasAP 
== NULL) || (clblasAP == NULL) || (X == NULL) || (Y == NULL)) - { - deleteBuffers<T>(blasAP, clblasAP, X, Y); - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - delete[] events; - SUCCEED(); - return; - } - - alpha = convertMultiplier<T>(params->alpha); - useAlpha = true; - - ::std::cerr << "Generating input data... "; - randomSyr2Matrices<T>(params->order, params->uplo, params->N, useAlpha, &alpha, (blasAP + params->offa), params->lda, - (X + params->offBX), params->incx, (Y + params->offCY), params->incy); - - // Copy blasAP to clblasAP - memcpy(clblasAP, blasAP, (lengthAP + params->offa)* sizeof(*blasAP)); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufAP = base->createEnqueueBuffer(clblasAP, (lengthAP + params->offa)* sizeof(*clblasAP), 0,CL_MEM_READ_WRITE); - bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX)* sizeof(*X), 0, CL_MEM_READ_ONLY); - bufY = base->createEnqueueBuffer(Y, (lengthY + params->offCY)* sizeof(*Y), 0, CL_MEM_READ_ONLY); - - ::std::cerr << "Calling reference xSPR2 routine... "; - - clblasOrder order; - clblasUplo fUplo; - - order = params->order; - fUplo = params->uplo; - - if (order != clblasColumnMajor) - { - order = clblasColumnMajor; - fUplo = (params->uplo == clblasUpper)? clblasLower : clblasUpper; - } - - ::clMath::blas::spr2( order, fUplo, params->N, alpha, X, params->offBX, params->incx, - Y, params->offCY, params->incy, blasAP, params->offa); - ::std::cerr << "Done" << ::std::endl; - - if ((bufAP == NULL) || (bufX == NULL) || (bufY == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufAP, bufX, bufY); - deleteBuffers<T>(blasAP, clblasAP, X, Y); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." 
<< ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xSPR2 routine... "; - - err = (cl_int)::clMath::clblas::spr2( params->order, params->uplo, params->N, alpha, - bufX, params->offBX, params->incx, bufY, params->offCY, params->incy, bufAP, params->offa, - params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufAP, bufX, bufY); - deleteBuffers<T>(blasAP, clblasAP, X, Y); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::SPR2() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufAP, bufX, bufY); - deleteBuffers<T>(blasAP, clblasAP, X, Y); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufAP, CL_TRUE, 0, - (lengthAP + params->offa) * sizeof(*clblasAP), clblasAP, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "SPR2: Reading results failed...." << std::endl; - } - - releaseMemObjects(bufAP, bufX, bufY); - - compareMatrices<T>(clblasColumnMajor, lengthAP, 1, (blasAP + params->offa), (clblasAP + params->offa), lengthAP); - - deleteBuffers<T>(blasAP, clblasAP, X, Y); - delete[] events; -} - -// Instantiate the test - -TEST_P(SPR2, sspr2) { - TestParams params; - - getParams(¶ms); - spr2CorrectnessTest<cl_float>(¶ms); -} - -TEST_P(SPR2, dspr2) { - TestParams params; - - getParams(¶ms); - spr2CorrectnessTest<cl_double>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-swap.cpp b/external/clBLAS/src/tests/correctness/corr-swap.cpp deleted file mode 100644 index e62f88a1..00000000 --- a/external/clBLAS/src/tests/correctness/corr-swap.cpp +++ /dev/null @@ -1,221 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <swap.h> - -static void -releaseMemObjects(cl_mem objX, cl_mem objY) -{ - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objY != NULL) - { - clReleaseMemObject(objY); - } -} - -template <typename T> static void -deleteBuffers(T *X, T *Y, T *blasX, T *blasY) -{ - if(X != NULL) - { - delete[] X; - } - if(Y != NULL) - { - delete[] Y; - } - if(blasX != NULL) - { - delete[] blasX; - } - if(blasY != NULL) - { - delete[] blasY; - } -} - -template <typename T> -void -swapCorrectnessTest(TestParams *params) -{ - cl_int err; - T *X, *Y, *blasX, *blasY; - cl_mem bufX, bufY; - clMath::BlasBase *base; - cl_event *events; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - 
events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - size_t lengthY = (1 + ((params->N -1) * abs(params->incy))); - - X = new T[lengthX + params->offBX ]; - Y = new T[lengthY + params->offCY ]; - blasX = new T[lengthX + params->offBX ]; - blasY = new T[lengthY + params->offCY ]; - - if((X == NULL) || (blasX == NULL) || (Y == NULL) || (blasY == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T>(X, Y, blasX, blasY); - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - // Populate A and blasX - randomVectors(params->N, (X+params->offBX), params->incx, (Y+params->offCY), params->incy); - - memcpy(blasX, X, (lengthX + params->offBX) * sizeof(T)); - memcpy(blasY, Y, (lengthY + params->offCY) * sizeof(T)); - - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX)* sizeof(T), 0, CL_MEM_READ_WRITE); - bufY = base->createEnqueueBuffer(Y, (lengthY + params->offCY)* sizeof(T), 0, CL_MEM_READ_WRITE); - - if ((bufX == NULL) || (bufY == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufX, bufY); - deleteBuffers<T>(X, Y, blasX, blasY); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling reference xSWAP routine... "; - - ::clMath::blas::swap( params->N, blasX, params->offBX, params->incx, - blasY, params->offCY, params->incy); - ::std::cerr << "Done" << ::std::endl; - - - ::std::cerr << "Calling clblas xSWAP routine... 
"; - - DataType type; - type = ( typeid(T) == typeid(cl_float))? TYPE_FLOAT : (( typeid(T) == typeid(cl_double))? TYPE_DOUBLE: (( typeid(T) == typeid(cl_float2))? TYPE_COMPLEX_FLOAT:TYPE_COMPLEX_DOUBLE)); - - err = (cl_int)::clMath::clblas::swap( type, params->N, bufX, params->offBX, params->incx, bufY, params->offCY, params->incy, - params->numCommandQueues, base->commandQueues(), 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufX, bufY); - deleteBuffers<T>(X, Y, blasX, blasY); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::SWAP() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufX, bufY); - deleteBuffers<T>(X, Y, blasX, blasY); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufX, CL_TRUE, 0, - (lengthX + params->offBX) * sizeof(T), X, 0, NULL, NULL); - err |= clEnqueueReadBuffer(base->commandQueues()[0], bufY, CL_TRUE, 0, - (lengthY + params->offCY) * sizeof(T), Y, 0, NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "SWAP: Reading results failed...." 
<< std::endl; - } - - releaseMemObjects(bufX, bufY); - - - compareMatrices<T>(clblasColumnMajor, lengthX , 1, (blasX + params->offBX), (X + params->offBX), lengthX); - compareMatrices<T>(clblasColumnMajor, lengthY , 1, (blasY + params->offCY), (Y + params->offCY), lengthY); - deleteBuffers<T>(X, Y, blasX, blasY); - delete[] events; -} - -// Instantiate the test - -TEST_P(SWAPXY, sswap) { - TestParams params; - - getParams(¶ms); - swapCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(SWAPXY, dswap) { - TestParams params; - - getParams(¶ms); - swapCorrectnessTest<cl_double>(¶ms); -} - -TEST_P(SWAPXY, cswap) { - TestParams params; - - getParams(¶ms); - swapCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(SWAPXY, zswap) { - TestParams params; - - getParams(¶ms); - swapCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-symm.cpp b/external/clBLAS/src/tests/correctness/corr-symm.cpp deleted file mode 100644 index cb747689..00000000 --- a/external/clBLAS/src/tests/correctness/corr-symm.cpp +++ /dev/null @@ -1,281 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <symm.h> -#include<cltypes.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objB, cl_mem objC) -{ - if(objA != NULL) - { - clReleaseMemObject(objA); - } - if(objB != NULL) - { - clReleaseMemObject(objB); - } - if(objC != NULL) - { - clReleaseMemObject(objC); -} -} - -template <typename T> static void -deleteBuffers(T *A, T *B, T *C, T *backC) -{ - if(A != NULL) - { - delete[] A; - } - if(B != NULL) - { - delete[] B; - } - if(C != NULL) - { - delete[] C; -} - if(backC != NULL) - { - delete[] backC; - } -} - -template <typename T> -void -symmCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *B, *C, *backC; - T alpha_, beta_; - cl_mem bufA, bufB, bufC; - clMath::BlasBase *base; - cl_event *events; - size_t ka, kbc; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - events = new cl_event[params->numCommandQueues]; - if (events == NULL) - { - } - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - if( params->side == clblasLeft ) - ka = params->M; - else ka = params->N; - - if( params->order == clblasColumnMajor ) - kbc = params->N; - else kbc = params->M; - - size_t lengthA = ka * params->lda; - size_t lengthB = kbc * params->ldb; - size_t lengthC = kbc * params->ldc; - - alpha_ = convertMultiplier<T>(params->alpha); - beta_ = convertMultiplier<T>(params->beta); - - A 
= new T[ lengthA + params->offa ]; - B = new T[ lengthB + params->offb ]; - C = new T[ lengthC + params->offc ]; - backC = new T[ lengthC + params->offc ]; - - if((A == NULL) || (B == NULL) || (C == NULL) || (backC == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T>(A, B, C, backC); - delete[] events; - SUCCEED(); - return; - } - srand(params->seed); - ::std::cerr << "Generating input data... "; - - int creationFlags = 0, AcreationFlags; - creationFlags = creationFlags | RANDOM_INIT; - creationFlags = ( (params-> order) == clblasRowMajor)? (creationFlags | ROW_MAJOR_ORDER) : (creationFlags); - AcreationFlags = ( (params-> uplo) == clblasLower)? (creationFlags | LOWER_HALF_ONLY) : (creationFlags | UPPER_HALF_ONLY); - BlasRoutineID BlasFn = CLBLAS_SYMM; - -#ifdef __TEST_CSYMM_ACML_NANBUG__ - // - // NOTE: Whether this clearing to zero is present or not - // ACML returns "nan" for few csymm cases. 
This is here - // to make things easier and rule of out-of-bound inputs - // - memset(A, 0, (lengthA + params->offa)*sizeof(T)); - memset(B, 0, (lengthB + params->offb)*sizeof(T)); - memset(C, 0, (lengthC + params->offc)*sizeof(T)); -#else - populate( A + params->offa , ka, ka, params-> lda, BlasFn, AcreationFlags); - populate( B + params->offb , params-> M, params-> N, params-> ldb, BlasFn, creationFlags); - populate( C + params->offc , params-> M, params-> N, params-> ldc, BlasFn, creationFlags); -#endif - - // Copy C to backX - memcpy(backC, C, (lengthC + params->offc) * sizeof(T)); - - // Allocate buffers - bufA = base->createEnqueueBuffer(A, (lengthA + params->offa) * sizeof(T), 0, CL_MEM_READ_ONLY); - bufB = base->createEnqueueBuffer(B, (lengthB + params->offb) * sizeof(T), 0, CL_MEM_READ_ONLY); - bufC = base->createEnqueueBuffer(backC, (lengthC + params->offc) * sizeof(T), 0, CL_MEM_READ_WRITE); - - ::std::cerr << "Done" << ::std::endl; - ::std::cerr << "Calling reference xSYMM routine... "; - - clblasOrder fOrder; - clblasUplo fUplo; - clblasSide fSide; - size_t fN, fM; - - fOrder = params->order; - fUplo = params->uplo; - fSide = params->side; - fM = params->M; - fN = params->N; - - if (fOrder != clblasColumnMajor) { - - fOrder = clblasColumnMajor; - fM = params->N; - fN = params->M; - fSide = (params->side == clblasLeft)? clblasRight: clblasLeft; - fUplo = (params->uplo == clblasUpper)? clblasLower: clblasUpper; - } - - // Call reference blas routine - clMath::blas::symm(fOrder, fSide, fUplo, fM, fN, alpha_, - A, params->offa, params->lda, B, params->offb, params->ldb, beta_, C, params->offc, params->ldc); - ::std::cerr << "Done" << ::std::endl; - - if ((bufA == NULL) || (bufB == NULL) || (bufC == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, C, backC); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." 
- << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xSYMM routine... "; - - err = (cl_int)::clMath::clblas::symm( params->order, params->side, params->uplo, params->M, params->N, alpha_, - bufA, params->offa, params->lda, bufB, params->offb, params->ldb, beta_, bufC, params->offc, params->ldc, - params->numCommandQueues, base->commandQueues(), 0, NULL, events ); - - if (err != CL_SUCCESS) { - - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, C, backC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::SYMM() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, C, backC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - clEnqueueReadBuffer(base->commandQueues()[0], bufC, CL_TRUE, 0, - (lengthC + params->offc) * sizeof(T), backC, 0, - NULL, NULL); - - releaseMemObjects(bufA, bufB, bufC); - - // handle lda correctly based on row-major/col-major.. 
- compareMatrices<T>(params->order, params->M , params->N, (C + params->offc), (backC + params->offc), params->ldc); - deleteBuffers<T>(A, B, C, backC); - delete[] events; -} - -// Instantiate the test - -#ifndef __TEST_CSYMM_ACML_NANBUG__ -TEST_P(SYMM, ssymm) { - TestParams params; - - getParams(¶ms); - symmCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(SYMM, dsymm) { - TestParams params; - - getParams(¶ms); - symmCorrectnessTest<cl_double>(¶ms); -} - -TEST_P(SYMM, csymm) { - TestParams params; - - getParams(¶ms); - symmCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(SYMM, zsymm) { - TestParams params; - - getParams(¶ms); - symmCorrectnessTest<DoubleComplex>(¶ms); -} -#else -TEST_P(SYMM, csymm) { - TestParams params; - - getParams(¶ms); - symmCorrectnessTest<FloatComplex>(¶ms); -} - -#endif - diff --git a/external/clBLAS/src/tests/correctness/corr-symv.cpp b/external/clBLAS/src/tests/correctness/corr-symv.cpp deleted file mode 100644 index b5fb4201..00000000 --- a/external/clBLAS/src/tests/correctness/corr-symv.cpp +++ /dev/null @@ -1,223 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <symv.h> - -#include "tcase-filter.h" - -static void -releaseMemObjects(cl_mem objA, cl_mem objX, cl_mem objY) -{ - clReleaseMemObject(objA); - clReleaseMemObject(objX); - clReleaseMemObject(objY); -} - -template <typename T> static void -deleteBuffers(T *A, T *X, T *blasY, T *clblasY) -{ - delete[] A; - delete[] X; - delete[] blasY; - delete[] clblasY; -} - -template <typename T> -void -symvCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *B, *blasC, *clblasC, *X, *Y; - T alpha, beta; - cl_mem bufA, bufB, bufC; - clMath::BlasBase *base; - bool useAlpha, useBeta; - cl_event *events; - bool isComplex; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - isComplex = ((typeid(T) == typeid(FloatComplex)) || - (typeid(T) == typeid(DoubleComplex))); - if (canCaseBeSkipped(params, isComplex)) { - std::cerr << ">> Test is skipped because it has no importance for this " - "level of coverage" << std::endl; - SUCCEED(); - return; - } - - useAlpha = base->useAlpha(); - useBeta = base->useBeta(); - alpha = ZERO<T>(); - beta = ZERO<T>(); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - A = new T[params->rowsA * params->columnsA]; - // X and Y are rows or columns in matrixes B and C - B = new T[params->rowsB * params->columnsB]; - 
blasC = new T[params->rowsC * params->columnsC]; - clblasC = new T[params->rowsC * params->columnsC]; - X = &B[params->offBX]; - Y = &blasC[params->offCY]; - - srand(params->seed); - if (useAlpha) { - alpha = convertMultiplier<T>(params->alpha); - } - if (useBeta) { - beta = convertMultiplier<T>(params->beta); - } - - ::std::cerr << "Generating input data... "; - setNans<T>(params->rowsA * params->columnsA, A); - setNans<T>(params->rowsB * params->columnsB, B); - setNans<T>(params->rowsC * params->columnsC, blasC); - randomGemmMatrices(params->order, clblasNoTrans, clblasNoTrans, - params->N, params->N, params->N, useAlpha, &alpha, A, - params->lda, B, params->ldb, useBeta, &beta, blasC, - params->ldc); - // set to NAN elements which must not be accessed - // in matrix A - setTriangleNans<T>(params->order, params->uplo, params->N, A, params->lda); - - // in matrix B containing vector X - setVectorNans<T>(params->offBX, abs(params->incx), B, params->N, - params->columnsB * params->rowsB); - // in matrix C containing vector Y - setVectorNans<T>(params->offCY, abs(params->incy), blasC, params->N, - params->columnsC * params->rowsC); - memcpy(clblasC, blasC, params->rowsC * params->columnsC * sizeof(*clblasC)); - ::std::cerr << "Done" << ::std::endl; - - ::std::cerr << "Calling reference xSYMV routine... 
"; - - if (params->order == clblasColumnMajor) { - ::clMath::blas::symv(clblasColumnMajor, params->uplo, - params->N, alpha, A, params->lda, - X, params->incx, beta, Y, params->incy); - } - else { - T *reorderedA = new T[params->rowsA * params->columnsA]; - - reorderMatrix<T>(clblasRowMajor, params->rowsA, params->columnsA, - A, reorderedA); - ::clMath::blas::symv(clblasColumnMajor, params->uplo, - params->N, alpha, reorderedA, params->rowsA, - X, params->incx, beta, Y, params->incy); - - delete[] reorderedA; - } - ::std::cerr << "Done" << ::std::endl; - - bufA = base->createEnqueueBuffer(A, params->rowsA * params->columnsA * - sizeof(*A), params->offA * sizeof(*A), - CL_MEM_READ_ONLY); - bufB = base->createEnqueueBuffer(B, params->rowsB * params->columnsB * - sizeof(*X), 0, CL_MEM_READ_ONLY); - bufC = base->createEnqueueBuffer(clblasC, params->rowsC * params->columnsC * - sizeof(*clblasC), 0, CL_MEM_READ_WRITE); - if ((bufA == NULL) || (bufB == NULL) || (bufC == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xSYMV routine... 
"; - err = (cl_int)::clMath::clblas::symv(params->order, params->uplo, - params->N, alpha, bufA, params->offA, params->lda, bufB, params->offBX, - params->incx, beta, bufC, params->offCY, params->incy, - params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::SYMV() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - clEnqueueReadBuffer(base->commandQueues()[0], bufC, CL_TRUE, 0, - params->rowsC * params->columnsC * sizeof(*clblasC), - clblasC, 0, NULL, NULL); - - releaseMemObjects(bufA, bufB, bufC); - - compareVectors(params->offCY, params->N, abs(params->incy), - params->columnsC * params->rowsC, blasC, clblasC); - - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; -} - -// Instantiate the test - -TEST_P(SYMV, ssymv) { - TestParams params; - - getParams(¶ms); - symvCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(SYMV, dsymv) { - TestParams params; - - getParams(¶ms); - symvCorrectnessTest<cl_double>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-syr.cpp b/external/clBLAS/src/tests/correctness/corr-syr.cpp deleted file mode 100644 index 12967c9e..00000000 --- a/external/clBLAS/src/tests/correctness/corr-syr.cpp +++ /dev/null @@ -1,266 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <syr.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objX) -{ - if(objA != NULL) - { - clReleaseMemObject(objA); - } - if(objX != NULL) - { - clReleaseMemObject(objX); -} - -} - -template <typename T> static void -deleteBuffers(T *blasA, T *clblasA, T *X) -{ - if(blasA != NULL) - { - delete[] blasA; - } - if(clblasA != NULL) - { - delete[] clblasA; - } - if(X != NULL) - { - delete[] X; -} -} - -template <typename T> -void -syrCorrectnessTest(TestParams *params) -{ - cl_int err; - T *blasA, *clblasA, *X; -// T *tempA; - cl_mem bufA, bufX; - clMath::BlasBase *base; - cl_event *events; - bool useAlpha; - T alpha; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthA = params->N * params->lda; - size_t lengthX = 
(1 + ((params->N -1) * abs(params->incx))); - - blasA = new T[lengthA + params->offa ]; - clblasA = new T[lengthA + params->offa ]; - X = new T[lengthX + params->offBX ]; -// tempA = new T[lengthA + params->offa ]; - - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - - memset(blasA, -1, (lengthA + params->offa)); - memset(clblasA, -1, (lengthA + params->offa)); - memset(X, -1, (lengthX + params->offBX)); - - alpha = convertMultiplier<T>(params->alpha); - useAlpha = true; - - #ifdef DEBUG_SYR - printf("ALPHA in CORR_SYR.CPP %f\n", alpha); - #endif - - if((blasA == NULL) || (X == NULL) || (clblasA == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T>(blasA, clblasA, X); - delete[] events; - SUCCEED(); - return; - } - - randomSyrMatrices<T>(params->order, params->uplo, params->N, useAlpha, &alpha, - (blasA + params->offa), params->lda, (X + params->offBX), params->incx); - -/* - // Set data in A and X using populate() routine - int creationFlags = 0; - creationFlags = creationFlags | RANDOM_INIT; - - // Default is Column-Major - creationFlags = ( (params-> order) == clblasRowMajor)? (creationFlags | ROW_MAJOR_ORDER) : (creationFlags); - creationFlags = ( (params-> uplo) == clblasLower)? 
(creationFlags | LOWER_HALF_ONLY) : (creationFlags | UPPER_HALF_ONLY); - BlasRoutineID BlasFn = CLBLAS_SYR; - // Populate A and blasX - populate( blasA + params->offa, params-> N, params-> N, params-> lda, BlasFn, creationFlags); - populate( X , (lengthX + params->offBX), 1, (lengthX + params->offBX), BlasFn); -*/ - // Copy blasA to clblasA - memcpy(clblasA, blasA, (lengthA + params->offa)* sizeof(*blasA)); - // memcpy(tempA, blasA, (lengthA + params->offa)* sizeof(*blasA)); - - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufA = base->createEnqueueBuffer(clblasA, (lengthA + params->offa) * sizeof(*clblasA), 0, CL_MEM_READ_WRITE); - bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX)* sizeof(*X), 0, CL_MEM_READ_ONLY); - - ::std::cerr << "Calling reference xSYR routine... "; - - clblasOrder order; - clblasUplo fUplo; - - order = params->order; - fUplo = params->uplo; - - //printf("\n\n before acml call\nA\n"); - // printMatrixBlock( params->order, 0, 0, params->N, params->N, params->lda, blasA); - //printf("\nX\n"); - //printMatrixBlock( clblasColumnMajor, 0, 0, lengthX, 1, lengthX, X); - - if (order == clblasColumnMajor) - { - ::clMath::blas::syr( clblasColumnMajor, fUplo, params->N, alpha, X, params->offBX, params->incx, blasA, params->offa, params->lda); - } - else - { - T *reorderedA = new T[lengthA + params->offa]; - - //reorderMatrix<T>(clblasRowMajor, params->N, params->lda, blasA, reorderedA); - - fUplo = (fUplo == clblasUpper) ? 
clblasLower : clblasUpper; - //::clMath::blas::syr( clblasColumnMajor, fUplo, params->N, alpha, X, params->offBX, params->incx, reorderedA, params->offa, params->lda); - - ::clMath::blas::syr( clblasColumnMajor, fUplo, params->N, alpha, X, params->offBX, params->incx, blasA, params->offa, params->lda); - - //reorderMatrix<T>(clblasColumnMajor, params->lda, params->N, reorderedA, blasA); - - delete[] reorderedA; - } - //printf("After acml\n"); - //printMatrixBlock( params->order, 0, 0, params->N, params->N, params->lda, blasA); - - ::std::cerr << "Done" << ::std::endl; - - if ((bufA == NULL) || (bufX == NULL) ) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufA, bufX); - deleteBuffers<T>(blasA, clblasA, X); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xSYR routine... 
"; - - err = (cl_int)::clMath::clblas::syr( params->order, params->uplo, params->N, alpha, - bufX, params->offBX, params->incx, bufA, params->offa, params->lda, - params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX); - deleteBuffers<T>(blasA, clblasA, X); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::SYR() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX); - deleteBuffers<T>(blasA, clblasA, X); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufA, CL_TRUE, 0, - (lengthA + params->offa) * sizeof(*clblasA), clblasA, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "SYR: Reading results failed...." << std::endl; - } - - releaseMemObjects(bufA, bufX); - //printMatrixBlock( params->order, 0, 0, params->N, params->N, params->lda, clblasA); - //getchar(); - -// printf("Comparing with the temp buffer\n"); -// compareMatrices<T>(clblasColumnMajor, 1, (params->lda - params->N), (blasA + params->offa + params->N), (tempA + params->offa + params->N), -// params->lda); -// delete[] tempA; - printf("Comparing the results\n"); - compareMatrices<T>(params->order, params->N , params->N, (blasA + params->offa), (clblasA + params->offa), - params->lda); - - deleteBuffers<T>(blasA, clblasA, X); - delete[] events; -} - -// Instantiate the test - -TEST_P(SYR, ssyr) { - TestParams params; - - getParams(¶ms); - syrCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(SYR, dsyr) { - TestParams params; - - getParams(¶ms); - syrCorrectnessTest<cl_double>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-syr2.cpp b/external/clBLAS/src/tests/correctness/corr-syr2.cpp deleted file mode 100644 index 4148ed56..00000000 --- 
a/external/clBLAS/src/tests/correctness/corr-syr2.cpp +++ /dev/null @@ -1,218 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <syr2.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objX, cl_mem objY) -{ - if(objA != NULL) { - clReleaseMemObject(objA); - } - if(objX != NULL) { - clReleaseMemObject(objX); - } - if(objY != NULL) { - clReleaseMemObject(objY); -} -} - -template <typename T> static void -deleteBuffers(T *blasA, T *clblasA, T *X, T *Y) -{ - if(blasA != NULL) { - delete[] blasA; - } - if(clblasA != NULL) { - delete[] clblasA; - } - if(X != NULL) { - delete[] X; - } - if(Y != NULL) { - delete[] Y; -} -} - -template <typename T> -void -syr2CorrectnessTest(TestParams *params) -{ - cl_int err; - T *blasA, *clblasA, *X, *Y; - cl_mem bufA, bufX, bufY; - clMath::BlasBase *base; - cl_event *events; - bool useAlpha; - T alpha; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double)) && - !base->isDevSupportDoublePrecision()) { - - 
std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthA = params->N * params->lda; - size_t lengthX = (1 + ((params->N - 1) * abs(params->incx))); - size_t lengthY = (1 + ((params->N - 1) * abs(params->incy))); - - blasA = new T[lengthA + params->offa ]; - clblasA = new T[lengthA + params->offa ]; - X = new T[lengthX + params->offBX ]; - Y = new T[lengthY + params->offCY ]; - - srand(params->seed); - - if((blasA == NULL) || (clblasA == NULL) || (X == NULL) || (Y == NULL)) - { - deleteBuffers<T>(blasA, clblasA, X, Y); - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - delete[] events; - SUCCEED(); - return; - } - - alpha = convertMultiplier<T>(params->alpha); - useAlpha = true; - - ::std::cerr << "Generating input data... "; - - randomSyr2Matrices<T>(params->order, params->uplo, params->N, useAlpha, &alpha, (blasA + params->offa), params->lda, - (X + params->offBX), params->incx, (Y + params->offCY), params->incy); - - // Copy blasA to clblasA - memcpy(clblasA, blasA, (lengthA + params->offa)* sizeof(*blasA)); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufA = base->createEnqueueBuffer(clblasA, (lengthA + params->offa)* sizeof(*clblasA), 0,CL_MEM_READ_WRITE); - bufX = base->createEnqueueBuffer(X, (lengthX + params->offBX)* sizeof(*X), 0, CL_MEM_READ_ONLY); - bufY = base->createEnqueueBuffer(Y, (lengthY + params->offCY)* sizeof(*Y), 0, CL_MEM_READ_ONLY); - - ::std::cerr << "Calling reference xSYR2 routine... 
"; - - clblasOrder order; - clblasUplo fUplo; - - order = params->order; - fUplo = params->uplo; - - if (order != clblasColumnMajor) - { - order = clblasColumnMajor; - fUplo = (params->uplo == clblasUpper)? clblasLower : clblasUpper; - } - - ::clMath::blas::syr2( order, fUplo, params->N, alpha, X, params->offBX, params->incx, - Y, params->offCY, params->incy, blasA, params->offa, params->lda); - ::std::cerr << "Done" << ::std::endl; - - if ((bufA == NULL) || (bufX == NULL) || (bufY == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufA, bufX, bufY); - deleteBuffers<T>(blasA, clblasA, X, Y); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xSYR2 routine... "; - - err = (cl_int)::clMath::clblas::syr2( params->order, params->uplo, params->N, alpha, - bufX, params->offBX, params->incx, bufY, params->offCY, params->incy, bufA, params->offa, params->lda, - params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX, bufY); - deleteBuffers<T>(blasA, clblasA, X, Y); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::SYR2() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX, bufY); - deleteBuffers<T>(blasA, clblasA, X, Y); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufA, CL_TRUE, 0, - (lengthA + params->offa) * sizeof(*clblasA), clblasA, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "SYR2: Reading results 
failed...." << std::endl; - } - - releaseMemObjects(bufA, bufX, bufY); - - compareMatrices<T>(clblasColumnMajor, params->N , params->N, (blasA + params->offa), (clblasA + params->offa), - params->lda); - - deleteBuffers<T>(blasA, clblasA, X, Y); - delete[] events; -} - -// Instantiate the test - -TEST_P(SYR2, ssyr2) { - TestParams params; - - getParams(¶ms); - syr2CorrectnessTest<cl_float>(¶ms); -} - -TEST_P(SYR2, dsyr2) { - TestParams params; - - getParams(¶ms); - syr2CorrectnessTest<cl_double>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-syr2k.cpp b/external/clBLAS/src/tests/correctness/corr-syr2k.cpp deleted file mode 100644 index d42c7f4c..00000000 --- a/external/clBLAS/src/tests/correctness/corr-syr2k.cpp +++ /dev/null @@ -1,260 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <syr2k.h> - -#include "tcase-filter.h" - -static void -releaseMemObjects(cl_mem objA, cl_mem objB, cl_mem objC) -{ - clReleaseMemObject(objA); - clReleaseMemObject(objB); - clReleaseMemObject(objC); -} - -template <typename T> static void -deleteBuffers(T *A, T *B, T *blasC, T *clblasC) -{ - delete[] A; - delete[] B; - delete[] blasC; - delete[] clblasC; -} - -template <typename T> -void -syr2kCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *B, *blasC, *clblasC; - T alpha, beta, a; - cl_mem bufA, bufB, bufC; - clMath::BlasBase *base; - bool useAlpha; - bool useBeta; - cl_event *events; - clblasTranspose transB; - bool isComplex; - - base = clMath::BlasBase::getInstance(); - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - if ((typeid(T) == typeid(FloatComplex)) || - (typeid(T) == typeid(DoubleComplex))) { - if (params->transA == clblasConjTrans) { - ::std::cerr << ">> syr2k(CONJUGATE_TRANSPOSE) for complex numbers " - "is not allowed." << ::std::endl << - ">> Test skipped." 
<< ::std::endl; - SUCCEED(); - return; - } - } - - isComplex = ((typeid(T) == typeid(FloatComplex)) || - (typeid(T) == typeid(DoubleComplex))); - if (canCaseBeSkipped(params, isComplex)) { - std::cerr << ">> Test is skipped because it has no importance for this " - "level of coverage" << std::endl; - SUCCEED(); - return; - } - - useAlpha = base->useAlpha(); - useBeta = base->useBeta(); - alpha = ZERO<T>(); - beta = ZERO<T>(); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - A = new T[params->rowsA * params->columnsA]; - B = new T[params->rowsB * params->columnsB]; - blasC = new T[params->rowsC * params->columnsC]; - clblasC = new T[params->rowsC * params->columnsC]; - - srand(params->seed); - if (useAlpha) { - alpha = convertMultiplier<T>(params->alpha); - } - if (useBeta) { - beta = convertMultiplier<T>(params->beta); - } - - ::std::cerr << "Generating input data... "; - if (!useAlpha) { - alpha = random<T>(100); - if (module(alpha) == 0.0) { - alpha = ONE<T>(); - } - } - - a = alpha * 2; - transB = (params->transA == clblasNoTrans) ? clblasTrans : - clblasNoTrans; - randomGemmMatrices<T>(params->order, params->transA, transB, - params->N, params->N, params->K, true, &a, A, params->lda, - B, params->ldb, useBeta, &beta, blasC, params->ldc); - memcpy(clblasC, blasC, params->rowsC * params->columnsC * sizeof(*blasC)); - ::std::cerr << "Done" << ::std::endl; - - ::std::cerr << "Calling reference xSYR2K routine... 
"; - if (params->order == clblasColumnMajor) { - ::clMath::blas::syr2k(clblasColumnMajor, params->uplo, params->transA, - params->N, params->K, alpha, A, - params->lda, B, params->ldb, beta, blasC, params->ldc); - } - else { - T *reorderedA = new T[params->rowsA * params->columnsA]; - T *reorderedB = new T[params->rowsB * params->columnsB]; - T *reorderedC = new T[params->rowsC * params->columnsC]; - - reorderMatrix<T>(clblasRowMajor, params->rowsA, params->columnsA, - A, reorderedA); - reorderMatrix<T>(clblasRowMajor, params->rowsB, params->columnsB, - B, reorderedB); - reorderMatrix<T>(clblasRowMajor, params->rowsC, params->columnsC, - blasC, reorderedC); - ::clMath::blas::syr2k(clblasColumnMajor, params->uplo, params->transA, - params->N, params->K, alpha, reorderedA, - params->rowsA, reorderedB, params->rowsB, - beta, reorderedC, params->rowsC); - reorderMatrix<T>(clblasColumnMajor, params->rowsC, params->columnsC, - reorderedC, blasC); - - delete[] reorderedC; - delete[] reorderedB; - delete[] reorderedA; - } - ::std::cerr << "Done" << ::std::endl; - - bufA = base->createEnqueueBuffer(A, params->rowsA * params->columnsA * - sizeof(*A), params->offA * sizeof(*A), - CL_MEM_READ_ONLY); - bufB = base->createEnqueueBuffer(B, params->rowsB * params->columnsB * - sizeof(*B), params->offBX * sizeof(*B), - CL_MEM_READ_ONLY); - bufC = base->createEnqueueBuffer(clblasC, params->rowsC * params->columnsC * - sizeof(*clblasC), - params->offCY * sizeof(*clblasC), - CL_MEM_READ_WRITE); - if ((bufA == NULL) || (bufB == NULL) || (bufC == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." 
<< ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xSYR2K routine... "; - err = (cl_int)::clMath::clblas::syr2k(params->order, params->uplo, - params->transA, params->N, params->K, - alpha, bufA, params->offA, - params->lda, bufB, params->offBX, - params->ldb, beta, bufC, - params->offCY, params->ldc, - params->numCommandQueues, - base->commandQueues(), - 0, NULL, events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::SYR2K() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufB, bufC); - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - clEnqueueReadBuffer(base->commandQueues()[0], bufC, CL_TRUE, - params->offCY * sizeof(*clblasC), - params->rowsC * params->columnsC * sizeof(*clblasC), - clblasC, 0, NULL, NULL); - - releaseMemObjects(bufA, bufB, bufC); - compareMatrices<T>(params->order, params->N, params->N, blasC, clblasC, - params->ldc); - - deleteBuffers<T>(A, B, blasC, clblasC); - delete[] events; -} - -// Instantiate the test - -TEST_P(SYR2K, ssyr2k) { - TestParams params; - - getParams(¶ms); - syr2kCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(SYR2K, dsyr2k) { - TestParams params; - - getParams(¶ms); - syr2kCorrectnessTest<cl_double>(¶ms); -} - - -TEST_P(SYR2K, csyr2k) { - TestParams params; - - getParams(¶ms); - syr2kCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(SYR2K, zsyr2k) { - TestParams params; - - getParams(¶ms); - syr2kCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-syrk.cpp b/external/clBLAS/src/tests/correctness/corr-syrk.cpp deleted file mode 100644 index 8977718d..00000000 --- 
a/external/clBLAS/src/tests/correctness/corr-syrk.cpp +++ /dev/null @@ -1,244 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <syrk.h> - -#include "tcase-filter.h" - -static void -releaseMemObjects(cl_mem objA, cl_mem objC) -{ - clReleaseMemObject(objA); - clReleaseMemObject(objC); -} - -template <typename T> static void -deleteBuffers(T *A, T *blasC, T *clblasC) -{ - delete[] A; - delete[] blasC; - delete[] clblasC; -} - -template <typename T> -void -syrkCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *blasC, *clblasC; - T alpha, beta; - cl_mem bufA, bufC; - clMath::BlasBase *base; - bool useAlpha; - bool useBeta; - cl_event *events; - bool isComplex; - - base = clMath::BlasBase::getInstance(); - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test 
skipped" << std::endl; - SUCCEED(); - return; - } - - if ((typeid(T) == typeid(FloatComplex)) || - (typeid(T) == typeid(DoubleComplex))) { - if (params->transA == clblasConjTrans) { - ::std::cerr << ">> syrk(CONJUGATE_TRANSPOSE) for complex numbers " - "is not allowed." << ::std::endl << - ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - } - - isComplex = ((typeid(T) == typeid(FloatComplex)) || - (typeid(T) == typeid(DoubleComplex))); - if (canCaseBeSkipped(params, isComplex)) { - std::cerr << ">> Test is skipped because it has no importance for this " - "level of coverage" << std::endl; - SUCCEED(); - return; - } - - useAlpha = base->useAlpha(); - useBeta = base->useBeta(); - alpha = ZERO<T>(); - beta = ZERO<T>(); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - A = new T[params->rowsA * params->columnsA]; - blasC = new T[params->rowsC * params->columnsC]; - clblasC = new T[params->rowsC * params->columnsC]; - - srand(params->seed); - if (useAlpha) { - alpha = convertMultiplier<T>(params->alpha); - } - if (useBeta) { - beta = convertMultiplier<T>(params->beta); - } - - ::std::cerr << "Generating input data... "; - if (!useAlpha) { - alpha = random<T>(100); - if (module(alpha) == 0.0) { - alpha = ONE<T>(); - } - } - - randomGemmMatrices<T>(params->order, params->transA, clblasNoTrans, - params->N, params->N, params->K, useAlpha, &alpha, A, params->lda, - NULL, 0, useBeta, &beta, blasC, params->ldc); - memcpy(clblasC, blasC, params->rowsC * params->columnsC * sizeof(*blasC)); - ::std::cerr << "Done" << ::std::endl; - - ::std::cerr << "Calling reference xSYRK routine... 
"; - if (params->order == clblasColumnMajor) { - ::clMath::blas::syrk(clblasColumnMajor, params->uplo, params->transA, - params->N, params->K, alpha, A, params->lda, - beta, blasC, params->ldc); - } - else { - T *reorderedA = new T[params->rowsA * params->columnsA]; - T *reorderedC = new T[params->rowsC * params->columnsC]; - - reorderMatrix<T>(clblasRowMajor, params->rowsA, params->columnsA, - A, reorderedA); - reorderMatrix<T>(clblasRowMajor, params->rowsC, params->columnsC, - blasC, reorderedC); - ::clMath::blas::syrk(clblasColumnMajor, params->uplo, params->transA, - params->N, params->K, alpha, reorderedA, - params->rowsA, - beta, reorderedC, params->rowsC); - reorderMatrix<T>(clblasColumnMajor, params->rowsC, params->columnsC, - reorderedC, blasC); - - delete[] reorderedC; - delete[] reorderedA; - } - ::std::cerr << "Done" << ::std::endl; - - bufA = base->createEnqueueBuffer(A, params->rowsA * params->columnsA * - sizeof(*A), params->offA * sizeof(*A), - CL_MEM_READ_ONLY); - bufC = base->createEnqueueBuffer(clblasC, params->rowsC * params->columnsC * - sizeof(*clblasC), - params->offCY * sizeof(*clblasC), - CL_MEM_READ_WRITE); - if ((bufA == NULL) || (bufC == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufA, bufC); - deleteBuffers<T>(A, blasC, clblasC); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xSYRK routine... 
"; - err = (cl_int)::clMath::clblas::syrk(params->order, params->uplo, - params->transA, params->N, params->K, - alpha, bufA, params->offA, params->lda, - beta, bufC, params->offCY, - params->ldc, params->numCommandQueues, - base->commandQueues(), 0, NULL, - events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufC); - deleteBuffers<T>(A, blasC, clblasC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::SYRK() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufC); - deleteBuffers<T>(A, blasC, clblasC); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - clEnqueueReadBuffer(base->commandQueues()[0], bufC, CL_TRUE, - params->offCY * sizeof(*clblasC), - params->rowsC * params->columnsC * sizeof(*clblasC), - clblasC, 0, NULL, NULL); - - releaseMemObjects(bufA, bufC); - compareMatrices<T>(params->order, params->N, params->N, blasC, clblasC, - params->ldc); - - deleteBuffers<T>(A, blasC, clblasC); - delete[] events; -} - -// Instantiate the test - -TEST_P(SYRK, ssyrk) { - TestParams params; - - getParams(¶ms); - syrkCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(SYRK, dsyrk) { - TestParams params; - - getParams(¶ms); - syrkCorrectnessTest<cl_double>(¶ms); -} - -TEST_P(SYRK, csyrk) { - TestParams params; - - getParams(¶ms); - syrkCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(SYRK, zsyrk) { - TestParams params; - - getParams(¶ms); - syrkCorrectnessTest<DoubleComplex>(¶ms); -} - diff --git a/external/clBLAS/src/tests/correctness/corr-tbmv.cpp b/external/clBLAS/src/tests/correctness/corr-tbmv.cpp deleted file mode 100644 index 7b438ffe..00000000 --- a/external/clBLAS/src/tests/correctness/corr-tbmv.cpp +++ /dev/null @@ -1,233 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. 
- * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <tbmv.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objX, cl_mem objXtemp) -{ - if(objA != NULL) - { - clReleaseMemObject(objA); - } - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objXtemp != NULL) - { - clReleaseMemObject(objXtemp); - } - -} - -template <typename T> static void -deleteBuffers(T *A, T *blasX, T *clblasX) -{ - if(A != NULL) - { - delete[] A; - } - if(blasX != NULL) - { - delete[] blasX; - } - if(clblasX != NULL) - { - delete[] clblasX; // To hold clblas TBMV call results - } -} - -template <typename T> -void -tbmvCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *blasX, *clblasX; - cl_mem bufA, bufX, bufXtemp; - clMath::BlasBase *base; - cl_event *events; - size_t lengthX, lengthA; - - base = clMath::BlasBase::getInstance(); - - if (( (typeid(T) == typeid(DoubleComplex)) || (typeid(T) == typeid(cl_double)) ) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << 
std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - lengthA = params->N * params->lda ; - - lengthX = (params->N - 1)*abs(params->incx) + 1; - - A = new T[ lengthA + params->offA ]; - blasX = new T[ lengthX + params->offBX ]; - clblasX = new T[ lengthX + params->offBX ]; - - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - if((A == NULL) || (blasX == NULL) || (clblasX == NULL)) - { - deleteBuffers<T>(A, blasX, clblasX); - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped!!!!!!!!!!!!" << ::std::endl; - delete[] events; - SUCCEED(); - return; - } - randomTbmvMatrices( params->N, (A + params->offA), params->lda, (blasX + params->offBX), params->incx ); - - // Copy blasY to clblasY - memcpy(clblasX, blasX, (lengthX + params->offBX)* sizeof(*blasX)); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufA = base->createEnqueueBuffer(A, (lengthA + params->offA)* sizeof(*A), 0, CL_MEM_READ_WRITE); - bufX = base->createEnqueueBuffer(blasX, (lengthX + params->offBX)* sizeof(*blasX), 0, CL_MEM_READ_WRITE); - bufXtemp = base->createEnqueueBuffer(blasX, (lengthX + params->offBX)* sizeof(*blasX), 0, CL_MEM_READ_WRITE); - - ::std::cerr << "Calling reference xTBMV routine... "; - - clblasOrder fOrder; - clblasTranspose fTrans; - clblasUplo fUplo; - fOrder = params->order; - fTrans = params->transA; - fUplo = params->uplo; - size_t fN = params->N, fK = params->K; - - if (fOrder != clblasColumnMajor) - { - fOrder = clblasColumnMajor; - fTrans = (params->transA == clblasNoTrans)? clblasTrans : clblasNoTrans; - fUplo = (params->uplo == clblasLower)? 
clblasUpper : clblasLower; - - if( params->transA == clblasConjTrans ) - doConjugate( (A + params->offA), 1, lengthA, params->lda ); - } - - clMath::blas::tbmv(fOrder, fUplo, fTrans, params->diag, fN, fK, A, params->offA, params->lda, blasX, params->offBX, params->incx); - ::std::cerr << "Done" << ::std::endl; - - if ((bufA == NULL) || (bufX == NULL)|| (bufXtemp == NULL)) { - // Skip the test, the most probable reason is - // matrix too big for a device. - - releaseMemObjects(bufA, bufX, bufXtemp ); - deleteBuffers<T>(A, blasX, clblasX); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xTBMV routine... "; - DataType type; - type = ( typeid(T) == typeid(cl_float))? TYPE_FLOAT:( typeid(T) == typeid(cl_double))? TYPE_DOUBLE: - ( typeid(T) == typeid(cl_float2))? 
TYPE_COMPLEX_FLOAT: TYPE_COMPLEX_DOUBLE; - - err = (cl_int)clMath::clblas::tbmv(type, params->order, params->uplo, params->transA, params->diag, params->N, params->K, - bufA, params->offA, params->lda, bufX, params->offBX, params->incx, bufXtemp, - params->numCommandQueues, base->commandQueues(), 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX, bufXtemp); - deleteBuffers<T>(A, blasX, clblasX); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::TBMV() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX, bufXtemp); - deleteBuffers<T>(A, blasX, clblasX); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufX, CL_TRUE, 0, - (lengthX + params->offBX) * sizeof(*clblasX), clblasX, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "TBMV: Reading results failed...." 
<< std::endl; - } - - releaseMemObjects(bufA, bufX, bufXtemp); - compareMatrices<T>(clblasColumnMajor, lengthX , 1, (blasX + params->offBX), (clblasX + params->offBX), - lengthX); - deleteBuffers<T>(A, blasX, clblasX); - delete[] events; -} - -// Instantiate the test - -TEST_P(TBMV, stbmv) { - TestParams params; - - getParams(&params); - tbmvCorrectnessTest<cl_float>(&params); -} - -TEST_P(TBMV, dtbmv) { - TestParams params; - - getParams(&params); - tbmvCorrectnessTest<cl_double>(&params); -} - -TEST_P(TBMV, ctbmv) { - TestParams params; - - getParams(&params); - tbmvCorrectnessTest<FloatComplex>(&params); -} - -TEST_P(TBMV, ztbmv) { - TestParams params; - - getParams(&params); - tbmvCorrectnessTest<DoubleComplex>(&params); -} diff --git a/external/clBLAS/src/tests/correctness/corr-tbsv.cpp b/external/clBLAS/src/tests/correctness/corr-tbsv.cpp deleted file mode 100644 index 17c59f3f..00000000 --- a/external/clBLAS/src/tests/correctness/corr-tbsv.cpp +++ /dev/null @@ -1,242 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <tbsv.h> -#include "trsv-delta.h" - -static void -releaseMemObjects(cl_mem objA, cl_mem objX) -{ - if(objA != NULL) - { - clReleaseMemObject(objA); - } - if(objX != NULL) - { - clReleaseMemObject(objX); - } - -} - -template <typename T> static void -deleteBuffers(T *A, T *blasX, T *backX, cl_double *deltaX) -{ - if( A != NULL ) - { - delete[] A; - } - if( blasX != NULL ) - { - delete[] blasX; - } - if( backX != NULL ) - { - delete[] backX; - } - if( deltaX != NULL ) - { - delete[] deltaX; - } -} - -template <typename T> -void -tbsvCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *blasX, *clblasX; - cl_mem bufA, bufX; - cl_double *deltaX; - clMath::BlasBase *base; - cl_event *events; - size_t lengthX, lengthA; - - base = clMath::BlasBase::getInstance(); - - if (( (typeid(T) == typeid(DoubleComplex)) || (typeid(T) == typeid(cl_double)) ) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - lengthA = params->N * params->lda ; - lengthX = (params->N - 1)*abs(params->incx) + 1; - - A = new T[ lengthA + params->offA ]; - blasX = new T[ lengthX + params->offBX ]; - clblasX = new T[ lengthX + params->offBX ]; - deltaX = new cl_double[lengthX + params->offBX]; - - srand(params->seed); - - ::std::cerr << "Generating input data... 
"; - - if((A == NULL) || (blasX == NULL) || (clblasX == NULL)) - { - deleteBuffers<T>(A, blasX, clblasX, deltaX); - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped!!!!!!!!!!!!" << ::std::endl; - delete[] events; - SUCCEED(); - return; - } - memset( deltaX, 0, (lengthX + params->offBX)*sizeof(cl_double) ); - memset( blasX, 0, (lengthX + params->offBX)*sizeof(T)); - memset( clblasX, 0, (lengthX + params->offBX)*sizeof(T)); - - randomTbsvMatrices<T>( params->order, params->uplo, params->diag, params->N, params->K, - (A + params->offA), params->lda, (blasX + params->offBX), params->incx ); - - // Generate delta X for result comparison - tbsvDelta<T>( params->order, params->uplo, params->transA, params->diag, params->N, params->K, - (A + params->offA), params->lda, (blasX + params->offBX), params->incx, (deltaX + params->offBX) ); - - memcpy(clblasX, blasX, (lengthX + params->offBX)* sizeof(*blasX)); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufA = base->createEnqueueBuffer(A, (lengthA + params->offA)* sizeof(*A), 0, CL_MEM_READ_WRITE); - bufX = base->createEnqueueBuffer(blasX, (lengthX + params->offBX)* sizeof(*blasX), 0, CL_MEM_READ_WRITE); - - ::std::cerr << "Calling reference xTBSV routine... "; - - clblasOrder fOrder; - clblasTranspose fTrans; - clblasUplo fUplo; - fOrder = params->order; - fTrans = params->transA; - fUplo = params->uplo; - size_t fN = params->N, fK = params->K; - - if (fOrder != clblasColumnMajor) - { - fOrder = clblasColumnMajor; - fTrans = (params->transA == clblasNoTrans)? clblasTrans : clblasNoTrans; - fUplo = (params->uplo == clblasLower)? 
clblasUpper : clblasLower; - - if( params->transA == clblasConjTrans ) - doConjugate( (A + params->offA), params->N, params->lda, params->lda ); - } - - clMath::blas::tbsv(fOrder, fUplo, fTrans, params->diag, fN, fK, A, params->offA, params->lda, blasX, params->offBX, params->incx); - ::std::cerr << "Done" << ::std::endl; - - if ((bufA == NULL) || (bufX == NULL)) { - // Skip the test, the most probable reason is - // matrix too big for a device. - - releaseMemObjects(bufA, bufX); - deleteBuffers<T>(A, blasX, clblasX, deltaX); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xTBSV routine... "; - DataType type; - type = ( typeid(T) == typeid(cl_float))? TYPE_FLOAT:( typeid(T) == typeid(cl_double))? TYPE_DOUBLE: - ( typeid(T) == typeid(cl_float2))? 
TYPE_COMPLEX_FLOAT: TYPE_COMPLEX_DOUBLE; - - err = (cl_int)clMath::clblas::tbsv(type, params->order, params->uplo, params->transA, params->diag, params->N, params->K, - bufA, params->offA, params->lda, bufX, params->offBX, params->incx, - params->numCommandQueues, base->commandQueues(), 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX); - deleteBuffers<T>(A, blasX, clblasX, deltaX); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::TBSV() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX); - deleteBuffers<T>(A, blasX, clblasX, deltaX); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufX, CL_TRUE, 0, - (lengthX + params->offBX) * sizeof(*clblasX), clblasX, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "TBSV: Reading results failed...." 
<< std::endl; - } - - releaseMemObjects(bufA, bufX); - compareMatrices<T>(clblasColumnMajor, lengthX , 1, (blasX + params->offBX), (clblasX + params->offBX), - lengthX, (deltaX + params->offBX) ); - deleteBuffers<T>(A, blasX, clblasX, deltaX); - delete[] events; -} - -// Instantiate the test - -TEST_P(TBSV, stbsv) { - TestParams params; - - getParams(&params); - tbsvCorrectnessTest<cl_float>(&params); -} - -TEST_P(TBSV, dtbsv) { - TestParams params; - - getParams(&params); - tbsvCorrectnessTest<cl_double>(&params); -} - -TEST_P(TBSV, ctbsv) { - TestParams params; - - getParams(&params); - tbsvCorrectnessTest<FloatComplex>(&params); -} - -TEST_P(TBSV, ztbsv) { - TestParams params; - - getParams(&params); - tbsvCorrectnessTest<DoubleComplex>(&params); -} diff --git a/external/clBLAS/src/tests/correctness/corr-tpmv.cpp b/external/clBLAS/src/tests/correctness/corr-tpmv.cpp deleted file mode 100644 index 041154e9..00000000 --- a/external/clBLAS/src/tests/correctness/corr-tpmv.cpp +++ /dev/null @@ -1,252 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <tpmv.h> - -static void -releaseMemObjects(cl_mem objAP, cl_mem objX, cl_mem objXTemp) -{ - if(objAP != NULL) - { - clReleaseMemObject(objAP); - } - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objXTemp != NULL) - { - clReleaseMemObject(objXTemp); -} -} - -template <typename T> static void -deleteBuffers(T *AP, T *blasX, T *clblasX) -{ - if(AP != NULL) - { - delete[] AP; - } - if(blasX != NULL) - { - delete[] blasX; - } - if(clblasX != NULL) - { - delete[] clblasX; - } -} - -template <typename T> -void -tpmvCorrectnessTest(TestParams *params) -{ - cl_int err; - T *AP, *blasX, *clblasX; - cl_mem bufAP, bufX, bufXTemp; - clMath::BlasBase *base; - cl_event *events; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthAP = (params->N *( params->N + 1 ))/2 ; - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - - AP = new T[lengthAP + params->offa ]; - blasX = new T[lengthX + params->offBX ]; - clblasX = new T[lengthX + params->offBX ]; - - if((AP == NULL) || (blasX == NULL) || (clblasX == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test 
skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T>(AP, blasX, clblasX); - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - // Set data in A and X using populate() routine - int creationFlags = 0; - creationFlags = creationFlags | RANDOM_INIT | PACKED_MATRIX; - - // Default is Column-Major - creationFlags = ( (params-> order) == clblasRowMajor)? (creationFlags | ROW_MAJOR_ORDER) : (creationFlags); - creationFlags = ( (params-> uplo) == clblasLower)? (creationFlags | LOWER_HALF_ONLY) : (creationFlags | UPPER_HALF_ONLY); - BlasRoutineID BlasFn = CLBLAS_TRMV; - - // Populate A and blasX - populate( AP + params->offa, params-> N, params-> N, 0, BlasFn, creationFlags); - populate( blasX , (lengthX + params->offBX), 1, (lengthX + params->offBX), BlasFn); - - // Copy blasX to clblasX - memcpy(clblasX, blasX, (lengthX + params->offBX)* sizeof(*blasX)); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufAP = base->createEnqueueBuffer(AP, (lengthAP + params->offa)* sizeof(*AP), 0, CL_MEM_READ_ONLY); - bufX = base->createEnqueueBuffer(clblasX, (lengthX + params->offBX)* sizeof(*clblasX), 0, CL_MEM_WRITE_ONLY); - bufXTemp = base->createEnqueueBuffer(NULL, lengthX * sizeof(*clblasX), 0, CL_MEM_READ_ONLY); - - //printData( "bufX", blasX, lengthX, 1, lengthX); - //printData( "clblasX", clblasX, lengthX, 1, lengthX); - - ::std::cerr << "Calling reference xTPMV routine... "; - - - clblasOrder order; - clblasUplo fUplo; - clblasTranspose fTrans; - - order = params->order; - fUplo = params->uplo; - fTrans = params->transA; - - if (order != clblasColumnMajor) - { - order = clblasColumnMajor; - fUplo = (params->uplo == clblasUpper)? clblasLower : clblasUpper; - fTrans = (params->transA == clblasNoTrans)? 
clblasTrans : clblasNoTrans; - - if( params->transA == clblasConjTrans ) - doConjugate( (AP +params->offa), (( params->N * (params->N + 1)) / 2) , 1, 1 ); - } - - ::clMath::blas::tpmv( order, fUplo, fTrans, params->diag, params->N, AP, params->offa, blasX, params->offBX, params->incx); - ::std::cerr << "Done" << ::std::endl; - - // Hold X vector - - if ((bufAP == NULL) || (bufX == NULL) || (bufXTemp == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufAP, bufX, bufXTemp); - deleteBuffers<T>(AP, blasX, clblasX); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xTPMV routine... "; - - DataType type; - type = ( typeid(T) == typeid(cl_float))? TYPE_FLOAT : ( typeid(T) == typeid(cl_double))? TYPE_DOUBLE: ( typeid(T) == typeid(cl_float2))? 
TYPE_COMPLEX_FLOAT:TYPE_COMPLEX_DOUBLE; - - // Should use bufXTemp as well - err = (cl_int)::clMath::clblas::tpmv( type, params->order, params->uplo, params->transA, params->diag, params->N, bufAP, - params->offa, bufX, params->offBX, params->incx, bufXTemp, params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufAP, bufX, bufXTemp); - deleteBuffers<T>(AP, blasX, clblasX); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::TPMV() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufAP, bufX, bufXTemp); - deleteBuffers<T>(AP, blasX, clblasX); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufX, CL_TRUE, 0, - (lengthX + params->offBX) * sizeof(*clblasX), clblasX, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "TPMV: Reading results failed...." 
<< std::endl; - } - - releaseMemObjects(bufAP, bufX, bufXTemp); - - compareMatrices<T>(clblasColumnMajor, lengthX , 1, (blasX + params->offBX), (clblasX + params->offBX), - lengthX); - deleteBuffers<T>(AP, blasX, clblasX); - delete[] events; -} - -// Instantiate the test - -TEST_P(TPMV, stpmv) { - TestParams params; - - getParams(&params); - tpmvCorrectnessTest<cl_float>(&params); -} - -TEST_P(TPMV, dtpmv) { - TestParams params; - - getParams(&params); - tpmvCorrectnessTest<cl_double>(&params); -} - -TEST_P(TPMV, ctpmv) { - TestParams params; - - getParams(&params); - tpmvCorrectnessTest<FloatComplex>(&params); -} - -TEST_P(TPMV, ztpmv) { - TestParams params; - - getParams(&params); - tpmvCorrectnessTest<DoubleComplex>(&params); -} diff --git a/external/clBLAS/src/tests/correctness/corr-tpsv.cpp b/external/clBLAS/src/tests/correctness/corr-tpsv.cpp deleted file mode 100644 index 931d8214..00000000 --- a/external/clBLAS/src/tests/correctness/corr-tpsv.cpp +++ /dev/null @@ -1,252 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <tpsv.h> -#include <cltypes.h> - -#include "trsv-delta.h" - -static void -releaseMemObjects(cl_mem objA, cl_mem objX) -{ - if (objA != NULL) - clReleaseMemObject(objA); - if (objX != NULL) - clReleaseMemObject(objX); -} - -template <typename T> static void -deleteBuffers(T *A, T *blasX, T *backX, cl_double *deltaX) -{ - if( A != NULL ) - { - delete[] A; - } - if( blasX != NULL ) - { - delete[] blasX; - } - if( backX != NULL ) - { - delete[] backX; - } - if( deltaX != NULL ) - { - delete[] deltaX; -} -} - -template <typename T> -void -tpsvCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *blasX, *backX; - cl_double *deltaX; - cl_mem bufA, bufX; - clMath::BlasBase *base; - cl_event *events; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthA = (params->N * (params->N + 1)) / 2; - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - - A = new T[lengthA + params->offa]; - blasX = new T[lengthX + params->offBX]; - backX = new T[lengthX + params->offBX]; - deltaX = new cl_double[lengthX + params->offBX]; - - if ((A==NULL) || (blasX == NULL) || (backX == NULL) || (deltaX == NULL)) - { - ::std::cerr << "Unable to allocate matrices in Host memory" << 
std::endl; - deleteBuffers<T>(A, blasX, backX, deltaX); - delete[] events; - SUCCEED(); - return; - } - memset( deltaX, 0, lengthX*sizeof(cl_double) ); - memset( blasX, 0, lengthX*sizeof(T) ); - - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - //custom generation function in blas-random.h - randomTrsvMatrices<T>( params->order, params->uplo, params->diag, params->N, (A + params->offa), 0, (blasX + params->offBX), params->incx); - - // Generate delta X for result comparison - trsvDelta<T>( params->order, params->uplo, params->transA, params->diag, params->N, (A + params->offa), 0, (blasX + params->offBX), params->incx, (deltaX + params->offBX) ); - - /*printf("\n\n before acml call\nA\n"); - printMatrixBlock( params->order, 0, 0, params->N, params->N, params->lda, A); - printf("\nX\n"); - printMatrixBlock( clblasColumnMajor, 0, 0, lengthX, 1, lengthX, blasX);*/ - - // Copy blasX to clblasX - memcpy(backX, blasX, (lengthX + params->offBX) * sizeof(T)); - // Allocate buffers - bufA = base->createEnqueueBuffer(A, (lengthA + params->offa)* sizeof(T), 0, CL_MEM_READ_ONLY); - bufX = base->createEnqueueBuffer(backX, (lengthX + params->offBX)* sizeof(T), 0, CL_MEM_WRITE_ONLY); - ::std::cerr << "Done" << ::std::endl; - - ::std::cerr << "Calling reference xTPSV routine... "; - - clblasOrder order; - clblasUplo fUplo; - clblasTranspose fTrans; - - order = params->order; - fUplo = params->uplo; - fTrans = params->transA; - - - if (order != clblasColumnMajor) - { - order = clblasColumnMajor; - fUplo = (params->uplo == clblasUpper)? clblasLower : clblasUpper; - fTrans = (params->transA == clblasNoTrans)? 
clblasTrans : clblasNoTrans; - - if( params->transA == clblasConjTrans ) - doConjugate((A + params->offa), 1, lengthA, 1); - } - ::clMath::blas::tpsv( order, fUplo, fTrans, params->diag, params->N, A, params->offa, blasX, params->offBX, params->incx); - ::std::cerr << "Done" << ::std::endl; - - /* - printf("\n\n acml result X\n"); - printf("\nblasX\n"); - printMatrixBlock( clblasColumnMajor, 0, 0, lengthX, 1, lengthX, blasX);*/ - - if ((bufA == NULL) || (bufX == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufA, bufX); - deleteBuffers<T>(A, blasX, backX, deltaX); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xTPSV routine... "; - - DataType type; - type = ( typeid(T) == typeid(cl_float))? TYPE_FLOAT : ( typeid(T) == typeid(cl_double))? TYPE_DOUBLE: ( typeid(T) == typeid(cl_float2))? 
TYPE_COMPLEX_FLOAT: TYPE_COMPLEX_DOUBLE; - - // Should use bufXTemp as well - err = (cl_int)::clMath::clblas::tpsv(type, params->order, params->uplo, params->transA, params->diag, params->N, bufA, - params->offa, bufX, params->offBX, params->incx, params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - - if (err != CL_SUCCESS) { - - deleteBuffers<T>(A, blasX, backX, deltaX); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::TPSV() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - - deleteBuffers<T>(A, blasX, backX, deltaX); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - clEnqueueReadBuffer(base->commandQueues()[0], bufX, CL_TRUE, 0, - lengthX * sizeof(*backX), backX, 0, - NULL, NULL); - - releaseMemObjects(bufA, bufX); - - /* - printf("\n\n clblas result X\n"); - printf("\nclBlasX\n"); - printMatrixBlock( clblasColumnMajor, 0, 0, lengthX, 1, lengthX, backX); - - printf("\n\n delta X\n\n"); - printMatrixBlock( clblasColumnMajor, 0, 0, lengthX, 1, lengthX, deltaX);*/ - - // handle lda correctly based on row-major/col-major.. 
- compareMatrices<T>( clblasColumnMajor, lengthX , 1, blasX, backX, - lengthX, deltaX ); - deleteBuffers<T>(A, blasX, backX, deltaX); - delete[] events; -} - -// Instantiate the test - -TEST_P(TPSV, stpsv) { - TestParams params; - - getParams(&params); - tpsvCorrectnessTest<cl_float>(&params); -} - -TEST_P(TPSV, dtpsv) { - TestParams params; - - getParams(&params); - tpsvCorrectnessTest<cl_double>(&params); -} - -TEST_P(TPSV, ctpsv) { - TestParams params; - - getParams(&params); - tpsvCorrectnessTest<FloatComplex>(&params); -} - -TEST_P(TPSV, ztpsv) { - TestParams params; - - getParams(&params); - tpsvCorrectnessTest<DoubleComplex>(&params); -} diff --git a/external/clBLAS/src/tests/correctness/corr-trmm.cpp b/external/clBLAS/src/tests/correctness/corr-trmm.cpp deleted file mode 100644 index c92e0e34..00000000 --- a/external/clBLAS/src/tests/correctness/corr-trmm.cpp +++ /dev/null @@ -1,215 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <trmm.h> - -#include "tcase-filter.h" - -static void -releaseMemObjects(cl_mem A, cl_mem B) -{ - clReleaseMemObject(A); - clReleaseMemObject(B); -} - -template <typename T> static void -deleteBuffers(T *A, T *blasB, T *clblasB) -{ - delete[] A; - delete[] blasB; - delete[] clblasB; -} - -template <typename T> -void -trmmCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *blasB, *clblasB; - T alpha; - cl_mem bufA, bufB; - clMath::BlasBase *base; - bool useAlpha; - cl_event *events; - bool isComplex; - - base = clMath::BlasBase::getInstance(); - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - isComplex = ((typeid(T) == typeid(FloatComplex)) || - (typeid(T) == typeid(DoubleComplex))); - if (canCaseBeSkipped(params, isComplex)) { - std::cerr << ">> Test is skipped because it has no importance for this " - "level of coverage" << std::endl; - SUCCEED(); - return; - } - - useAlpha = base->useAlpha(); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - A = new T[params->rowsA * params->columnsA]; - blasB = new T[params->rowsB * params->columnsB]; - clblasB = new T[params->rowsB * params->columnsB]; - alpha = ZERO<T>(); - - srand(params->seed); - if (useAlpha) { - alpha = convertMultiplier<T>(params->alpha); - } - - ::std::cerr << "Generating input data... 
"; - randomTrmmMatrices<T>(params->order, params->side, params->uplo, - params->diag, params->M, params->N, useAlpha, - &alpha, A, params->lda, blasB, params->ldb); - memcpy(clblasB, blasB, params->rowsB * params->columnsB * sizeof(*blasB)); - ::std::cerr << "Done" << ::std::endl; - - ::std::cerr << "Calling reference xTRMM routine... "; - if (params->order == clblasColumnMajor) { - ::clMath::blas::trmm(clblasColumnMajor, params->side, params->uplo, - params->transA, params->diag, params->M, params->N, alpha, - A, params->lda, blasB, params->ldb); - } - else { - T *reorderedA = new T[params->rowsA * params->columnsA]; - T *reorderedB = new T[params->rowsB * params->columnsB]; - - reorderMatrix<T>(clblasRowMajor, params->rowsA, params->columnsA, - A, reorderedA); - reorderMatrix<T>(clblasRowMajor, params->rowsB, params->columnsB, - blasB, reorderedB); - ::clMath::blas::trmm(clblasColumnMajor, params->side, params->uplo, - params->transA, params->diag, params->M, params->N, alpha, - reorderedA, params->rowsA, reorderedB, params->rowsB); - reorderMatrix<T>(clblasColumnMajor, params->rowsB, params->columnsB, - reorderedB, blasB); - - delete[] reorderedB; - delete[] reorderedA; - } - ::std::cerr << "Done" << ::std::endl; - - bufA = base->createEnqueueBuffer(A, params->rowsA * params->columnsA * - sizeof(*A), params->offA * sizeof(*A), - CL_MEM_READ_ONLY); - bufB = base->createEnqueueBuffer(clblasB, params->rowsB * params->columnsB * - sizeof(*clblasB), - params->offBX * sizeof(*clblasB), - CL_MEM_READ_WRITE); - if ((bufA == NULL) || (bufB == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufA, bufB); - deleteBuffers<T>(A, blasB, clblasB); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." 
<< ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xTRMM routine... "; - err = (cl_int)::clMath::clblas::trmm(params->order, params->side, - params->uplo, params->transA, params->diag, params->M, params->N, - alpha, bufA, params->offA, params->lda, bufB, params->offBX, - params->ldb, params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufB); - deleteBuffers<T>(A, blasB, clblasB); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::TRMM() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufB); - deleteBuffers<T>(A, blasB, clblasB); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - clEnqueueReadBuffer(base->commandQueues()[0], bufB, CL_TRUE, - params->offBX * sizeof(*clblasB), - params->rowsB * params->columnsB * sizeof(*clblasB), - clblasB, 0, NULL, NULL); - - releaseMemObjects(bufA, bufB); - compareMatrices<T>(params->order, params->M, params->N, blasB, clblasB, - params->ldb); - deleteBuffers<T>(A, blasB, clblasB); - delete[] events; -} - -// Instantiate the test - -TEST_P(TRMM, strmm) { - TestParams params; - - getParams(¶ms); - trmmCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(TRMM, dtrmm) { - TestParams params; - - getParams(¶ms); - trmmCorrectnessTest<cl_double>(¶ms); -} - -TEST_P(TRMM, ctrmm) { - TestParams params; - - getParams(¶ms); - trmmCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(TRMM, ztrmm) { - TestParams params; - - getParams(¶ms); - trmmCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-trmv.cpp b/external/clBLAS/src/tests/correctness/corr-trmv.cpp deleted file mode 100644 index 95089fc6..00000000 --- a/external/clBLAS/src/tests/correctness/corr-trmv.cpp +++ /dev/null @@ -1,258 +0,0 @@ -/* 
************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <trmv.h> - -static void -releaseMemObjects(cl_mem objA, cl_mem objX, cl_mem objXTemp) -{ - if(objA != NULL) - { - clReleaseMemObject(objA); - } - if(objX != NULL) - { - clReleaseMemObject(objX); - } - if(objXTemp != NULL) - { - clReleaseMemObject(objXTemp); -} -} - -template <typename T> static void -deleteBuffers(T *A, T *blasX, T *clblasX) -{ - if(A != NULL) - { - delete[] A; - } - if(blasX != NULL) - { - delete[] blasX; -} - if(clblasX != NULL) - { - delete[] clblasX; - } -} - -template <typename T> -void -trmvCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *blasX, *clblasX; - cl_mem bufA, bufX, bufXTemp; - clMath::BlasBase *base; - cl_event *events; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point 
arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - printf("number of command queues : %d\n\n", params->numCommandQueues); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthA = params->N * params->lda; - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - - A = new T[lengthA + params->offa ]; - blasX = new T[lengthX + params->offBX ]; - clblasX = new T[lengthX + params->offBX ]; - - if((A == NULL) || (blasX == NULL) || (clblasX == NULL)) - { - ::std::cerr << "Cannot allocate memory on host side\n" << "!!!!!!!!!!!!Test skipped.!!!!!!!!!!!!" << ::std::endl; - deleteBuffers<T>(A, blasX, clblasX); - delete[] events; - SUCCEED(); - return; - } - - srand(params->seed); - - ::std::cerr << "Generating input data... "; - - // Set data in A and X using populate() routine - int creationFlags = 0; - creationFlags = creationFlags | RANDOM_INIT; - - // Default is Column-Major - creationFlags = ( (params-> order) == clblasRowMajor)? (creationFlags | ROW_MAJOR_ORDER) : (creationFlags); - creationFlags = ( (params-> uplo) == clblasLower)? 
(creationFlags | LOWER_HALF_ONLY) : (creationFlags | UPPER_HALF_ONLY); - BlasRoutineID BlasFn = CLBLAS_TRMV; - - // Populate A and blasX - populate( A + params->offa, params-> N, params-> N, params-> lda, BlasFn, creationFlags); - populate( blasX , (lengthX + params->offBX), 1, (lengthX + params->offBX), BlasFn); - - // Copy blasX to clblasX - memcpy(clblasX, blasX, (lengthX + params->offBX)* sizeof(*blasX)); - ::std::cerr << "Done" << ::std::endl; - - // Allocate buffers - bufA = base->createEnqueueBuffer(A, (lengthA + params->offa)* sizeof(*A), 0, CL_MEM_READ_ONLY); - bufX = base->createEnqueueBuffer(clblasX, (lengthX + params->offBX)* sizeof(*clblasX), 0, CL_MEM_WRITE_ONLY); - bufXTemp = base->createEnqueueBuffer(NULL, lengthX * sizeof(*clblasX), 0, CL_MEM_READ_ONLY); - - //printData( "bufX", blasX, lengthX, 1, lengthX); - //printData( "clblasX", clblasX, lengthX, 1, lengthX); - - ::std::cerr << "Calling reference xTRMV routine... "; - - - clblasOrder order; - clblasUplo fUplo; - clblasTranspose fTrans; - - order = params->order; - fUplo = params->uplo; - fTrans = params->transA; - - if (order != clblasColumnMajor) - { - order = clblasColumnMajor; - fUplo = (params->uplo == clblasUpper)? clblasLower : clblasUpper; - fTrans = (params->transA == clblasNoTrans)? clblasTrans : clblasNoTrans; - - if( params->transA == clblasConjTrans ) - doConjugate( (A + params->offa), params->N, params->N, params->lda ); - } - - ::clMath::blas::trmv( order, fUplo, fTrans, params->diag, params->N, A, params->offa, params->lda, blasX, params->offBX, params->incx); - ::std::cerr << "Done" << ::std::endl; - - // Hold X vector - - if ((bufA == NULL) || (bufX == NULL) || (bufXTemp == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. - */ - releaseMemObjects(bufA, bufX, bufXTemp); - deleteBuffers<T>(A, blasX, clblasX); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." 
- << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xTRMV routine... "; - - DataType type; - type = ( typeid(T) == typeid(cl_float))? TYPE_FLOAT : ( typeid(T) == typeid(cl_double))? TYPE_DOUBLE: ( typeid(T) == typeid(cl_float2))? TYPE_COMPLEX_FLOAT:TYPE_COMPLEX_DOUBLE; - - // Should use bufXTemp as well - err = (cl_int)::clMath::clblas::trmv( type, params->order, params->uplo, params->transA, params->diag, params->N, bufA, - params->offa, params->lda, bufX, params->offBX, params->incx, bufXTemp, params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX, bufXTemp); - deleteBuffers<T>(A, blasX, clblasX); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::TRMV() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufX, bufXTemp); - deleteBuffers<T>(A, blasX, clblasX); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - - err = clEnqueueReadBuffer(base->commandQueues()[0], bufX, CL_TRUE, 0, - (lengthX + params->offBX) * sizeof(*clblasX), clblasX, 0, - NULL, NULL); - if (err != CL_SUCCESS) - { - ::std::cerr << "TRMV: Reading results failed...." << std::endl; - } - - releaseMemObjects(bufA, bufX, bufXTemp); - - - // handle lda correctly based on row-major/col-major.. 
-// printData( "Ref blasX result:", blasX, lengthX, 1, lengthX); -// printData( "OpenCL clblasX result:", clblasX, lengthX, 1, lengthX); - - - compareMatrices<T>(clblasColumnMajor, lengthX , 1, (blasX + params->offBX), (clblasX + params->offBX), - lengthX); - deleteBuffers<T>(A, blasX, clblasX); - delete[] events; -} - -// Instantiate the test - -TEST_P(TRMV, strmv) { - TestParams params; - - getParams(¶ms); - trmvCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(TRMV, dtrmv) { - TestParams params; - - getParams(¶ms); - trmvCorrectnessTest<cl_double>(¶ms); -} - -TEST_P(TRMV, ctrmv) { - TestParams params; - - getParams(¶ms); - trmvCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(TRMV, ztrmv) { - TestParams params; - - getParams(¶ms); - trmvCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/corr-trsm.cpp b/external/clBLAS/src/tests/correctness/corr-trsm.cpp deleted file mode 100644 index e53331ca..00000000 --- a/external/clBLAS/src/tests/correctness/corr-trsm.cpp +++ /dev/null @@ -1,454 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <trsm.h> - -#include "trsm-delta.h" -#include "tcase-filter.h" - -static void -releaseMemObjects(cl_mem A, cl_mem B) -{ - clReleaseMemObject(A); - clReleaseMemObject(B); -} - -template <typename T> static void -deleteBuffers(T *A, T *B, T *blasB, T *clblasB, cl_double *delta) -{ - delete[] A; - delete[] B; - delete[] blasB; - delete[] clblasB; - delete[] delta; -} - -template <typename T> -void -trsmCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *B, *blasB, *clblasB; - T alpha; - cl_mem bufA, bufB; - cl_double *delta; - clMath::BlasBase *base; - bool useAlpha; - cl_event *events; - bool isComplex; - - base = clMath::BlasBase::getInstance(); - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - isComplex = ((typeid(T) == typeid(FloatComplex)) || - (typeid(T) == typeid(DoubleComplex))); - if (canCaseBeSkipped(params, isComplex)) { - std::cerr << ">> Test is skipped because it has no importance for this " - "level of coverage" << std::endl; - SUCCEED(); - return; - } - - useAlpha = base->useAlpha(); - alpha = ZERO<T>(); - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - A = new T[params->rowsA * params->columnsA]; - B = new T[params->rowsB * params->columnsB]; - blasB = new T[params->rowsB * params->columnsB]; - clblasB = new T[params->rowsB * params->columnsB]; - 
delta = new cl_double[params->rowsB * params->columnsB]; - - srand(params->seed); - if (useAlpha) { - alpha = convertMultiplier<T>(params->alpha); - } - - ::std::cerr << "Generating input data... "; - - randomTrsmMatrices<T>(params->order, params->side, params->uplo, - params->diag, params->M, params->N, useAlpha, - &alpha, A, params->lda, B, params->ldb); - - memcpy(blasB, B, params->rowsB * params->columnsB * sizeof(*B)); - memcpy(clblasB, B, params->rowsB * params->columnsB * sizeof(*B)); - ::std::cerr << "Done" << ::std::endl; - - ::std::cerr << "Calling reference xTRSM routine... "; - if (params->order == clblasColumnMajor) { - ::clMath::blas::trsm(clblasColumnMajor, params->side, params->uplo, - params->transA, params->diag, params->M, params->N, alpha, A, - params->lda, blasB, params->ldb); - } - else { - T *reorderedA = new T[params->rowsA * params->columnsA]; - T *reorderedB = new T[params->rowsB * params->columnsB]; - - reorderMatrix<T>(clblasRowMajor, params->rowsA, params->columnsA, - A, reorderedA); - reorderMatrix<T>(clblasRowMajor, params->rowsB, params->columnsB, - blasB, reorderedB); - - ::clMath::blas::trsm(clblasColumnMajor, params->side, params->uplo, - params->transA, params->diag, params->M, params->N, alpha, - reorderedA, params->rowsA, reorderedB, params->rowsB); - - reorderMatrix<T>(clblasColumnMajor, params->rowsB, params->columnsB, - reorderedB, blasB); - - delete[] reorderedB; - delete[] reorderedA; - } - ::std::cerr << "Done" << ::std::endl; - - bufA = base->createEnqueueBuffer(A, params->rowsA * params->columnsA * - sizeof(*A), params->offA * sizeof(*A), - CL_MEM_READ_ONLY); - bufB = base->createEnqueueBuffer(clblasB, params->rowsB * params->columnsB * - sizeof(*clblasB), - params->offBX * sizeof(*clblasB), - CL_MEM_READ_WRITE); - if ((bufA == NULL) || (bufB == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. 
- */ - releaseMemObjects(bufA, bufB); - deleteBuffers<T>(A, B, blasB, clblasB, delta); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xTRSM routine... "; - err = (cl_int)::clMath::clblas::trsm(params->order, params->side, - params->uplo, params->transA, params->diag, params->M, params->N, - alpha, bufA, params->offA, params->lda, bufB, params->offBX, - params->ldb, params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufB); - deleteBuffers<T>(A, B, blasB, clblasB, delta); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::TRSM() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufB); - deleteBuffers<T>(A, B, blasB, clblasB, delta); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - clEnqueueReadBuffer(base->commandQueues()[0], bufB, CL_TRUE, - params->offBX * sizeof(*clblasB), - params->rowsB * params->columnsB * sizeof(*clblasB), - clblasB, 0, NULL, NULL); - - releaseMemObjects(bufA, bufB); - - trsmDelta<T>(params->order, params->side, params->uplo, params->transA, - params->diag, params->M, params->N, A, params->lda, B, params->ldb, - alpha, delta); - - compareMatrices<T>(params->order, params->M, params->N, blasB, clblasB, - params->ldb, delta); - deleteBuffers<T>(A, B, blasB, clblasB, delta); - delete[] events; -} - -// Instantiate the test - -TEST_P(TRSM, strsm) { - TestParams params; - - getParams(¶ms); - trsmCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(TRSM, dtrsm) { - TestParams params; - - getParams(¶ms); - 
trsmCorrectnessTest<cl_double>(¶ms); -} - -TEST_P(TRSM, ctrsm) { - TestParams params; - - getParams(¶ms); - trsmCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(TRSM, ztrsm) { - TestParams params; - - getParams(¶ms); - trsmCorrectnessTest<DoubleComplex>(¶ms); -} - - - -// ==================================== -// Adding some tests to catch bugs in the scenario where lda != M - - -int arithsum(int i) -{ - int j; - for(j=i-1; j>0; j--) - i += j; - return i; -} - -template <typename T> -void AssignA(T *A, size_t i, size_t j, size_t ld) -{ - A[i*ld + j] = j == i ? (j+1) : ( j > i ? 0 : 1.0 ); -} - -template <> -void AssignA(FloatComplex *A, size_t i, size_t j, size_t ld) -{ - FloatComplex *Ac = (FloatComplex *)A; - Ac[i*ld + j].s[0] = j == i ? (j+1) : ( j > i ? 0 : 1.0 ); - Ac[i*ld + j].s[1] = 0; -} - -template <> -void AssignA(DoubleComplex *A, size_t i, size_t j, size_t ld) -{ - DoubleComplex *Az = (DoubleComplex *)A; - Az[i*ld + j].s[0] = j == i ? (j+1) : ( j > i ? 0 : 1.0 ); - Az[i*ld + j].s[1] = 0; -} - -template <typename T> -void AssignB(T *B, size_t i, size_t j, size_t ld, size_t M) -{ - B[i*ld + j] = arithsum(M) - arithsum(j+1) + (j+1)*(j+1); -} - -template <> -void AssignB(FloatComplex *B, size_t i, size_t j, size_t ld, size_t M) -{ - FloatComplex *Bc = (FloatComplex *)B; - Bc[i*ld + j].s[0] = arithsum(M) - arithsum(j+1) + (j+1)*(j+1); - Bc[i*ld + j].s[1] = 0; -} - -template <> -void AssignB(DoubleComplex *B, size_t i, size_t j, size_t ld, size_t M) -{ - DoubleComplex *Bz = (DoubleComplex *)B; - Bz[i*ld + j].s[0] = arithsum(M) - arithsum(j+1) + (j+1)*(j+1); - Bz[i*ld + j].s[1] = 0; -} - -template <typename T> -void local_assert(T x, T y, T d) -{ - ASSERT_NEAR(x, y, d); -} - -template <> -void local_assert<FloatComplex>(FloatComplex x, FloatComplex y, FloatComplex d) -{ - ASSERT_NEAR(x.s[0], y.s[0], d.s[0]); - ASSERT_NEAR(x.s[1], y.s[1], d.s[1]); -} - -template <> -void local_assert<DoubleComplex>(DoubleComplex x, DoubleComplex y, DoubleComplex d) -{ - 
ASSERT_NEAR(x.s[0], y.s[0], d.s[0]); - ASSERT_NEAR(x.s[1], y.s[1], d.s[1]); -} - - -template <typename T> -void Extratest(size_t M, size_t N, size_t lda, size_t ldb, T alpha, T delta) -{ - T *A, *B, *blasB, *clblasB; - cl_mem bufA, bufB; - clMath::BlasBase *base; - cl_event *events; - cl_int err; - - base = clMath::BlasBase::getInstance(); - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - - clblasOrder order = clblasColumnMajor; - clblasSide side = clblasLeft; - clblasUplo uplo = clblasUpper; - clblasTranspose trans = clblasNoTrans; - clblasDiag diag = clblasNonUnit; - - A = new T[M * lda]; - B = new T[N * ldb]; - blasB = new T[N * ldb]; - clblasB = new T[N * ldb]; - - memset(A, 0, M*lda*sizeof(T)); - memset(B, 0, N*ldb*sizeof(T)); - - for(int i=0; i<M; i++) // down each column - { - for(int j=0; j<M; j++) // down each row - { - AssignA<T>(A, i, j, lda); - } - } - - for(int i=0; i<N; i++) // down each column - { - for(int j=0; j<M; j++) // down each row - { - AssignB<T>(B, i, j, ldb, M); - } - } - - memcpy(blasB, B, N*ldb*sizeof(T)); - memcpy(clblasB, B, N*ldb*sizeof(T)); - - ::std::cerr << "Calling reference xTRSM routine... "; - ::clMath::blas::trsm(order, side, uplo, trans, diag, M, N, alpha, A, lda, blasB, ldb); - - - bufA = base->createEnqueueBuffer(A, M*lda*sizeof(T), 0, CL_MEM_READ_ONLY); - bufB = base->createEnqueueBuffer(clblasB, N*ldb*sizeof(T), 0, CL_MEM_READ_WRITE); - - events = new cl_event[1]; - memset(events, 0, sizeof(cl_event)); - - if ((bufA == NULL) || (bufB == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. 
- */ - releaseMemObjects(bufA, bufB); - deleteBuffers<T>(A, B, blasB, clblasB, NULL); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xTRSM routine... "; - err = (cl_int)::clMath::clblas::trsm(order, side, uplo, trans, diag, M, N, alpha, bufA, 0, lda, bufB, 0, ldb, - 1, base->commandQueues(), 0, NULL, events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufB); - deleteBuffers<T>(A, B, blasB, clblasB, NULL); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::TRSM() failed"; - } - - err = waitForSuccessfulFinish(1, base->commandQueues(), events); - if (err != CL_SUCCESS) { - releaseMemObjects(bufA, bufB); - deleteBuffers<T>(A, B, blasB, clblasB, NULL); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - clEnqueueReadBuffer(base->commandQueues()[0], bufB, CL_TRUE, - 0, N*ldb*sizeof(T), clblasB, 0, NULL, NULL); - - releaseMemObjects(bufA, bufB); - - // Validate the answer - for(int i=0; i<N; i++) // down each column - { - for(int j=0; j<ldb; j++) // down each row - { - local_assert(blasB[i*ldb + j], clblasB[i*ldb + j], delta); - } - } - - deleteBuffers<T>(A, B, blasB, clblasB, NULL); - delete[] events; -} - -#define ETST_TOLERENCE 1E-5 - -TEST(TRSM_extratest, strsm) -{ - Extratest<float>(5, 2, 32, 32, 1.0f, ETST_TOLERENCE); -} - -TEST(TRSM_extratest, dtrsm) -{ - Extratest<double>(5, 2, 32, 32, 1.0, ETST_TOLERENCE); -} - -TEST(TRSM_extratest, ctrsm) -{ - FloatComplex alpha = floatComplex(1.0f, 0); - FloatComplex delta = floatComplex(ETST_TOLERENCE, ETST_TOLERENCE); - Extratest<FloatComplex>(5, 2, 32, 32, alpha, delta); -} - -TEST(TRSM_extratest, ztrsm) -{ - DoubleComplex alpha = doubleComplex(1.0, 0); - DoubleComplex delta = 
doubleComplex(ETST_TOLERENCE, ETST_TOLERENCE); - Extratest<DoubleComplex>(5, 2, 32, 32, alpha, delta); -}
\ No newline at end of file diff --git a/external/clBLAS/src/tests/correctness/corr-trsv.cpp b/external/clBLAS/src/tests/correctness/corr-trsv.cpp deleted file mode 100644 index b0ed0caf..00000000 --- a/external/clBLAS/src/tests/correctness/corr-trsv.cpp +++ /dev/null @@ -1,252 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <stdlib.h> // srand() -#include <string.h> // memcpy() -#include <gtest/gtest.h> -#include <clBLAS.h> - -#include <common.h> -#include <blas-internal.h> -#include <blas-wrapper.h> -#include <clBLAS-wrapper.h> -#include <BlasBase.h> -#include <blas-random.h> -#include <trsv.h> -#include <cltypes.h> - -#include "trsv-delta.h" - -static void -releaseMemObjects(cl_mem objA, cl_mem objX) -{ - if (objA != NULL) - clReleaseMemObject(objA); - if (objX != NULL) - clReleaseMemObject(objX); -} - -template <typename T> static void -deleteBuffers(T *A, T *blasX, T *backX, cl_double *deltaX) -{ - if( A != NULL ) - { - delete[] A; - } - if( blasX != NULL ) - { - delete[] blasX; - } - if( backX != NULL ) - { - delete[] backX; - } - if( deltaX != NULL ) - { - delete[] deltaX; -} -} - -template <typename T> -void -trsvCorrectnessTest(TestParams *params) -{ - cl_int err; - T *A, *blasX, *backX; - cl_double *deltaX; - cl_mem bufA, 
bufX; - clMath::BlasBase *base; - cl_event *events; - - base = clMath::BlasBase::getInstance(); - - if ((typeid(T) == typeid(cl_double) || - typeid(T) == typeid(DoubleComplex)) && - !base->isDevSupportDoublePrecision()) { - - std::cerr << ">> WARNING: The target device doesn't support native " - "double precision floating point arithmetic" << - std::endl << ">> Test skipped" << std::endl; - SUCCEED(); - return; - } - - events = new cl_event[params->numCommandQueues]; - memset(events, 0, params->numCommandQueues * sizeof(cl_event)); - - size_t lengthA = params->N * params->lda; - size_t lengthX = (1 + ((params->N -1) * abs(params->incx))); - - A = new T[lengthA + params->offa]; - blasX = new T[lengthX + params->offBX]; - backX = new T[lengthX + params->offBX]; - deltaX = new cl_double[lengthX + params->offBX]; - - if ((A==NULL) || (blasX == NULL) || (backX == NULL) || (deltaX == NULL)) - { - ::std::cerr << "Unable to allocate matrices in Host memory" << std::endl; - deleteBuffers<T>(A, blasX, backX, deltaX); - delete[] events; - SUCCEED(); - return; - } - memset( deltaX, 0, lengthX*sizeof(cl_double) ); - memset( blasX, 0, lengthX*sizeof(T) ); - - srand(params->seed); - - ::std::cerr << "Generating input data... 
"; - - //custom generation function in blas-random.h - randomTrsvMatrices<T>( params->order, params->uplo, params->diag, params->N, (A + params->offa), params->lda, (blasX + params->offBX), params->incx); - - // Generate delta X for result comparison - trsvDelta<T>( params->order, params->uplo, params->transA, params->diag, params->N, (A + params->offa), params->lda, (blasX + params->offBX), params->incx, (deltaX + params->offBX) ); - - /*printf("\n\n before acml call\nA\n"); - printMatrixBlock( params->order, 0, 0, params->N, params->N, params->lda, A); - printf("\nX\n"); - printMatrixBlock( clblasColumnMajor, 0, 0, lengthX, 1, lengthX, blasX);*/ - - // Copy blasX to clblasX - memcpy(backX, blasX, (lengthX + params->offBX) * sizeof(T)); - // Allocate buffers - bufA = base->createEnqueueBuffer(A, (lengthA + params->offa)* sizeof(T), 0, CL_MEM_READ_ONLY); - bufX = base->createEnqueueBuffer(backX, (lengthX + params->offBX)* sizeof(T), 0, CL_MEM_WRITE_ONLY); - ::std::cerr << "Done" << ::std::endl; - - ::std::cerr << "Calling reference xTRSV routine... "; - - clblasOrder order; - clblasUplo fUplo; - clblasTranspose fTrans; - - order = params->order; - fUplo = params->uplo; - fTrans = params->transA; - - - if (order != clblasColumnMajor) - { - order = clblasColumnMajor; - fUplo = (params->uplo == clblasUpper)? clblasLower : clblasUpper; - fTrans = (params->transA == clblasNoTrans)? clblasTrans : clblasNoTrans; - - if( params->transA == clblasConjTrans ) - doConjugate((A + params->offa), params->N, params->N, params->lda ); - } - ::clMath::blas::trsv( order, fUplo, fTrans, params->diag, params->N, A, params->offa, params->lda, blasX, params->offBX, params->incx); - ::std::cerr << "Done" << ::std::endl; - - /* - printf("\n\n acml result X\n"); - printf("\nblasX\n"); - printMatrixBlock( clblasColumnMajor, 0, 0, lengthX, 1, lengthX, blasX);*/ - - if ((bufA == NULL) || (bufX == NULL)) { - /* Skip the test, the most probable reason is - * matrix too big for a device. 
- */ - releaseMemObjects(bufA, bufX); - deleteBuffers<T>(A, blasX, backX, deltaX); - delete[] events; - ::std::cerr << ">> Failed to create/enqueue buffer for a matrix." - << ::std::endl - << ">> Can't execute the test, because data is not transfered to GPU." - << ::std::endl - << ">> Test skipped." << ::std::endl; - SUCCEED(); - return; - } - - ::std::cerr << "Calling clblas xTRSV routine... "; - - DataType type; - type = ( typeid(T) == typeid(cl_float))? TYPE_FLOAT : ( typeid(T) == typeid(cl_double))? TYPE_DOUBLE: ( typeid(T) == typeid(cl_float2))? TYPE_COMPLEX_FLOAT: TYPE_COMPLEX_DOUBLE; - - // Should use bufXTemp as well - err = (cl_int)::clMath::clblas::trsv(type, params->order, params->uplo, params->transA, params->diag, params->N, bufA, - params->offa, params->lda, bufX, params->offBX, params->incx, params->numCommandQueues, base->commandQueues(), - 0, NULL, events); - - if (err != CL_SUCCESS) { - - deleteBuffers<T>(A, blasX, backX, deltaX); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "::clMath::clblas::TRSV() failed"; - } - - err = waitForSuccessfulFinish(params->numCommandQueues, - base->commandQueues(), events); - if (err != CL_SUCCESS) { - - deleteBuffers<T>(A, blasX, backX, deltaX); - delete[] events; - ASSERT_EQ(CL_SUCCESS, err) << "waitForSuccessfulFinish()"; - } - ::std::cerr << "Done" << ::std::endl; - - clEnqueueReadBuffer(base->commandQueues()[0], bufX, CL_TRUE, 0, - lengthX * sizeof(*backX), backX, 0, - NULL, NULL); - - releaseMemObjects(bufA, bufX); - - /* - printf("\n\n clblas result X\n"); - printf("\nclBlasX\n"); - printMatrixBlock( clblasColumnMajor, 0, 0, lengthX, 1, lengthX, backX); - - printf("\n\n delta X\n\n"); - printMatrixBlock( clblasColumnMajor, 0, 0, lengthX, 1, lengthX, deltaX);*/ - - // handle lda correctly based on row-major/col-major.. 
- compareMatrices<T>( clblasColumnMajor, lengthX , 1, blasX, backX, - lengthX, deltaX ); - deleteBuffers<T>(A, blasX, backX, deltaX); - delete[] events; -} - -// Instantiate the test - -TEST_P(TRSV, strsv) { - TestParams params; - - getParams(¶ms); - trsvCorrectnessTest<cl_float>(¶ms); -} - -TEST_P(TRSV, dtrsv) { - TestParams params; - - getParams(¶ms); - trsvCorrectnessTest<cl_double>(¶ms); -} - -TEST_P(TRSV, ctrsv) { - TestParams params; - - getParams(¶ms); - trsvCorrectnessTest<FloatComplex>(¶ms); -} - -TEST_P(TRSV, ztrsv) { - TestParams params; - - getParams(¶ms); - trsvCorrectnessTest<DoubleComplex>(¶ms); -} diff --git a/external/clBLAS/src/tests/correctness/delta.h b/external/clBLAS/src/tests/correctness/delta.h deleted file mode 100644 index c0f4b23f..00000000 --- a/external/clBLAS/src/tests/correctness/delta.h +++ /dev/null @@ -1,36 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- * ************************************************************************/ - -#ifndef DELTA_H_ -#define DELTA_H_ - -#include <blas-math.h> -#include <common.h> - -// Type-dependant constants -template <class T> -static cl_double DELTA_0(); -template<> -__template_static cl_double DELTA_0<cl_float>() { return pow(2.0, -20); } -template<> -__template_static cl_double DELTA_0<cl_double>() { return pow(2.0, -50); } -template<> -__template_static cl_double DELTA_0<FloatComplex>() { return pow(2.0, -20); } -template<> -__template_static cl_double DELTA_0<DoubleComplex>() { return pow(2.0, -50); } - -#endif // DELTA_H - diff --git a/external/clBLAS/src/tests/correctness/tcase-filter.cpp b/external/clBLAS/src/tests/correctness/tcase-filter.cpp deleted file mode 100644 index 35a892b1..00000000 --- a/external/clBLAS/src/tests/correctness/tcase-filter.cpp +++ /dev/null @@ -1,219 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include "tcase-filter.h" - -#if defined(SHORT_TESTS) || defined(MEDIUM_TESTS) - -static __inline size_t -selectSize(size_t orig, size_t alt) -{ - return (orig) ? 
orig : alt; -} - -static size_t -nonZeroSize(size_t size1, size_t size2, size_t size3) -{ - size_t r = 0; - - if (size1) { - r = size1; - } - else if (size2) { - r = size2; - } - else { - r = size3; - } - - return r; -} - -static int -sizeEquCount(size_t size1, size_t size2, size_t size3) -{ - int cnt = 0; - - cnt += static_cast<int>(size1 == size2); - cnt += static_cast<int>(size2 == size3); - cnt += static_cast<int>(size1 == size3); - - return cnt; -} - -static __inline bool -isEquToAny(size_t size, size_t alt1, size_t alt2, size_t alt3) -{ - return ((size == alt1) || (size == alt2) || (size == alt3)); -} - -static __inline bool -isRealConjugation(const TestParams *params, bool isComplex) -{ - return !isComplex && - ((params->transA == clblasConjTrans) || - (params->transB == clblasConjTrans)); -} - -#endif /* SHORT_TESTS || MEDIUM_TESTS */ - -#if defined(SHORT_TESTS) - -bool -canCaseBeSkipped(const TestParams *params, bool isComplex) -{ - size_t s; - size_t m, n, k, lda, ldb, ldc; - - // skip cases with conjugated transposition for real data - if (isRealConjugation(params, isComplex)) { - return true; - } - - /* - * Enable only cases at which all the problem dimensions are equal - * to each other - */ - s = nonZeroSize(params->M, params->N, params->K); - m = selectSize(params->M, s); - n = selectSize(params->N, s); - k = selectSize(params->K, s); - if (sizeEquCount(m, n, k) < 3) { - return true; - } - - /* - * filter BigLDA cases - */ - /* - s = nonZeroSize(params->lda, params->ldb, params->ldc); - lda = selectSize(params->lda, s); - ldb = selectSize(params->ldb, s); - ldc = selectSize(params->ldc, s); - if (sizeEquCount(lda, ldb, ldc) < 3) { - return true; - } - - if (!isEquToAny(lda, m, n, k)) { - return true; - } - */ - return false; -} - -#elif defined(MEDIUM_TESTS) /* SHORT_TESTS */ - -#include <algorithm> - -#include <stdio.h> - -/* - * Evaluate best vector length that buffer with such leading dimension - * would have for such leading dimension. 
- */ -static unsigned int -prognozedVecLen(size_t ld) -{ - size_t u = static_cast<size_t>(1) << (sizeof(size_t) * 8 - 1); - size_t vecLen; - - // typically vecLen will not exceed 8 - ld %= 8; - if (ld == 0) { - return 8; - } - else if (ld == 1) { - return 1; - } - - // find the highest non zero bit - for (; (u != 0) && !(u & ld); u >>= 1); - - /* - * Evaluated as minimum of modules based operation results against - * upper and lower power of 2 bounds - */ - vecLen = ld - u; - u >>= 1; - vecLen = ::std::min(vecLen, u - ld); - - return static_cast<unsigned int>(vecLen); -} - -bool -canCaseBeSkipped(const TestParams *params, bool isComplex) -{ - size_t s; - size_t m, n, k, lda, ldb, ldc; - int bigCnt = 0; - unsigned int vecLen; - - // skip cases with conjugated transposition for real data - if (isRealConjugation(params, isComplex)) { - return true; - } - - // set of cases for extended versions is really tiny, so enable them all - if (params->offA || params->offBX || params->offCY) { - return false; - } - - s = nonZeroSize(params->M, params->N, params->K); - m = selectSize(params->M, s); - n = selectSize(params->N, s); - k = selectSize(params->K, s); - - // enable BigLDA cases when problem dimensions all are equal to each other - s = nonZeroSize(params->lda, params->ldb, params->ldc); - lda = selectSize(params->lda, s); - ldb = selectSize(params->ldb, s); - ldc = selectSize(params->ldc, s); - bigCnt += static_cast<int>(!isEquToAny(lda, m, n, k)); - bigCnt += static_cast<int>(!isEquToAny(ldb, m, n, k)); - bigCnt += static_cast<int>(!isEquToAny(ldc, m, n, k)); - if (bigCnt) { - if (sizeEquCount(m, n, k) < 3) { - return true; - } - else { - return false; - } - } - - // enable only cases at which buffers will have the same vectorization - vecLen = prognozedVecLen(lda); - if ((prognozedVecLen(ldb) != vecLen) || - (prognozedVecLen(ldc) != vecLen)) { - - return true; - } - - return false; -} - -#else /* MEDIUM_TESTS */ - -bool -canCaseBeSkipped(const TestParams *params, bool 
isComplex) -{ - (void)params; - (void)isComplex; - return false; -} - -#endif /* !SHORT_TESTS && !MEDIUM_TESTS */ - diff --git a/external/clBLAS/src/tests/correctness/tcase-filter.h b/external/clBLAS/src/tests/correctness/tcase-filter.h deleted file mode 100644 index 4ba1bdd8..00000000 --- a/external/clBLAS/src/tests/correctness/tcase-filter.h +++ /dev/null @@ -1,30 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -/* - * Filter for skipping test cases when run time is more important than - * coverage - */ - -#ifndef TCASEFILTER_H_ -#define TCASEFILTER_H_ - -#include <cmdline.h> - -bool canCaseBeSkipped(const TestParams *params, bool isComplex); - -#endif /* TCASEFILTER_H_ */ diff --git a/external/clBLAS/src/tests/correctness/test-correctness.cpp b/external/clBLAS/src/tests/correctness/test-correctness.cpp deleted file mode 100644 index 75da51ab..00000000 --- a/external/clBLAS/src/tests/correctness/test-correctness.cpp +++ /dev/null @@ -1,3415 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#define DO_GEMM -#define DO_TRMM -#define DO_TRSM -#define DO_SYR2K -#define DO_SYRK -#define DO_GEMV -#define DO_SYMV -#define DO_SYMM -#define DO_TRMV -#define DO_TPMV -#define DO_TRSV -#define DO_SYR -#define DO_SPR -#define DO_GER -#define DO_GERC -#define DO_SYR2 -#define DO_HER -#define DO_HER2 -#define DO_HEMM -#define DO_HEMV -#define DO_HPMV -#define DO_SPMV -#define DO_SBMV -#define DO_HERK -#define DO_TPSV -#define DO_HPR -#define DO_SPR2 -#define DO_HPR2 -#define DO_GBMV -#define DO_HBMV -#define DO_TBMV -#define DO_TBSV -#define DO_HER2K -#define DO_SWAP -#define DO_COPY -#define DO_SCAL -#define DO_AXPY -#define DO_DOT -#define DO_DOTC -#define DO_ROTG -#define DO_ROTM -#define DO_ROT -#define DO_ROTMG -#define DO_NRM2 -#define DO_ASUM -#define DO_iAMAX - -//#define DO_SPL - Only used for special case testing (for devel purposes) -//#define DO_GEMM_2 - This needs to remain commented. 
- -#include <gtest/gtest.h> -#include <BlasBase.h> -#include <ExtraTestSizes.h> -#include <gemm.h> -#include <gemm-2.h> -#include <trmm.h> -#include <trsm.h> -#include <gemv.h> -#include <symv.h> -#include <syr2k.h> -#include <syrk.h> -#include <trsv.h> -#include <trmv.h> -#include <tpmv.h> -#include <symm.h> -#include <syr.h> -#include <sbmv.h> -#include <spr.h> -#include <ger.h> -#include <gerc.h> -#include <syr2.h> -#include <her.h> -#include <her2.h> -#include <hemv.h> -#include <hpmv.h> -#include <spmv.h> -#include <hemm.h> -#include <herk.h> -#include <tpsv.h> -#include <hpr.h> -#include <spr2.h> -#include <hpr2.h> -#include <gbmv.h> -#include <hbmv.h> -#include <tbmv.h> -#include <tbsv.h> -#include <her2k.h> -#include <swap.h> -#include <scal.h> -#include <copy.h> -#include <axpy.h> -#include <dot.h> -#include <asum.h> -#include <dotc.h> -#include <rotg.h> -#include <rotm.h> -#include <rot.h> -#include <rotmg.h> -#include <nrm2.h> -#include <iamax.h> - -using ::testing::TestWithParam; -using ::testing::Values; -using ::testing::ValuesIn; -using ::testing::Combine; - -TestParams globalTestParams; - -// Different ranges of test parameters - -static const clblasOrder orderSet[] = - { clblasColumnMajor, clblasRowMajor }; -static const clblasTranspose transSet[] = - { clblasNoTrans, clblasTrans, clblasConjTrans }; -static const clblasSide sideSet[] = - { clblasLeft, clblasRight }; -static const clblasUplo uploSet[] = - { clblasUpper, clblasLower }; -static const clblasDiag diagSet[] = - { clblasUnit, clblasNonUnit }; - -const size_t ZERO_VAL[1] = { 0 }; -const int ONE_VAL[1] = { 1 }; -const int verySmallRange[] = -{1, 3, 5, 10, 11, 15, 16, 23, 21, 32, 33, 45, 40, 63, 333, 1024, 1025, 4096, 4223}; -const int completeRange[] = -{1, 3, 5, 10, 11, 15, 16, 23, 21, 32, 33, 45, 40, 63, 333, 1024, 1025, 4096, 4223}; -#if defined SHORT_TESTS -const int smallRange[] = - { 63, 128 }; - -const int numQueues[] = - { 2 }; -#elif defined MEDIUM_TESTS /* SHORT_TESTS */ -const 
int smallRange[] = - { 15, 64, 133 }; -const int numQueues[] = - { 3, 4 }; -#else /* MEDIUM_TESTS */ -const int smallRange[] = - { 15, 16, 33, 40, 62, 64, 128, 129, 256, 258 }; - //{ 15, 16, 32, 33, 63, 64, 128, 129, 256, 257 }; - //{ 3, 4, 15, 16, 32, 33, 63, 64, 128, 129, 256, 257, 333, 566, 787, 1024, 1025, 1113, 1111, 999, 883, 633, 17 }; - -const int numQueues[] = - { 2, 3, 4, 5, 6, 7 }; -#endif /* !SHORT_TESTS && !MEDIUM_TESTS */ - -#if defined(SHORT_TESTS) || defined(MEDIUM_TESTS) - -enum { - BIG_LDA = 500, - BIG_LDB = 600, - BIG_LDC = 700 -}; - -const int incs[] = - { 33, -33 }; - -#else /* SHORT_TESTS || MEDIUM_TESTS */ - -enum { - BIG_LDA = 501, - BIG_LDB = 602, - BIG_LDC = 703 -}; - -const int incs[] = - { 1, -1, 33, -33 }; - -#endif /* !SHORT_TESTS && !MEDIUM_TESTS */ - -#if defined(SHORT_TESTS) || defined(MEDIUM_TESTS) -const size_t offs[] = - { 63, 258 }; -#else /* !SHORT_TESTS && !MEDIUM_TESTS */ -const size_t offs[] = - {0, 63, 128, 258 }; -#endif - -const int ldaRange[] = {0, 3192, 4097 }; -const int offsetRange[] = { 0, 100 }; -const double realAlphaRange[] = {(double)50, (double)100, (double)999999}; -const cl_float2 complexAlphaRange[] = {floatComplex(0,1), floatComplex(3,4)}; -const cl_float2 complexAlpha = floatComplex(2,3); - -const ComplexLong alphaBetaRange[] = {{50,50}, {20,20}}; -const ComplexLong alphaBeta = {10,10}; -const ComplexLong sflagRange[] = {{-1,0}, {0,0}, {1,0}, {-2,0}}; - -const ComplexLong rotCosMedium = {0, 3}; -const ComplexLong rotSinMedium = {0, 4}; - -const ComplexLong rotCosShort = {1, 6}; -const ComplexLong rotSinShort = {1, 2}; - -#ifdef DO_SPL - -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeHER2_SPL, HER2, Combine( - Values(clblasRowMajor), Values(clblasLower), ValuesIn(smallRange), ValuesIn(complexAlphaRange), - ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(offsetRange),ValuesIn(offsetRange),ValuesIn(ldaRange), - Values(1) ) ); -#endif - - - -#ifdef DO_HEMV - -#if defined(SHORT_TESTS) 
-INSTANTIATE_TEST_CASE_P(Short_HEMV, HEMV, Combine( - Values(clblasRowMajor), Values(clblasLower), ValuesIn(smallRange), Values(alphaBeta), - Values(alphaBeta), Values((size_t)0), Values((size_t)0), Values((size_t)0), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall_0HEMV, HEMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(15), Values(alphaBeta), - Values(alphaBeta), Values((size_t)0), Values((size_t)0), Values((size_t)0), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(order_HEMV, HEMV, Combine( - ValuesIn(orderSet), Values(clblasLower), ValuesIn(smallRange), Values(alphaBeta), - Values(alphaBeta), ValuesIn(offs), Values((size_t)0), Values((size_t)0), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(uplo_HEMV, HEMV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(smallRange), Values(alphaBeta), - Values(alphaBeta), Values((size_t)0), ValuesIn(offs), Values((size_t)0), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(alpha_beta_HEMV, HEMV, Combine( - Values(clblasRowMajor), Values(clblasLower), ValuesIn(smallRange), Values(alphaBeta), - Values(alphaBeta), Values((size_t)0), Values((size_t)0), ValuesIn(offs), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedBig_0HEMV, HEMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(1500, 5101), Values(alphaBeta), - Values(alphaBeta), Values((size_t)0), Values((size_t)0), Values((size_t)0), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -#else -INSTANTIATE_TEST_CASE_P(ALL_HEMV, HEMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(alphaBetaRange), - ValuesIn(alphaBetaRange), ValuesIn(offs), ValuesIn(offs), ValuesIn(offs), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), - Values(1))); - -#endif // 
Correctness - -#endif - -#ifdef DO_SWAP -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(SmallRange, SWAPXY, Combine( - Values(100,50), Values(0), Values(1), Values(0), Values(1), Values(1) ) ); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Medium_SWAP, SWAPXY, Combine( - Values(64,128,256,512), Values(0,3), Values(1,-1), Values(0,3), Values(1,-1), Values(1))); - -#else -INSTANTIATE_TEST_CASE_P(ALL_SWAP, SWAPXY, Combine( - ValuesIn(completeRange), ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(incs), Values(1))); - -#endif -#endif - -#ifdef DO_AXPY -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Small_AXPY, AXPY, Combine( - Values(100,50), ValuesIn(alphaBetaRange), Values(0), Values(1), Values(0), Values(1), Values(1) ) ); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Medium_AXPY, AXPY, Combine( - Values(64,128,256,512), ValuesIn(alphaBetaRange), Values(0,3), Values(1,-1), Values(0,3), Values(1,-1), Values(1))); - -#else -INSTANTIATE_TEST_CASE_P(ALL_AXPY, AXPY, Combine( - ValuesIn(completeRange), ValuesIn(alphaBetaRange), ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(incs), Values(1))); - -#endif -#endif - -#ifdef DO_ROTG -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Small_ROTG, ROTG, Combine( - Values(1, 5), Values(1, 6), Values(2, 8), Values(3, 7), Values(1) ) ); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Medium_ROTG, ROTG, Combine( - Values(64,128,256,512), Values(64, 128, 256, 512), Values(0,3), Values(0,3), Values(1))); - -#else -INSTANTIATE_TEST_CASE_P(ALL_ROTG, ROTG, Combine( - ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(offsetRange), Values(1))); - -#endif -#endif - -#ifdef DO_ROTM -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Small_ROTM, ROTM, Combine( - Values(1, 5, 10, 20), Values(1, 6), Values(1, -1), Values(1, 6), Values(1, -1), Values(1, 6), ValuesIn(sflagRange), Values(1))); -#elif defined(MEDIUM_TESTS) 
-INSTANTIATE_TEST_CASE_P(Medium_ROTM, ROTM, Combine( - Values(64,128,256,512), Values(0,3), Values(1, -3, 3, 1), Values(0,3), Values(1, -3, 3, 1), Values(0, 3), ValuesIn(sflagRange), Values(1))); -#else -INSTANTIATE_TEST_CASE_P(ALL_ROTM, ROTM, Combine( - ValuesIn(completeRange), ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(incs), - ValuesIn(offsetRange), ValuesIn(sflagRange), Values(1))); -#endif -#endif - -#ifdef DO_ROT -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Small_ROT, ROT, Combine( - Values(1, 5, 10, 20), Values(1, 6), Values(1, -1), Values(1, 6), Values(1, -1), Values(rotCosShort), Values(rotSinShort), Values(1))); -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Medium_ROT, ROT, Combine( - Values(64,128,256,512), Values(0,3), Values(1, -3, 3, 1), Values(0,3), Values(1, -3, 3, 1), Values(rotCosMedium), Values(rotSinMedium), Values(1))); -#else -INSTANTIATE_TEST_CASE_P(ALL_ROT, ROT, Combine( - ValuesIn(completeRange), ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(incs), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); -#endif -#endif - -#ifdef DO_ROTMG -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Small_ROTMG, ROTMG, Combine( - Values(1, 6), Values(1, 6), Values(1, 6), Values(1, 6), Values(1, 6), ValuesIn(sflagRange), Values(1))); -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Medium_ROTMG, ROTMG, Combine( - Values(1, 3, 15), Values(0, 3, 15), Values(0, 3, 15), Values(0, 3, 15), Values(0, 3, 15), ValuesIn(sflagRange), Values(1))); -#else -INSTANTIATE_TEST_CASE_P(ALL_ROTMG, ROTMG, Combine( - ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(offsetRange), - ValuesIn(offsetRange), ValuesIn(sflagRange), Values(1))); -#endif -#endif - -//NRM2 - -#ifdef DO_NRM2 - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_NRM2, NRM2, Combine( - ValuesIn(smallRange), Values(1), Values(1), Values(1), Values(1)) ); - 
-INSTANTIATE_TEST_CASE_P(SelectedSmall0_NRM2, NRM2, Combine( - Values(61), Values(4, -11), Values(0), Values(1), Values(1)) ); - - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Medium_NRM2, NRM2, Combine( - ValuesIn(smallRange), Values(-10), Values(1), Values(1), Values(1) ) ); - -INSTANTIATE_TEST_CASE_P(SelectedBig0_NRM2, NRM2, Combine( - Values(4900), Values(1), Values(4), Values(1), Values(1) ) ); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(ALL_NRM2, NRM2, Combine( - ValuesIn(completeRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), Values(1) ) ); - -#endif // Correctness -#endif - -#ifdef DO_ASUM - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_ASUM, ASUM, Combine( - ValuesIn(smallRange), Values(1), Values(1), Values(1), Values(1)) ); - -INSTANTIATE_TEST_CASE_P(SelectedSmall0_ASUM, ASUM, Combine( - Values(61), Values(4, -11), Values(0), Values(1), Values(1)) ); - - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Medium_ASUM, ASUM, Combine( - ValuesIn(smallRange), Values(-10), Values(1), Values(1), Values(1) ) ); - -INSTANTIATE_TEST_CASE_P(SelectedBig0_ASUM, ASUM, Combine( - Values(4900), Values(1), Values(4), Values(1), Values(1) ) ); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(ALL_ASUM, ASUM, Combine( - ValuesIn(completeRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), Values(1) ) ); - -#endif // Correctness -#endif - -#ifdef DO_iAMAX - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_iAMAX, iAMAX, Combine( - ValuesIn(smallRange), Values(1), Values(1), Values(1), Values(1)) ); - -INSTANTIATE_TEST_CASE_P(SelectedSmall0_iAMAX, iAMAX, Combine( - Values(61), Values(4, -1), Values(0), Values(1), Values(1)) ); - - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Medium_iAMAX, iAMAX, Combine( - ValuesIn(smallRange), Values(-10), Values(1), Values(1), Values(1) ) ); - -INSTANTIATE_TEST_CASE_P(SelectedBig0_iAMAX, iAMAX, Combine( - Values(4900), Values(1), Values(4), Values(1), 
Values(1) ) ); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(ALL_iAMAX, iAMAX, Combine( - ValuesIn(completeRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), Values(1) ) ); - -#endif // Correctness -#endif - -#ifdef DO_HPMV - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_HPMV, HPMV, Combine( - Values(clblasRowMajor), Values(clblasLower), ValuesIn(smallRange), Values(alphaBeta), - Values(alphaBeta), Values((size_t)0), Values((size_t)0), Values((size_t)0), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall_0HPMV, HPMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(15), Values(alphaBeta), - Values(alphaBeta), Values((size_t)0), Values((size_t)0), Values((size_t)0), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(order_HPMV, HPMV, Combine( - ValuesIn(orderSet), Values(clblasLower), ValuesIn(smallRange), Values(alphaBeta), - Values(alphaBeta), ValuesIn(offs), Values((size_t)0), Values((size_t)0), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(uplo_HPMV, HPMV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(smallRange), Values(alphaBeta), - Values(alphaBeta), Values((size_t)0), ValuesIn(offs), Values((size_t)0), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(alpha_beta_HPMV, HPMV, Combine( - Values(clblasRowMajor), Values(clblasLower), ValuesIn(smallRange), Values(alphaBeta), - Values(alphaBeta), Values((size_t)0), Values((size_t)0), ValuesIn(offs), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedBig_0HPMV, HPMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(1500, 5101), Values(alphaBeta), - Values(alphaBeta), Values((size_t)0), Values((size_t)0), Values((size_t)0), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -#else 
-INSTANTIATE_TEST_CASE_P(ALL_HPMV, HPMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(alphaBetaRange), - ValuesIn(alphaBetaRange), ValuesIn(offs), ValuesIn(offs), ValuesIn(offs), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), - Values(1))); - -#endif // Correctness - -#endif - -#ifdef DO_SYMM - -#if defined(SHORT_TESTS) -/*INSTANTIATE_TEST_CASE_P(Short_SYMM, SYMM, Combine( - Values(clblasRowMajor), Values(clblasLeft),Values(clblasLower), ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(complexAlphaRange), - ValuesIn(complexAlphaRange), Values(clMath::ExtraTestSizes(0, 0, 0, 0, 0, 0)), Values(1)));*/ -INSTANTIATE_TEST_CASE_P(SelectedSmall_0SYMM, SYMM, Combine( - ValuesIn(orderSet), ValuesIn(sideSet),ValuesIn(uploSet), Values(15),Values(15), Values(complexAlpha), - Values(complexAlpha), Values(clMath::ExtraTestSizes(0, 0, 0, 0, 0, 0)), Values(1))); - -#elif defined(MEDIUM_TESTS) -/*INSTANTIATE_TEST_CASE_P(order_SYMM, SYMM, Combine( - ValuesIn(orderSet), ValuesIn(sideSet),Values(clblasLower), ValuesIn(smallRange),ValuesIn(smallRange) ,ValuesIn(complexAlphaRange), - ValuesIn(complexAlphaRange), Values(clMath::ExtraTestSizes(0, 0, 0, 9, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(uplo_SYMM, SYMM, Combine( - Values(clblasRowMajor), Values(clblasLeft),ValuesIn(uploSet), ValuesIn(smallRange),ValuesIn(smallRange), ValuesIn(complexAlphaRange), - ValuesIn(complexAlphaRange), Values(clMath::ExtraTestSizes(0, 0, 0, 0, 9, 0)), Values(1)));*/ -INSTANTIATE_TEST_CASE_P(alpha_beta_SYMM, SYMM, Combine( - Values(clblasRowMajor), Values(clblasLeft),Values(clblasLower), Values(64),Values(133), Values(complexAlpha), - Values(complexAlpha), Values(clMath::ExtraTestSizes(0, (size_t)0, (size_t)0, 3, 7, 11)), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedBig_0SYMM, SYMM, Combine( - ValuesIn(orderSet), Values(clblasLeft),Values(clblasLower), Values(1100),Values(4000), Values(complexAlpha), - Values(complexAlpha), Values(clMath::ExtraTestSizes(0, 
(size_t)0, (size_t)0, 0, 0, 0)), Values(1))); - -#else -INSTANTIATE_TEST_CASE_P(ALL_SYMM_FriendlyOffsets, SYMM, Combine( - ValuesIn(orderSet), ValuesIn(sideSet),ValuesIn(uploSet), ValuesIn(smallRange),ValuesIn(smallRange), ValuesIn(complexAlphaRange), - ValuesIn(complexAlphaRange), Values(clMath::ExtraTestSizes(0, (size_t)0, (size_t)0, 64, 32, 128)), - Values(1))); -INSTANTIATE_TEST_CASE_P(ALL_SYMM_UnfriendlyOffsets, SYMM, Combine( - ValuesIn(orderSet), ValuesIn(sideSet),ValuesIn(uploSet), ValuesIn(smallRange),ValuesIn(smallRange), ValuesIn(complexAlphaRange), - ValuesIn(complexAlphaRange), Values(clMath::ExtraTestSizes(0, (size_t)0, (size_t)0, 6, 3, 12)), - Values(1))); - -#endif // Correctness -#endif - - -#ifdef DO_HEMM - -#if defined(SHORT_TESTS) -/*INSTANTIATE_TEST_CASE_P(Short_HEMM, HEMM, Combine( - Values(clblasRowMajor), Values(clblasLeft),Values(clblasLower), ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(complexAlphaRange), - ValuesIn(complexAlphaRange), Values(clMath::ExtraTestSizes(0, 0, 0, 0, 0, 0)), Values(1)));*/ -INSTANTIATE_TEST_CASE_P(SelectedSmall_0HEMM, HEMM, Combine( - ValuesIn(orderSet), ValuesIn(sideSet),ValuesIn(uploSet), Values(15),Values(15), Values(complexAlpha), - Values(complexAlpha), Values(clMath::ExtraTestSizes(0, 0, 0, 0, 0, 0)), Values(1))); - -#elif defined(MEDIUM_TESTS) -/*INSTANTIATE_TEST_CASE_P(order_HEMM, HEMM, Combine( - ValuesIn(orderSet), ValuesIn(sideSet),Values(clblasLower), ValuesIn(smallRange),ValuesIn(smallRange) ,ValuesIn(complexAlphaRange), - ValuesIn(complexAlphaRange), Values(clMath::ExtraTestSizes(0, 0, 0, 9, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(uplo_HEMM, HEMM, Combine( - Values(clblasRowMajor), Values(clblasLeft),ValuesIn(uploSet), ValuesIn(smallRange),ValuesIn(smallRange), ValuesIn(complexAlphaRange), - ValuesIn(complexAlphaRange), Values(clMath::ExtraTestSizes(0, 0, 0, 0, 9, 0)), Values(1)));*/ -INSTANTIATE_TEST_CASE_P(alpha_beta_HEMM, HEMM, Combine( - Values(clblasRowMajor), 
Values(clblasLeft),Values(clblasLower), Values(64),Values(133), Values(complexAlpha), - Values(complexAlpha), Values(clMath::ExtraTestSizes(0, (size_t)0, (size_t)0, 0, 0, 9)), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedBig_0HEMM, HEMM, Combine( - ValuesIn(orderSet), Values(clblasLeft),Values(clblasLower), Values(1010),Values( 4000), Values(complexAlpha), - Values(complexAlpha), Values(clMath::ExtraTestSizes(0, (size_t)0, (size_t)0, 0, 1, 0)), Values(1))); - -#else -INSTANTIATE_TEST_CASE_P(ALL_HEMM, HEMM, Combine( - ValuesIn(orderSet), ValuesIn(sideSet),ValuesIn(uploSet), ValuesIn(smallRange),ValuesIn(smallRange), ValuesIn(complexAlphaRange), ValuesIn(complexAlphaRange), Values(clMath::ExtraTestSizes(0, (size_t)512, (size_t)511, 9, 0, 0)), Values(1))); - -#endif // Correctness -#endif - - -#ifdef DO_SPMV - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_SPMV, SPMV, Combine( - Values(clblasRowMajor), Values(clblasLower), ValuesIn(smallRange), Values(alphaBeta), - Values(alphaBeta), Values((size_t)0), Values((size_t)0), Values((size_t)0), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall_0SPMV, SPMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(15), Values(alphaBeta), - Values(alphaBeta), Values((size_t)0), Values((size_t)0), Values((size_t)0), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(order_SPMV, SPMV, Combine( - ValuesIn(orderSet), Values(clblasLower), ValuesIn(smallRange), Values(alphaBeta), - Values(alphaBeta), ValuesIn(offs), Values((size_t)0), Values((size_t)0), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(uplo_SPMV, SPMV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(smallRange), Values(alphaBeta), - Values(alphaBeta), Values((size_t)0), ValuesIn(offs), Values((size_t)0), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); 
-INSTANTIATE_TEST_CASE_P(alpha_beta_SPMV, SPMV, Combine( - Values(clblasRowMajor), Values(clblasLower), ValuesIn(smallRange), Values(alphaBeta), - Values(alphaBeta), Values((size_t)0), Values((size_t)0), ValuesIn(offs), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedBig_0SPMV, SPMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(1500, 5101), Values(alphaBeta), - Values(alphaBeta), Values((size_t)0), Values((size_t)0), Values((size_t)0), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -#else -INSTANTIATE_TEST_CASE_P(ALL_SPMV, SPMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(alphaBetaRange), - ValuesIn(alphaBetaRange), ValuesIn(offs), ValuesIn(offs), ValuesIn(offs), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), - Values(1))); - -#endif // Correctness - -#endif - - -#ifdef DO_GEMM_2 -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange_BigLDA_OFF_NX, GEMM2, Combine( - Values(clblasColumnMajor), Values(clblasNoTrans), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(500, 501, 502, 1, 3, 10)), Values(1))); -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange_BigLDA_OFF_TN, GEMM2, Combine( - Values(clblasColumnMajor), Values(clblasTrans), Values(clblasNoTrans ), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(500, (size_t)501, (size_t)502, 3, 2, 1)), Values(1))); -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange_BigLDA_OFF_HN, GEMM2, Combine( - Values(clblasColumnMajor), Values(clblasConjTrans), Values(clblasNoTrans ), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(500, (size_t)501, (size_t)502, 3, 2, 1)), Values(1))); - -#if !defined(SHORT_TESTS) && !defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange_NX, GEMM2, Combine( - Values(clblasColumnMajor), Values(clblasNoTrans), 
ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange_TN, GEMM2, Combine( - Values(clblasColumnMajor), Values(clblasTrans), Values(clblasNoTrans ), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange_HN, GEMM2, Combine( - Values(clblasColumnMajor), Values(clblasConjTrans), Values(clblasNoTrans ), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes()), Values(1))); -#endif - -#endif //DO_GEMM_2 - -#ifdef DO_GEMM -// xGEMM tests -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange, GEMM, Combine( - Values(clblasColumnMajor), ValuesIn(transSet), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRange, GEMM, Combine( - Values(clblasRowMajor), ValuesIn(transSet), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes()), Values(1))); -// We know, that SmallRange does not have values more that 257, -// so lda is set to 500. 
-INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange_BigLDA, GEMM, Combine( - Values(clblasColumnMajor), ValuesIn(transSet), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(500, 501, 502, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRange_BigLDA, GEMM, Combine( - Values(clblasRowMajor), ValuesIn(transSet), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(500, 501, 502, 0, 0, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange_BigLDA_OffSet, GEMM, Combine( - Values(clblasColumnMajor), ValuesIn(transSet), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(500, 501, 502, 1, 0, 0)), Values(1))); - -// Cases for extended versions with offsets - -#if defined(SHORT_TESTS) || defined(MEDIUM_TESTS) - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_0, GEMM, Combine( - Values(clblasColumnMajor), ValuesIn(transSet), ValuesIn(transSet), - Values(67), Values(138), Values(220), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 600, 700)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_0, GEMM, Combine( - Values(clblasRowMajor), ValuesIn(transSet), ValuesIn(transSet), - Values(67), Values(138), Values(220), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 600, 700)), Values(1))); - -#else /* SHORT_TESTS || MEDIUM_TESTS */ - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_0, GEMM, Combine( - Values(clblasColumnMajor), ValuesIn(transSet), ValuesIn(transSet), - Values(67), Values(135), Values(228), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_1, GEMM, Combine( - Values(clblasColumnMajor), ValuesIn(transSet), ValuesIn(transSet), - Values(64), Values(64), Values(64), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 501, 0)), Values(1))); 
-INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_2, GEMM, Combine( - Values(clblasColumnMajor), ValuesIn(transSet), ValuesIn(transSet), - Values(128), Values(64), Values(77), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 0, 502)), Values(1))); -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_3, GEMM, Combine( - Values(clblasColumnMajor), ValuesIn(transSet), ValuesIn(transSet), - Values(112), Values(86), Values(68), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 501, 502)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_0, GEMM, Combine( - Values(clblasRowMajor), ValuesIn(transSet), ValuesIn(transSet), - Values(67), Values(135), Values(228), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_1, GEMM, Combine( - Values(clblasRowMajor), ValuesIn(transSet), ValuesIn(transSet), - Values(64), Values(64), Values(64), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 501, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_2, GEMM, Combine( - Values(clblasRowMajor), ValuesIn(transSet), ValuesIn(transSet), - Values(128), Values(64), Values(77), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 0, 502)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_3, GEMM, Combine( - Values(clblasRowMajor), ValuesIn(transSet), ValuesIn(transSet), - Values(112), Values(86), Values(68), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 501, 502)), Values(1))); - -#endif /* !SHORT_TESTS || !MEDIUM_TESTS */ - -// Big matrices -#if !defined SHORT_TESTS - -INSTANTIATE_TEST_CASE_P(SelectedBig_0, GEMM, Combine( - ValuesIn(orderSet), - Values(clblasNoTrans), Values(clblasNoTrans), - Values(2801), Values(2903), Values(3005), - Values(clMath::ExtraTestSizes()), Values(1))); - -#if !defined(MEDIUM_TESTS) - -INSTANTIATE_TEST_CASE_P(SelectedBig_1, GEMM, Combine( - ValuesIn(orderSet), - Values(clblasNoTrans), Values(clblasNoTrans), - Values(4777), Values(4333), Values(5000), - Values(clMath::ExtraTestSizes()), 
Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_2, GEMM, Combine( - ValuesIn(orderSet), - Values(clblasTrans), Values(clblasNoTrans), - Values(5777), Values(5333), Values(3000), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedBig_3, GEMM, Combine( - ValuesIn(orderSet), - Values(clblasTrans), Values(clblasConjTrans), - Values(6777), Values(3333), Values(3000), - Values(clMath::ExtraTestSizes()), Values(1))); - -#endif // !MEDIUM_TESTS -#endif // !SHORT_TESTS - -// Small matrices and Custom cases - -INSTANTIATE_TEST_CASE_P(SelectedSmall_0, GEMM, Combine( - ValuesIn(orderSet), - Values(clblasNoTrans), Values(clblasNoTrans), - Values(1), Values(1), Values(1), - Values(clMath::ExtraTestSizes()), Values(1))); - -#if !defined SHORT_TESTS - -INSTANTIATE_TEST_CASE_P(SelectedSmall_1, GEMM, Combine( - ValuesIn(orderSet), - Values(clblasNoTrans), Values(clblasNoTrans), - Values(2), Values(1), Values(3), - Values(clMath::ExtraTestSizes()), Values(1))); - -#if !defined(MEDIUM_TESTS) - -INSTANTIATE_TEST_CASE_P(SelectedSmall_2, GEMM, Combine( - ValuesIn(orderSet), - Values(clblasTrans), Values(clblasNoTrans), - Values(3), Values(2), Values(1), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall_3, GEMM, Combine( - ValuesIn(orderSet), - Values(clblasTrans), Values(clblasConjTrans), - Values(4), Values(3), Values(2), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall_4, GEMM, Combine( - ValuesIn(orderSet), - Values(clblasConjTrans), Values(clblasNoTrans), - Values(17), Values(13), Values(1), - Values(clMath::ExtraTestSizes()), Values(1))); - - // Custom test - use command line arguments to tweak it -INSTANTIATE_TEST_CASE_P(Custom, GEMM, Combine( - ValuesIn(orderSet), ValuesIn(transSet), ValuesIn(transSet), - Values(32), Values(32), Values(32), - Values(clMath::ExtraTestSizes()), Values(1))); - -#endif /* !MEDIUM_TESTS */ -#endif /* !SHORT_TESTS */ - -#endif // DO_GEMM - - 
-#ifdef DO_TRMM -// xTRMM tests - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange, TRMM, Combine( - Values(clblasColumnMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRange, TRMM, Combine( - Values(clblasRowMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes()), Values(1))); -// We know, that SmallRange does not have values more that 257, -// so lda is set to 500. -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange_BigLDA, TRMM, Combine( - Values(clblasColumnMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(500, 501, 0, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRange_BigLDA, TRMM, Combine( - Values(clblasRowMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(500, 501, 0, 0, 0, 0)), Values(1))); - -#if defined(SHORT_TESTS) || defined(MEDIUM_TESTS) - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_0, TRMM, Combine( - Values(clblasColumnMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(115), Values(158), - Values(clMath::ExtraTestSizes(0, 0, 0, 502, 606, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_0, TRMM, Combine( - Values(clblasRowMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(115), Values(158), - Values(clMath::ExtraTestSizes(0, 0, 0, 502, 606, 0)), Values(1))); - -#else /* SHORT_TESTS || MEDIUM_TESTS */ - -// Cases for extended versions with offsets -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_0, TRMM, Combine( - 
Values(clblasColumnMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(115), Values(113), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 0, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_1, TRMM, Combine( - Values(clblasColumnMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(128), Values(66), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 501, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_2, TRMM, Combine( - Values(clblasColumnMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(53), Values(67), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 501, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_0, TRMM, Combine( - Values(clblasRowMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(115), Values(113), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_1, TRMM, Combine( - Values(clblasRowMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(128), Values(66), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 501, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_2, TRMM, Combine( - Values(clblasRowMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(53), Values(67), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 501, 0)), Values(1))); - -#endif /* !SHORT_TESTS && !MEDIUM_TESTS */ - -// Big matrices - -#if !defined SHORT_TESTS - -INSTANTIATE_TEST_CASE_P(SelectedBig_0, TRMM, Combine( - ValuesIn(orderSet), - Values(clblasRight), Values(clblasUpper), Values(clblasTrans), - Values(clblasNonUnit), - Values(2801), Values(2903), - Values(clMath::ExtraTestSizes()), Values(1))); - -#if !defined(MEDIUM_TESTS) - -INSTANTIATE_TEST_CASE_P(SelectedBig_1, TRMM, Combine( - ValuesIn(orderSet), - 
Values(clblasRight), Values(clblasUpper), Values(clblasTrans), - Values(clblasNonUnit), - Values(4567), Values(4321), - Values(clMath::ExtraTestSizes()), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_2, TRMM, Combine( - ValuesIn(orderSet), - Values(clblasLeft), Values(clblasUpper), Values(clblasNoTrans), - Values(clblasNonUnit), - Values(5567), Values(5321), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedBig_3, TRMM, Combine( - ValuesIn(orderSet), - Values(clblasLeft), Values(clblasLower), Values(clblasTrans), - Values(clblasUnit), - Values(6567), Values(3321), - Values(clMath::ExtraTestSizes()), Values(1))); - -#endif // !MEDIUM_TESTS -#endif // !SHORT_TESTS - -// Small matrices and Custom tests - -INSTANTIATE_TEST_CASE_P(SelectedSmall_0, TRMM, Combine( - ValuesIn(orderSet), - Values(clblasRight), Values(clblasUpper), Values(clblasTrans), - Values(clblasNonUnit), - Values(1), Values(1), - Values(clMath::ExtraTestSizes()), Values(1))); - -#if !defined SHORT_TESTS - -INSTANTIATE_TEST_CASE_P(SelectedSmall_1, TRMM, Combine( - ValuesIn(orderSet), - Values(clblasRight), Values(clblasUpper), Values(clblasTrans), - Values(clblasNonUnit), - Values(2), Values(1), - Values(clMath::ExtraTestSizes()), Values(1))); - -#if !defined(MEDIUM_TESTS) - -INSTANTIATE_TEST_CASE_P(SelectedSmall_2, TRMM, Combine( - ValuesIn(orderSet), - Values(clblasLeft), Values(clblasUpper), Values(clblasNoTrans), - Values(clblasNonUnit), - Values(3), Values(2), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall_3, TRMM, Combine( - ValuesIn(orderSet), - Values(clblasLeft), Values(clblasLower), Values(clblasTrans), - Values(clblasUnit), - Values(4), Values(3), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall_4, TRMM, Combine( - ValuesIn(orderSet), - Values(clblasLeft), Values(clblasUpper), Values(clblasNoTrans), - Values(clblasUnit), - Values(17), Values(1), - 
Values(clMath::ExtraTestSizes()), Values(1))); - -// Custom test - use command line arguments to tweak it -INSTANTIATE_TEST_CASE_P(Custom, TRMM, Combine( - ValuesIn(orderSet), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(32), Values(32), - Values(clMath::ExtraTestSizes()), Values(1))); - - -#endif /* !MEDIUM_TESTS */ -#endif /* !SHORT_TESTS */ -#endif // DO_TRMM - -#ifdef DO_TRSM -// xTRSM tests - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange, TRSM, Combine( - Values(clblasColumnMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRange, TRSM, Combine( - Values(clblasRowMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes()), Values(1))); -// We know, that SmallRange does not have values more that 257, -// so lda is set to 500. 
-INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange_BigLDA, TRSM, Combine( - Values(clblasColumnMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(500, 501, 0, 0, 0, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRange_BigLDA, TRSM, Combine( - Values(clblasRowMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(500, 501, 0, 0, 0, 0)), Values(1))); - -#if defined(SHORT_TESTS) || defined(MEDIUM_TESTS) - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_0, TRSM, Combine( - Values(clblasColumnMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(115), Values(158), - Values(clMath::ExtraTestSizes(0, 0, 0, 502, 606, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_0, TRSM, Combine( - Values(clblasRowMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(115), Values(158), - Values(clMath::ExtraTestSizes(0, 0, 0, 502, 606, 0)), Values(1))); - -#else /* SHORT_TESTS || MEDIUM_TESTS */ - -// Cases for extended versions with offsets -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_0, TRSM, Combine( - Values(clblasColumnMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(115), Values(113), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_1, TRSM, Combine( - Values(clblasColumnMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(128), Values(66), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 501, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_2, TRSM, Combine( - Values(clblasColumnMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(53), 
Values(67), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 501, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_0, TRSM, Combine( - Values(clblasRowMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(115), Values(113), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_1, TRSM, Combine( - Values(clblasRowMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(128), Values(66), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 501, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_2, TRSM, Combine( - Values(clblasRowMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(53), Values(67), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 501, 0)), Values(1))); - -#endif /* !SHORT_TESTS && !MEDIUM_TESTS */ - -// Big matrices - -#if !defined SHORT_TESTS - -INSTANTIATE_TEST_CASE_P(SelectedBig_0, TRSM, Combine( - ValuesIn(orderSet), - Values(clblasRight), Values(clblasUpper), Values(clblasTrans), - Values(clblasNonUnit), - Values(2801), Values(2903), - Values(clMath::ExtraTestSizes()), Values(1))); - -#if !defined(MEDIUM_TESTS) - -INSTANTIATE_TEST_CASE_P(SelectedBig_1, TRSM, Combine( - ValuesIn(orderSet), - Values(clblasRight), Values(clblasUpper), Values(clblasTrans), - Values(clblasNonUnit), - Values(4567), Values(4321), - Values(clMath::ExtraTestSizes()), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_2, TRSM, Combine( - ValuesIn(orderSet), - Values(clblasLeft), Values(clblasUpper), Values(clblasNoTrans), - Values(clblasNonUnit), - Values(5567), Values(5321), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedBig_3, TRSM, Combine( - ValuesIn(orderSet), - Values(clblasLeft), Values(clblasLower), Values(clblasTrans), - Values(clblasUnit), - Values(6567), Values(3321), - Values(clMath::ExtraTestSizes()), Values(1))); - -#endif // 
!MEDIUM_TESTS -#endif // !SHORT_TESTS - -// Small matrices and Custom tests - -INSTANTIATE_TEST_CASE_P(SelectedSmall_0, TRSM, Combine( - ValuesIn(orderSet), - Values(clblasRight), Values(clblasUpper), Values(clblasTrans), - Values(clblasNonUnit), - Values(1), Values(1), - Values(clMath::ExtraTestSizes()), Values(1))); - -#if !defined SHORT_TESTS - -INSTANTIATE_TEST_CASE_P(SelectedSmall_1, TRSM, Combine( - ValuesIn(orderSet), - Values(clblasRight), Values(clblasUpper), Values(clblasTrans), - Values(clblasNonUnit), - Values(2), Values(1), - Values(clMath::ExtraTestSizes()), Values(1))); - -#if !defined(MEDIUM_TESTS) - -INSTANTIATE_TEST_CASE_P(SelectedSmall_2, TRSM, Combine( - ValuesIn(orderSet), - Values(clblasLeft), Values(clblasUpper), Values(clblasNoTrans), - Values(clblasNonUnit), - Values(3), Values(2), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall_3, TRSM, Combine( - ValuesIn(orderSet), - Values(clblasLeft), Values(clblasLower), Values(clblasTrans), - Values(clblasUnit), - Values(4), Values(3), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall_4, TRSM, Combine( - ValuesIn(orderSet), - Values(clblasLeft), Values(clblasUpper), Values(clblasNoTrans), - Values(clblasUnit), - Values(17), Values(1), - Values(clMath::ExtraTestSizes()), Values(1))); - -// Custom test - use command line arguments to tweak it -INSTANTIATE_TEST_CASE_P(Custom, TRSM, Combine( - ValuesIn(orderSet), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(32), Values(32), - Values(clMath::ExtraTestSizes()), Values(1))); - -#endif /* !MEDIUM_TESTS */ -#endif /* !SHORT_TESTS */ -#endif // DO_TRSM - -#ifdef DO_GEMV -// xGEMV tests - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange, GEMV, Combine( - Values(clblasColumnMajor), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); 
-INSTANTIATE_TEST_CASE_P(RowMajor_SmallRange, GEMV, Combine( - Values(clblasRowMajor), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -// We know, that SmallRange does not have values more that 257, -// so lda is set to 500. -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange_BigLDA, GEMV, Combine( - Values(clblasColumnMajor), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(500, 501, 502, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRange_BigLDA, GEMV, Combine( - Values(clblasRowMajor), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(500, 501, 502, 0, 0, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(SmallRange_VariousInc, GEMV, Combine( - ValuesIn(orderSet), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), - ValuesIn(clMath::makeContainerETS(ZERO_VAL, incs, incs, - ZERO_VAL, ZERO_VAL, ZERO_VAL)), - Values(1))); - -// Cases for the extended version with offsets -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx, GEMV, Combine( - Values(clblasColumnMajor), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), - ValuesIn(clMath::makeContainerETS(ZERO_VAL, ONE_VAL, ONE_VAL, offs, - ZERO_VAL, ZERO_VAL)), - Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx, GEMV, Combine( - Values(clblasRowMajor), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), - ValuesIn(clMath::makeContainerETS(ZERO_VAL, ONE_VAL, ONE_VAL, offs, - ZERO_VAL, ZERO_VAL)), - Values(1))); - -// Big matrices -#if !defined SHORT_TESTS - -INSTANTIATE_TEST_CASE_P(SelectedBig_0, GEMV, Combine( - ValuesIn(orderSet), Values(clblasTrans), - Values(2800), Values(2800), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -#if !defined(MEDIUM_TESTS) - -INSTANTIATE_TEST_CASE_P(SelectedBig_1, GEMV, Combine( - ValuesIn(orderSet), Values(clblasTrans), - 
Values(4567), Values(4321), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - - -INSTANTIATE_TEST_CASE_P(SelectedBig_2, GEMV, Combine( - ValuesIn(orderSet), Values(clblasNoTrans), - Values(5567), Values(5321), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedBig_3, GEMV, Combine( - ValuesIn(orderSet), Values(clblasTrans), - Values(6567), Values(3321), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -#endif // !MEDIUM_TESTS -#endif // !SHORT_TESTS - -// Small matrices and Custom tests - -INSTANTIATE_TEST_CASE_P(SelectedSmall_0, GEMV, Combine( - ValuesIn(orderSet), Values(clblasTrans), - Values(1), Values(1), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -#if !defined SHORT_TESTS - -INSTANTIATE_TEST_CASE_P(SelectedSmall_1, GEMV, Combine( - ValuesIn(orderSet), Values(clblasTrans), - Values(2), Values(1), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -#if !defined(MEDIUM_TESTS) - -INSTANTIATE_TEST_CASE_P(SelectedSmall_2, GEMV, Combine( - ValuesIn(orderSet), Values(clblasNoTrans), - Values(3), Values(2), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall_3, GEMV, Combine( - ValuesIn(orderSet), Values(clblasTrans), - Values(4), Values(3), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall_4, GEMV, Combine( - ValuesIn(orderSet), Values(clblasNoTrans), - Values(17), Values(1), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -// Custom test - use command line arguments to tweak it -INSTANTIATE_TEST_CASE_P(Custom, GEMV, Combine( - ValuesIn(orderSet), ValuesIn(transSet), - Values(32), Values(32), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -#endif /* !MEDIUM_TESTS */ -#endif /* !SHORT_TESTS */ -#endif // DO_GEMV - -#ifdef DO_SYMV -// xSYMV tests - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange, SYMV, 
Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), - ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRange, SYMV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), - ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -// We know, that SmallRange does not have values more that 257, -// so lda is set to 500. -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange_BigLDA, SYMV, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), - ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(500, 501, 502, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRange_BigLDA, SYMV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), - ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(500, 501, 502, 0, 0, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(SmallRange_VariousInc, SYMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - ValuesIn(smallRange), - ValuesIn(clMath::makeContainerETS(ZERO_VAL, incs, incs, - ZERO_VAL, ZERO_VAL, ZERO_VAL)), - Values(1))); - -// cases for the extended versions with offsets -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx, SYMV, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), - ValuesIn(smallRange), - ValuesIn(clMath::makeContainerETS(ZERO_VAL, ONE_VAL, ONE_VAL, offs, - ZERO_VAL, ZERO_VAL)), - Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx, SYMV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), - ValuesIn(smallRange), - ValuesIn(clMath::makeContainerETS(ZERO_VAL, ONE_VAL, ONE_VAL, offs, - ZERO_VAL, ZERO_VAL)), - Values(1))); - -// Big matrices -#if !defined SHORT_TESTS - -INSTANTIATE_TEST_CASE_P(SelectedBig_0, SYMV, Combine( - ValuesIn(orderSet), Values(clblasUpper), - Values(2801), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -#if !defined MEDIUM_TESTS - -INSTANTIATE_TEST_CASE_P(SelectedBig_1, SYMV, Combine( - ValuesIn(orderSet), Values(clblasUpper), - 
Values(4567), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_2, SYMV, Combine( - ValuesIn(orderSet), Values(clblasLower), - Values(5567), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedBig_3, SYMV, Combine( - ValuesIn(orderSet), Values(clblasUpper), - Values(6567), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -#endif // !MEDIUM_TESTS -#endif // !SHORT_TESTS - -// Small matrices and Custom tests - -INSTANTIATE_TEST_CASE_P(SelectedSmall_0, SYMV, Combine( - ValuesIn(orderSet), Values(clblasUpper), - Values(1), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -#if !defined SHORT_TESTS - -INSTANTIATE_TEST_CASE_P(SelectedSmall_1, SYMV, Combine( - ValuesIn(orderSet), Values(clblasUpper), - Values(2), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -#if !defined(MEDIUM_TESTS) - -INSTANTIATE_TEST_CASE_P(SelectedSmall_2, SYMV, Combine( - ValuesIn(orderSet), Values(clblasLower), - Values(3), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall_3, SYMV, Combine( - ValuesIn(orderSet), Values(clblasUpper), - Values(4), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall_4, SYMV, Combine( - ValuesIn(orderSet), Values(clblasLower), - Values(5), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -// Custom test - use command line arguments to tweak it -INSTANTIATE_TEST_CASE_P(Custom, SYMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(32), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), Values(1))); - -#endif /* !MEDIUM_TESTS */ -#endif /* !SHORT_TESTS */ -#endif - -#ifdef DO_SYR2K -// xSYR2K tests - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange, SYR2K, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), - 
Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRange, SYR2K, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes()), Values(1))); -// We know, that SmallRange does not have values more that 257, -// so lda is set to 500. -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange_BigLDA, SYR2K, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(500, 501, 502, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRange_BigLDA, SYR2K, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(500, 501, 502, 0, 0, 0)), Values(1))); - -// cases for the extended versions with the offsets -#if defined(SHORT_TESTS) || defined(MEDIUM_TESTS) - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_0, SYR2K, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), ValuesIn(transSet), - Values(254), Values(353), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 602, 704)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_0, SYR2K, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(transSet), - Values(254), Values(353), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 602, 704)), Values(1))); - -#else /* SHORT_TESTS || MEDIUM_TESTS */ - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_0, SYR2K, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), ValuesIn(transSet), - Values(255), Values(253), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_1, SYR2K, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), ValuesIn(transSet), - Values(128), Values(64), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 501, 0)), Values(1))); 
-INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_2, SYR2K, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), ValuesIn(transSet), - Values(75), Values(200), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 0, 502)), Values(1))); -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_3, SYR2K, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), ValuesIn(transSet), - Values(111), Values(256), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 501, 502)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_0, SYR2K, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(transSet), - Values(255), Values(253), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_1, SYR2K, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(transSet), - Values(128), Values(64), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 501, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_2, SYR2K, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(transSet), - Values(75), Values(200), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 0, 502)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_3, SYR2K, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(transSet), - Values(111), Values(256), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 501, 502)), Values(1))); - -#endif /* !SHORT_TESTS && !MEDIUM_TESTS */ - -// Big matrices -#if !defined SHORT_TESTS - -INSTANTIATE_TEST_CASE_P(SelectedBig_0, SYR2K, Combine( - ValuesIn(orderSet), Values(clblasUpper), Values(clblasTrans), - Values(2801), Values(2903), - Values(clMath::ExtraTestSizes()), Values(1))); - -#if !defined(MEDIUM_TESTS) - -INSTANTIATE_TEST_CASE_P(SelectedBig_1, SYR2K, Combine( - ValuesIn(orderSet), Values(clblasUpper), Values(clblasTrans), - Values(4567), Values(4321), - Values(clMath::ExtraTestSizes()), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_2, SYR2K, Combine( - ValuesIn(orderSet), Values(clblasUpper), 
Values(clblasNoTrans), - Values(5567), Values(5321), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedBig_3, SYR2K, Combine( - ValuesIn(orderSet), Values(clblasLower), Values(clblasTrans), - Values(6567), Values(3321), - Values(clMath::ExtraTestSizes()), Values(1))); - -#endif // !MEDIUM_TESTS -#endif // !SHORT_TESTS - -// Small matrices and Custom tests - -INSTANTIATE_TEST_CASE_P(SelectedSmall_0, SYR2K, Combine( - ValuesIn(orderSet), Values(clblasUpper), Values(clblasTrans), - Values(1), Values(1), - Values(clMath::ExtraTestSizes()), Values(1))); - -#if !defined SHORT_TESTS - -INSTANTIATE_TEST_CASE_P(SelectedSmall_1, SYR2K, Combine( - ValuesIn(orderSet), Values(clblasUpper), Values(clblasTrans), - Values(2), Values(1), - Values(clMath::ExtraTestSizes()), Values(1))); - -#if !defined(MEDIUM_TESTS) - -INSTANTIATE_TEST_CASE_P(SelectedSmall_2, SYR2K, Combine( - ValuesIn(orderSet), Values(clblasUpper), Values(clblasNoTrans), - Values(3), Values(2), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall_3, SYR2K, Combine( - ValuesIn(orderSet), Values(clblasLower), Values(clblasTrans), - Values(4), Values(3), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall_4, SYR2K, Combine( - ValuesIn(orderSet), Values(clblasUpper), Values(clblasNoTrans), - Values(17), Values(1), - Values(clMath::ExtraTestSizes()), Values(1))); - -// Custom test - use command line arguments to tweak it -INSTANTIATE_TEST_CASE_P(Custom, SYR2K, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(transSet), - Values(32), Values(32), - Values(clMath::ExtraTestSizes()), Values(1))); - -#endif /* !MEDIUM_TESTS */ -#endif /* !SHORT_TESTS */ -#endif // DO_SYR2K - -#ifdef DO_HERK -/* - ::std::tr1::tuple< - clblasOrder, // order - clblasUplo, // uplo - clblasTranspose, // transA - int, // N - int, // K - ComplexLong, // alpha - ComplexLong, // beta - ExtraTestSizes, // offa, offc, lda, ldc. 
- int // numCommandQueues -*/ -#if !defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(SPL_HERK, HERK, Combine( - Values(clblasColumnMajor, clblasRowMajor), ValuesIn(uploSet), Values(clblasNoTrans, clblasConjTrans), - Values(513), Values(513), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), - Values(clMath::ExtraTestSizes()), Values(1))); -#endif - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_HERK, HERK, Combine( - Values(clblasRowMajor), Values(clblasUpper), Values(clblasNoTrans), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall0_HERK, HERK, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(clblasConjTrans), - Values(14), Values(15), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), - Values(clMath::ExtraTestSizes(0,0,0,9,0,0)), Values(1))); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Order_HERK, HERK, Combine( - ValuesIn(orderSet), Values(clblasUpper), Values(clblasNoTrans), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), - Values(clMath::ExtraTestSizes(0,0,0,0,10,0)), Values(1))); -INSTANTIATE_TEST_CASE_P(Uplo_HERK, HERK, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), Values(clblasNoTrans), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), - Values(clMath::ExtraTestSizes(0,0,0,9,0,0)), Values(1))); -INSTANTIATE_TEST_CASE_P(Trans_HERK, HERK, Combine( - Values(clblasRowMajor), Values(clblasUpper), Values(clblasNoTrans, clblasConjTrans), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), - Values(clMath::ExtraTestSizes(0,0,0,0,10,0)), Values(1))); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(ALL_HERK, HERK, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(clblasNoTrans, clblasConjTrans), - ValuesIn(smallRange), ValuesIn(smallRange), 
ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), - Values(clMath::ExtraTestSizes(0,0,0,9,10,0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig0_HERK, HERK, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(clblasNoTrans, clblasConjTrans), - Values(2510, 4300), Values(1500,4600), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), - Values(clMath::ExtraTestSizes(0,0,0,9,0,0)), Values(1))); - -#endif // Correctness - -#endif // DO_HERK - - -#ifdef DO_HER2K - -#if !defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(SPL_HER2K, HER2K, Combine( - Values(clblasColumnMajor, clblasRowMajor), ValuesIn(uploSet), Values(clblasNoTrans, clblasConjTrans), - Values(513), Values(513), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), - Values(clMath::ExtraTestSizes()), Values(1))); -#endif - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_HER2K, HER2K, Combine( - Values(clblasRowMajor), Values(clblasUpper), Values(clblasNoTrans), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall0_HER2K, HER2K, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(clblasConjTrans), - Values(14), Values(15), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), - Values(clMath::ExtraTestSizes(0,0,0,9,0,0)), Values(1))); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Order_HER2K, HER2K, Combine( - ValuesIn(orderSet), Values(clblasUpper), Values(clblasNoTrans), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), - Values(clMath::ExtraTestSizes(0,0,0,0,10,0)), Values(1))); -INSTANTIATE_TEST_CASE_P(Uplo_HER2K, HER2K, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), Values(clblasNoTrans), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), - Values(clMath::ExtraTestSizes(0,0,0,9,0,0)), Values(1))); -INSTANTIATE_TEST_CASE_P(Trans_HER2K, HER2K, 
Combine( - Values(clblasRowMajor), Values(clblasUpper), Values(clblasNoTrans, clblasConjTrans), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), - Values(clMath::ExtraTestSizes(0,0,0,0,10,0)), Values(1))); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(ALL_HER2K, HER2K, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(clblasNoTrans, clblasConjTrans), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), - Values(clMath::ExtraTestSizes(0,0,0,9,10,0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig0_HER2K, HER2K, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(clblasNoTrans, clblasConjTrans), - Values(2510, 4300), Values(1500,4600), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), - Values(clMath::ExtraTestSizes(0,0,0,9,0,0)), Values(1))); - -#endif // Correctness - -#endif // DO_HER2K - - -#ifdef DO_SYRK -// xSYRK tests - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange, SYRK, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRange, SYRK, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes()), Values(1))); -// We know, that SmallRange does not have values more that 257, -// so lda is set to 500. 
-INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange_BigLDA, SYRK, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(500, 0, 501, 0, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRange_BigLDA, SYRK, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(500, 0, 501, 0, 0, 0)), Values(1))); - -// cases for the extended versions with the offsets -#if defined(SHORT_TESTS) || defined(MEDIUM_TESTS) - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_0, SYRK, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), ValuesIn(transSet), - Values(252), Values(353), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 0, 702)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_0, SYRK, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(transSet), - Values(252), Values(353), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 0, 702)), Values(1))); - -#else /* SHORT_TESTS || MEDIUM_TESTS */ - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_0, SYRK, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), ValuesIn(transSet), - Values(255), Values(253), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_1, SYRK, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), ValuesIn(transSet), - Values(128), Values(64), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 0, 501)), Values(1))); -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeEx_2, SYRK, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), ValuesIn(transSet), - Values(75), Values(200), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 0, 501)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_0, SYRK, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(transSet), - Values(255), Values(253), - 
Values(clMath::ExtraTestSizes(0, 0, 0, 500, 0, 0)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_1, SYRK, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(transSet), - Values(128), Values(64), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 0, 501)), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeEx_2, SYRK, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(transSet), - Values(75), Values(200), - Values(clMath::ExtraTestSizes(0, 0, 0, 500, 0, 501)), Values(1))); - -#endif /* !SHORT_TESTS && !MEDIUM_TESTS */ - -// Big matrices -#if !defined(SHORT_TESTS) - -INSTANTIATE_TEST_CASE_P(SelectedBig_0, SYRK, Combine( - ValuesIn(orderSet), Values(clblasUpper), Values(clblasTrans), - Values(2801), Values(2903), - Values(clMath::ExtraTestSizes()), Values(1))); - -#if !defined(MEDIUM_TESTS) - -INSTANTIATE_TEST_CASE_P(SelectedBig_1, SYRK, Combine( - ValuesIn(orderSet), Values(clblasUpper), Values(clblasTrans), - Values(4567), Values(4321), - Values(clMath::ExtraTestSizes()), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_2, SYRK, Combine( - ValuesIn(orderSet), Values(clblasUpper), Values(clblasNoTrans), - Values(5567), Values(5321), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedBig_3, SYRK, Combine( - ValuesIn(orderSet), Values(clblasLower), Values(clblasTrans), - Values(6567), Values(3321), - Values(clMath::ExtraTestSizes()), Values(1))); - -#endif // !MEDIUM_TESTS -#endif // !SHORT_TESTS - -// Small matrices and Custom tests - -INSTANTIATE_TEST_CASE_P(SelectedSmall_0, SYRK, Combine( - ValuesIn(orderSet), Values(clblasUpper), Values(clblasTrans), - Values(1), Values(1), - Values(clMath::ExtraTestSizes()), Values(1))); - -#if !defined SHORT_TESTS - -INSTANTIATE_TEST_CASE_P(SelectedSmall_1, SYRK, Combine( - ValuesIn(orderSet), Values(clblasUpper), Values(clblasTrans), - Values(2), Values(1), - Values(clMath::ExtraTestSizes()), Values(1))); - -#if !defined(MEDIUM_TESTS) - 
-INSTANTIATE_TEST_CASE_P(SelectedSmall_2, SYRK, Combine( - ValuesIn(orderSet), Values(clblasUpper), Values(clblasNoTrans), - Values(3), Values(2), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall_3, SYRK, Combine( - ValuesIn(orderSet), Values(clblasLower), Values(clblasTrans), - Values(4), Values(3), - Values(clMath::ExtraTestSizes()), Values(1))); -INSTANTIATE_TEST_CASE_P(SelectedSmall_4, SYRK, Combine( - ValuesIn(orderSet), Values(clblasUpper), Values(clblasNoTrans), - Values(17), Values(1), - Values(clMath::ExtraTestSizes()), Values(1))); - -// Custom test - use command line arguments to tweak it -INSTANTIATE_TEST_CASE_P(Custom, SYRK, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(transSet), - Values(32), Values(32), - Values(clMath::ExtraTestSizes()), Values(1))); - -#endif /* !MEDIUM_TESTS */ -#endif /* !SHORT_TESTS */ - -#endif // DO_SYRK - - -#ifdef DO_TRMV - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(ShortTRMV, TRMV, Combine( - Values(clblasRowMajor), Values(clblasLower), - Values(clblasNoTrans), Values(clblasUnit),ValuesIn(smallRange),Values(0), - Values(1), Values(0), Values(0), Values(1))); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Order_TRMV, TRMV, Combine( - ValuesIn(orderSet), Values(clblasLower), - Values(clblasNoTrans), Values(clblasUnit),ValuesIn(smallRange),Values(0), - Values(1), Values(0,9), Values(0), Values(1))); -INSTANTIATE_TEST_CASE_P(Uplo_TRMV, TRMV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), - Values(clblasNoTrans), Values(clblasUnit),ValuesIn(smallRange),Values(0), - Values(1), Values(0), Values(0,10), Values(1))); -INSTANTIATE_TEST_CASE_P(Trans_TRMV, TRMV, Combine( - Values(clblasRowMajor), Values(clblasLower), - ValuesIn(transSet), Values(clblasUnit),ValuesIn(smallRange),Values(0), - Values(1), Values(0,9), Values(0), Values(1))); -INSTANTIATE_TEST_CASE_P(Diag_TRMV, TRMV, Combine( - Values(clblasRowMajor), Values(clblasLower), - Values(clblasNoTrans), 
ValuesIn(diagSet), ValuesIn(smallRange),Values(0), - Values(1), Values(0), Values(0,10), Values(1))); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(All_TRMV, TRMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet),ValuesIn(smallRange),Values(0,4097), - ValuesIn(incs), Values(0, 10), Values(0, 9), Values(1))); - -#endif // Correctness - -#endif - -#ifdef DO_TPMV - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(ShortTPMV, TPMV, Combine( - Values(clblasRowMajor), Values(clblasLower), - Values(clblasNoTrans), Values(clblasUnit),ValuesIn(smallRange),Values(0), - Values(1), Values(0), Values(0), Values(1))); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Order_TPMV, TPMV, Combine( - ValuesIn(orderSet), Values(clblasLower), - Values(clblasNoTrans), Values(clblasUnit),ValuesIn(smallRange),Values(0), - Values(1), Values(0,9), Values(0), Values(1))); -INSTANTIATE_TEST_CASE_P(Uplo_TPMV, TPMV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), - Values(clblasNoTrans), Values(clblasUnit),ValuesIn(smallRange),Values(0), - Values(1), Values(0), Values(0,10), Values(1))); -INSTANTIATE_TEST_CASE_P(Trans_TPMV, TPMV, Combine( - Values(clblasRowMajor), Values(clblasLower), - ValuesIn(transSet), Values(clblasUnit),ValuesIn(smallRange),Values(0), - Values(1), Values(0,9), Values(0), Values(1))); -INSTANTIATE_TEST_CASE_P(Diag_TPMV, TPMV, Combine( - Values(clblasRowMajor), Values(clblasLower), - Values(clblasNoTrans), ValuesIn(diagSet), ValuesIn(smallRange),Values(0), - Values(1), Values(0), Values(0,10), Values(1))); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(All_TPMV, TPMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet),ValuesIn(smallRange),Values(0,4097), - ValuesIn(incs), Values(0, 10), Values(0, 9), Values(1))); - -#endif // Correctness - -#endif - -#ifdef DO_TRSV - -#ifdef SHORT_TESTS - -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeTRSV, TRSV, Combine( - Values(clblasRowMajor), 
Values(clblasUpper), - Values(clblasNoTrans), Values(clblasUnit),ValuesIn(smallRange), - Values(0), Values(1), Values(0), Values(0), Values(1))); - -#endif - -#ifdef MEDIUM_TESTS - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeTRSV, TRSV, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), - Values(clblasTrans), Values(clblasNonUnit), ValuesIn(smallRange), - Values(0), Values(1), Values(0), Values(0), Values(1))); - -INSTANTIATE_TEST_CASE_P(SmallRange_VariousIncTRSV, TRSV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), - Values(clblasNoTrans, clblasConjTrans), Values(clblasUnit), ValuesIn(smallRange), - Values(0), ValuesIn(incs), Values(0), Values(0), Values(1))); - -#endif - -#if !defined SHORT_TESTS && !defined MEDIUM_TESTS - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeTRSV, TRSV, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet),ValuesIn(smallRange), - Values(0), Values(1), Values(0,10), Values(0,9), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeTRSV, TRSV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet),ValuesIn(smallRange), - Values(0), Values(1), Values(0,10), Values(0,9), Values(1))); -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRange_BigLDATRSV, TRSV, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet),ValuesIn(smallRange), - Values(500), Values(1), Values(0,10), Values(0,9), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRange_BigLDATRSV, TRSV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet),ValuesIn(smallRange), - Values(500), Values(1), Values(0,10), Values(0,9), Values(1))); -INSTANTIATE_TEST_CASE_P(SmallRange_VariousIncTRSV, TRSV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet),ValuesIn(smallRange), - Values(0), ValuesIn(incs), Values(0,10), Values(0,9), Values(1))); - -#endif - -#endif - -#ifdef 
DO_TPSV - -#ifdef SHORT_TESTS - -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeTPSV, TPSV, Combine( - Values(clblasRowMajor), Values(clblasUpper), - Values(clblasNoTrans), Values(clblasUnit),ValuesIn(smallRange), - Values(0), Values(1), Values(0), Values(0), Values(1))); - -#endif - -#ifdef MEDIUM_TESTS - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeTPSV, TPSV, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), - Values(clblasTrans), Values(clblasNonUnit), ValuesIn(smallRange), - Values(0), Values(1), Values(0), Values(0), Values(1))); - -INSTANTIATE_TEST_CASE_P(SmallRange_VariousIncTPSV, TPSV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), - Values(clblasNoTrans, clblasConjTrans), Values(clblasUnit), ValuesIn(smallRange), - Values(0), ValuesIn(incs), Values(0), Values(0), Values(1))); - -#endif - -#if !defined SHORT_TESTS && !defined MEDIUM_TESTS - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeTPSV, TPSV, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet),ValuesIn(smallRange), - Values(0), Values(1), Values(0,10), Values(0,9), Values(1))); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeTPSV, TPSV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet),ValuesIn(smallRange), - Values(0), Values(1), Values(0,10), Values(0,9), Values(1))); -INSTANTIATE_TEST_CASE_P(SmallRange_VariousIncTPSV, TPSV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet),ValuesIn(smallRange), - Values(0), ValuesIn(incs), Values(0,10), Values(0,9), Values(1))); -#endif - -#endif - -/*#ifdef DO_SYMM - - - order = ::std::tr1::get<0>(GetParam()); - side = ::std::tr1::get<1>(GetParam()); - uplo = ::std::tr1::get<2>(GetParam()); - M = ::std::tr1::get<3>(GetParam()); - N = ::std::tr1::get<4>(GetParam()); - lda = ::std::tr1::get<5>(GetParam()); - ldb = ::std::tr1::get<6>(GetParam()); - ldc = ::std::tr1::get<7>(GetParam()); - offa = ::std::tr1::get<8>(GetParam()); - 
numCommandQueues = ::std::tr1::get<9>(GetParam()); - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeSYMM, SYMM, Combine( - Values(clblasColumnMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(3192), Values(3192), Values(3192), Values(0), - Values(1) ) ); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeSYMM, SYMM, Combine( - Values(clblasRowMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(3192), Values(3192), Values(3192), Values(0), - Values(1) ) ); -INSTANTIATE_TEST_CASE_P(ColumnMajor_VariousLDASYMM, SYMM, Combine( - Values(clblasColumnMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(smallRange), ValuesIn(smallRange), - ValuesIn(ldaRange), ValuesIn(ldaRange), ValuesIn(ldaRange), Values(0), - Values(1) ) ); -INSTANTIATE_TEST_CASE_P(RowMajor_VariousLDASYMM, SYMM, Combine( - Values(clblasRowMajor), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(smallRange), ValuesIn(smallRange), - ValuesIn(ldaRange), ValuesIn(ldaRange), ValuesIn(ldaRange), Values(0), - Values(1) ) ); -#endif -*/ - -#ifdef DO_SYR -/* - clblasOrder, // order - clblasUplo, // uplo - int, // N - double, //alpha - int, // offx - int, // incx, should be greater than 0 - int, // offa - int, // lda, 0 - undefined - int // numCommandQueues -*/ - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_SYR, SYR, Combine( - Values(clblasRowMajor), Values(clblasLower), ValuesIn(smallRange), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedSmall_SYR, SYR, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(15), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(1) ) ); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Order_SYR, SYR, Combine( - ValuesIn(orderSet), Values(clblasLower), ValuesIn(smallRange), ValuesIn(realAlphaRange), - Values(0,9), Values(1), Values(0,10), Values(0), Values(1) ) ); 
-INSTANTIATE_TEST_CASE_P(Uplo_SYR, SYR, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(realAlphaRange), - Values(0,9), Values(1), Values(0,10), Values(0), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedBig_SYR, SYR, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), Values(1500), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(1) ) ); - -#else // Correctness - -INSTANTIATE_TEST_CASE_P(ALL, SYR, Combine(ValuesIn(orderSet), ValuesIn(uploSet), - ValuesIn(smallRange), ValuesIn(realAlphaRange), ValuesIn(offsetRange), ValuesIn(incs), - ValuesIn(offsetRange), ValuesIn(ldaRange), Values(1) ) ); - -#endif - -#endif - - -#ifdef DO_SPR - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_SPR, SPR, Combine( - Values(clblasRowMajor), Values(clblasLower), ValuesIn(smallRange), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedSmall_SPR, SPR, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(15), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(1) ) ); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Order_SPR, SPR, Combine( - ValuesIn(orderSet), Values(clblasLower), ValuesIn(smallRange), ValuesIn(realAlphaRange), - Values(0,9), Values(1), Values(0,10), Values(0), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(Uplo_SPR, SPR, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(realAlphaRange), - Values(0,9), Values(1), Values(0,10), Values(0), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedBig_SPR, SPR, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), Values(1500, 5101), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(1) ) ); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(All_SPR, SPR, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(realAlphaRange), - ValuesIn(offsetRange), ValuesIn(incs), 
ValuesIn(offsetRange), ValuesIn(ldaRange), Values(1) ) ); - -#endif // Correctness - -#endif - -#ifdef DO_GER - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_GER, GER, Combine( - Values(clblasRowMajor),ValuesIn(smallRange), ValuesIn(smallRange), - Values(0), Values(1), Values(1), Values(0), Values(0), Values(0), - Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedSmall0_GER, GER, Combine( - ValuesIn(orderSet), Values(61), Values(32), - Values(0), Values(4,-11), Values(-30,1), Values(0), Values(0), Values(0), - Values(1) ) ); - - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Order_GER, GER, Combine( - ValuesIn(orderSet), ValuesIn(smallRange), ValuesIn(smallRange), - Values(0), Values(-10), Values(21), Values(0,9), Values(0), Values(0), - Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedBig0_GER, GER, Combine( - ValuesIn(orderSet), Values(4900), Values(3999), - Values(0), Values(4), Values(-33), Values(0), Values(0), Values(0), - Values(1) ) ); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(ALL_GER, GER, Combine( - ValuesIn(orderSet), ValuesIn(smallRange), ValuesIn(smallRange), - ValuesIn(ldaRange), ValuesIn(incs), ValuesIn(incs), ValuesIn(offsetRange),ValuesIn(offsetRange),ValuesIn(offsetRange), - Values(1) ) ); - -#endif // Correctness - -#endif - - -#ifdef DO_GERC - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_GERC, GERC, Combine( - Values(clblasRowMajor),ValuesIn(smallRange), ValuesIn(smallRange), - Values(0), Values(1), Values(1), Values(0), Values(0), Values(0), - Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedSmall0_GERC, GERC, Combine( - ValuesIn(orderSet), Values(61), Values(32), - Values(0), Values(4,-11), Values(-30,1), Values(0), Values(0), Values(0), - Values(1) ) ); - - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Order_GERC, GERC, Combine( - ValuesIn(orderSet), ValuesIn(smallRange), ValuesIn(smallRange), - Values(0), Values(-10), Values(21), Values(0,9), Values(0), Values(0,19), - Values(1) ) ); 
-INSTANTIATE_TEST_CASE_P(SelectedBig0_GERC, GERC, Combine( - ValuesIn(orderSet), Values(4900), Values(3999), - Values(0), Values(4), Values(-33), Values(0), Values(0), Values(0), - Values(1) ) ); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(ALL_GERC, GERC, Combine( - ValuesIn(orderSet), ValuesIn(smallRange), ValuesIn(smallRange), - ValuesIn(ldaRange), ValuesIn(incs), ValuesIn(incs), ValuesIn(offsetRange),ValuesIn(offsetRange),ValuesIn(offsetRange), - Values(1) ) ); - -#endif // Correctness - -#endif - -#ifdef DO_HER -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_HER, HER, Combine( - Values(clblasRowMajor), Values(clblasLower), ValuesIn(smallRange), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedSmall_HER, HER, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(15), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(1) ) ); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Order_HER, HER, Combine( - ValuesIn(orderSet), Values(clblasLower), ValuesIn(smallRange), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0,10), Values(0,9), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(Uplo_HER, HER, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0,10), Values(0,9), Values(1) ) ); - -#else // Correctness - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeHER, HER, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(realAlphaRange), - ValuesIn(ldaRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), - Values(1) ) ); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeHER, HER, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(realAlphaRange), - ValuesIn(ldaRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), - Values(1) ) ); -INSTANTIATE_TEST_CASE_P(ColumnMajor_VariousLDAHER, 
HER, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(realAlphaRange), - ValuesIn(ldaRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), - Values(1) ) ); -INSTANTIATE_TEST_CASE_P(RowMajor_VariousLDAHER, HER, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(realAlphaRange), - ValuesIn(ldaRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), - Values(1) ) ); -#endif - -#endif - -#ifdef DO_HPR - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_HPR, HPR, Combine( - Values(clblasRowMajor), Values(clblasLower), ValuesIn(smallRange), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedSmall_HPR, HPR, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(15), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(1) ) ); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Order_HPR, HPR, Combine( - ValuesIn(orderSet), Values(clblasLower), ValuesIn(smallRange), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0,10), Values(0,9), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(Uplo_HPR, HPR, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0,10), Values(0,9), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedBig_HPR, HPR, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), Values(1500, 5101), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(1) ) ); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(All_HPR, HPR, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(realAlphaRange), - ValuesIn(ldaRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), Values(1) ) ); - -#endif // Correctness - -#endif - - -#ifdef DO_HER2 -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_HER2, HER2, Combine( - 
Values(clblasRowMajor), Values(clblasLower), ValuesIn(smallRange), ValuesIn(complexAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(0), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedSmall_HER2, HER2, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(15), ValuesIn(complexAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(0), Values(1) ) ); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Order_HER2, HER2, Combine( - ValuesIn(orderSet), Values(clblasLower), ValuesIn(smallRange), ValuesIn(complexAlphaRange), - Values(0,9), Values(1), Values(0,10), Values(0,9), Values(0), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(Uplo_HER2, HER2, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(complexAlphaRange), - Values(0,10), Values(1), Values(0,10), Values(0,9), Values(0), Values(1) ) ); - -#else // Correctness - -INSTANTIATE_TEST_CASE_P(ColumnMajor_SmallRangeHER2, HER2, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(complexAlphaRange), - ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(offsetRange),ValuesIn(offsetRange),ValuesIn(ldaRange), - Values(1) ) ); -INSTANTIATE_TEST_CASE_P(RowMajor_SmallRangeHER2, HER2, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(complexAlphaRange), - ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(offsetRange),ValuesIn(offsetRange),ValuesIn(ldaRange), - Values(1) ) ); -INSTANTIATE_TEST_CASE_P(ColumnMajor_VariousLDAHER2, HER2, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(complexAlphaRange), - ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(offsetRange),ValuesIn(offsetRange),ValuesIn(ldaRange), - Values(1) ) ); -INSTANTIATE_TEST_CASE_P(RowMajor_VariousLDAHER2, HER2, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(complexAlphaRange), - ValuesIn(offsetRange), ValuesIn(incs), 
ValuesIn(offsetRange),ValuesIn(offsetRange),ValuesIn(ldaRange), - Values(1) ) ); - -#endif - -#endif - -#ifdef DO_HPR2 - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_HPR2, HPR2, Combine( - Values(clblasRowMajor), Values(clblasLower), ValuesIn(smallRange), ValuesIn(complexAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(0), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedSmall_HPR2, HPR2, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(15), ValuesIn(complexAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(0), Values(1) ) ); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Order_HPR2, HPR2, Combine( - ValuesIn(orderSet), Values(clblasLower), ValuesIn(smallRange), ValuesIn(complexAlphaRange), - Values(0,9), Values(1), Values(0,10), Values(0,9), Values(0), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(Uplo_HPR2, HPR2, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(complexAlphaRange), - Values(0,10), Values(1), Values(0,10), Values(0,9), Values(0), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedBig_HPR2, HPR2, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), Values(1500, 5101), ValuesIn(complexAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(0), Values(1) ) ); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(All_HPR2, HPR2, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(complexAlphaRange), - ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(ldaRange), Values(1) ) ); - -#endif // Correctness - -#endif - - -/*INSTANTIATE_TEST_CASE_P(ALL_HEMM_WITH_OFFSETS_ZERO, HEMM, Combine( - ValuesIn(orderSet), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(smallRange), ValuesIn(smallRange),ValuesIn(complexAlphaRange), ValuesIn(complexAlphaRange), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 0, 0)), - //Values(clMath::ExtraTestSizes(0, 0, 0, 12, 0, 1)), - Values(1) ) ); - 
-INSTANTIATE_TEST_CASE_P(ALL_HEMM, HEMM, Combine( - ValuesIn(orderSet), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(smallRange), ValuesIn(smallRange),ValuesIn(complexAlphaRange), ValuesIn(complexAlphaRange), - Values(clMath::ExtraTestSizes(0, 0, 0, 12, 13, 15)), - Values(1) ) ); - - -INSTANTIATE_TEST_CASE_P(SelectedBig_0, HEMM, Combine( - ValuesIn(orderSet), ValuesIn(sideSet), ValuesIn(uploSet), - Values(5600), Values(5600),ValuesIn(complexAlphaRange), ValuesIn(complexAlphaRange), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 0, 0)), - Values(1) ) ); - - -*/ - - - -/* -INSTANTIATE_TEST_CASE_P(SYMM_VERYSMALL, SYMM, Combine( - ValuesIn(orderSet), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(verySmallRange), ValuesIn(verySmallRange),ValuesIn(complexAlphaRange), ValuesIn(complexAlphaRange), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 0, 0)), - Values(1) ) );*/ - -/*INSTANTIATE_TEST_CASE_P(ALL_SYMM, SYMM, Combine( - ValuesIn(orderSet), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(smallRange), ValuesIn(smallRange),ValuesIn(complexAlphaRange), ValuesIn(complexAlphaRange), - Values(clMath::ExtraTestSizes(0, 0, 0, 1, 3, 13)), - Values(1) ) ); - -INSTANTIATE_TEST_CASE_P(ALL_SYMM_WITH_OFFSETS_ZERO, SYMM, Combine( - ValuesIn(orderSet), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(smallRange), ValuesIn(smallRange),ValuesIn(complexAlphaRange), ValuesIn(complexAlphaRange), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 0, 0)), - Values(1) ) ); - - -INSTANTIATE_TEST_CASE_P(SelectedBig_0, SYMM, Combine( - ValuesIn(orderSet), ValuesIn(sideSet), ValuesIn(uploSet), - Values(5600), Values(5600),ValuesIn(complexAlphaRange), ValuesIn(complexAlphaRange), - Values(clMath::ExtraTestSizes(0, 0, 0, 0, 0, 0)), - Values(1) ) ); -*/ - - -#ifdef DO_SYR2 - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_SYR2, SYR2, Combine( - Values(clblasRowMajor), Values(clblasLower), ValuesIn(smallRange), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(0), 
Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedSmall_SYR2, SYR2, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(15), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(0), Values(1) ) ); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Order_SYR2, SYR2, Combine( - ValuesIn(orderSet), Values(clblasLower), ValuesIn(smallRange), ValuesIn(realAlphaRange), - Values(0,9), Values(1), Values(0,10), Values(0,9), Values(0), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(Uplo_SYR2, SYR2, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(realAlphaRange), - Values(0,10), Values(1), Values(0,10), Values(0,9), Values(0), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedBig_SYR2, SYR2, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), Values(1500, 2800), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(0), Values(1) ) ); - -#else // Correctness - -INSTANTIATE_TEST_CASE_P(ALL, SYR2, Combine(ValuesIn(orderSet), ValuesIn(uploSet), - ValuesIn(smallRange), ValuesIn(realAlphaRange), ValuesIn(offsetRange), ValuesIn(incs), - ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(ldaRange), Values(1))); - -#endif - -#endif - - -#ifdef DO_SPR2 - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_SPR2, SPR2, Combine( - Values(clblasRowMajor), Values(clblasLower), ValuesIn(smallRange), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(0), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedSmall_SPR2, SPR2, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(15), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(0), Values(1) ) ); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Order_SPR2, SPR2, Combine( - ValuesIn(orderSet), Values(clblasLower), ValuesIn(smallRange), ValuesIn(realAlphaRange), - Values(0,9), Values(1), Values(0,10), Values(0,9), Values(0), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(Uplo_SPR2, SPR2, 
Combine( - Values(clblasRowMajor), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(realAlphaRange), - Values(0,10), Values(1), Values(0,10), Values(0,9), Values(0), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedBig_SPR2, SPR2, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), Values(1500, 5101), ValuesIn(realAlphaRange), - Values(0), Values(1), Values(0), Values(0), Values(0), Values(1) ) ); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(All_SPR2, SPR2, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(realAlphaRange), - ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(ldaRange), Values(1) ) ); - -#endif // Correctness - -#endif - - -#ifdef DO_GBMV - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_GBMV, GBMV, Combine( - Values(clblasRowMajor), Values(clblasNoTrans), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(smallRange), - Values(ExtraTestSizes(0, (int)1, (int)1, 0, 0, 0)), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedSmall0_GBMV, GBMV, Combine( - ValuesIn(orderSet), Values(clblasConjTrans), - Values(14), Values(15), Values(10), Values(8),Values(ExtraTestSizes(0, (int)-1, (int)1, 9, 0, 0)), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Order_GBMV, GBMV, Combine( - ValuesIn(orderSet), Values(clblasNoTrans), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(smallRange), - Values(ExtraTestSizes(0, (int)1, (int)33, 10, 0, 0)), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -INSTANTIATE_TEST_CASE_P(Trans_GBMV, GBMV, Combine( - Values(clblasRowMajor), ValuesIn(transSet), - ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(smallRange), ValuesIn(smallRange), - Values(ExtraTestSizes(0, (int)-33, (int)1, 0, 10, 0)), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), 
Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedVerySmall_GBMV, GBMV, Combine( - ValuesIn(orderSet), ValuesIn(transSet), - Values(1, 2, 4, 9), Values(3, 6, 11), Values(5), Values(7),Values(ExtraTestSizes(0, (int)-1, (int)1, 9, 0, 0)), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig0_GBMV, GBMV, Combine( - Values(clblasRowMajor), ValuesIn(transSet), Values(2599), Values(999), - Values(2000), Values(565), Values(clMath::ExtraTestSizes(0,(int)30,(int)1,9,0,6)), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(ALL_GBMV, GBMV, Combine( - ValuesIn(orderSet), ValuesIn(transSet), ValuesIn(smallRange), ValuesIn(smallRange), - ValuesIn(smallRange), ValuesIn(smallRange),Values(clMath::ExtraTestSizes(0,(int)22,(int)-20,9,10,0)), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig1_GBMV, GBMV, Combine( - ValuesIn(orderSet), ValuesIn(transSet), Values(2510, 2300), Values(1500,2400), - Values(2509, 2299), Values(1499,2399),Values(clMath::ExtraTestSizes(0,(int)3,(int)-2,9,0,6)), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -#endif // Correctness - -#endif // DO_GBMV - - - -#ifdef DO_SBMV - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_SBMV, SBMV, Combine( - Values(clblasRowMajor), Values(clblasUpper), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(ExtraTestSizes(0, (int)1, (int)1, 0, 0, 0)), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedSmall0_SBMV, SBMV, Combine( - ValuesIn(orderSet), Values(clblasLower), - Values(14), Values(10), Values(ExtraTestSizes(0, (int)-1, (int)1, 9, 0, 0)), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Order_SBMV, SBMV, Combine( - ValuesIn(orderSet), Values(clblasUpper), - ValuesIn(smallRange), 
ValuesIn(smallRange), - Values(ExtraTestSizes(0, (int)1, (int)33, 10, 0, 0)), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -INSTANTIATE_TEST_CASE_P(Uplo__SBMV, SBMV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(ExtraTestSizes(0, (int)-33, (int)1, 0, 10, 0)), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedVerySmall_SBMV, SBMV, Combine( - ValuesIn(orderSet), Values(clblasUpper), - Values(7), Values(5),Values(ExtraTestSizes(0, (int)-1, (int)1, 9, 0, 0)), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig0_SBMV, SBMV, Combine( - Values(clblasRowMajor), Values(clblasLower), - Values(2000), Values(565), Values(clMath::ExtraTestSizes(0,(int)30,(int)1,9,0,6)), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(ALL_SBMV, SBMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(0,(int)22,(int)-20,9,10,0)), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig1_SBMV, SBMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(2510, 2300), Values(1500,1700), - Values(clMath::ExtraTestSizes(0,(int)3,(int)-2,9,0,6)), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -#endif // Correctness - -#endif // DO_SBMV - -//HBMV -#ifdef DO_HBMV - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_HBMV, HBMV, Combine( - Values(clblasRowMajor), Values(clblasUpper), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(ExtraTestSizes(0, (int)1, (int)1, 0, 0, 0)), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedSmall0_HBMV, HBMV, Combine( - ValuesIn(orderSet), Values(clblasLower), - Values(14), Values(10), Values(ExtraTestSizes(0, 
(int)-1, (int)1, 9, 0, 0)), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Order_HBMV, HBMV, Combine( - ValuesIn(orderSet), Values(clblasUpper), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(ExtraTestSizes(0, (int)1, (int)33, 10, 0, 0)), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -INSTANTIATE_TEST_CASE_P(Trans_HBMV, HBMV, Combine( - Values(clblasRowMajor), Values(clblasLower), - ValuesIn(smallRange), ValuesIn(smallRange), - Values(ExtraTestSizes(0, (int)-33, (int)1, 0, 10, 0)), ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedVerySmall_HBMV, HBMV, Combine( - ValuesIn(orderSet), Values(clblasUpper), - Values(7), Values(5),Values(ExtraTestSizes(0, (int)-1, (int)1, 9, 0, 0)), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig0_HBMV, HBMV, Combine( - Values(clblasRowMajor), Values(clblasLower), - Values(2000), Values(565), Values(clMath::ExtraTestSizes(0,(int)30,(int)1,9,0,6)), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(ALL_HBMV, HBMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(smallRange), ValuesIn(smallRange), - Values(clMath::ExtraTestSizes(0,(int)22,(int)-20,9,10,0)), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig1_HBMV, HBMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(2510, 2300), Values(1500,1700), - Values(clMath::ExtraTestSizes(0,(int)3,(int)-2,9,0,6)), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), Values(1))); - -#endif // Correctness - -#endif // DO_HBMV - - -#ifdef DO_TBMV - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_TBMV, TBMV, Combine( - Values(clblasRowMajor), Values(clblasUpper), Values(clblasNoTrans), Values(clblasNonUnit), - ValuesIn(smallRange), 
ValuesIn(smallRange), Values(ExtraTestSizes(0, (int)1, (int)1, 0, 0, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedSmall0_TBMV, TBMV, Combine( - ValuesIn(orderSet), Values(clblasLower), Values(clblasTrans), Values(clblasUnit), - Values(14), Values(13), Values(ExtraTestSizes(0, (int)-1, (int)1, 9, 0, 0)), Values(1))); - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Order_TBMV, TBMV, Combine( - ValuesIn(orderSet), Values(clblasUpper), Values(clblasNoTrans), Values(clblasNonUnit), - ValuesIn(smallRange), ValuesIn(smallRange),Values(ExtraTestSizes(0, (int)1, (int)33, 10, 0, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(Uplo_TBMV, TBMV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), Values(clblasNoTrans), Values(clblasNonUnit), - ValuesIn(smallRange), ValuesIn(smallRange), Values(ExtraTestSizes(0, (int)1, (int)1, 0, 0, 10)), Values(1))); - -INSTANTIATE_TEST_CASE_P(Trans_TBMV, TBMV, Combine( - Values(clblasRowMajor), Values(clblasLower), ValuesIn(transSet), Values(clblasUnit), - ValuesIn(smallRange), ValuesIn(smallRange), Values(ExtraTestSizes(0, (int)-33, (int)1, 0, 10, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(Diag_TBMV, TBMV, Combine( - Values(clblasRowMajor), Values(clblasUpper), Values(clblasNoTrans), ValuesIn(diagSet), - ValuesIn(smallRange), ValuesIn(smallRange), Values(ExtraTestSizes(0, (int)1, (int)1, 8, 0, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedVerySmall_TBMV, TBMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(transSet), ValuesIn(diagSet), - Values(1, 2, 4, 9), Values(3), Values(ExtraTestSizes(0, (int)-1, (int)1, 9, 0, 0)), Values(1))); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(ALL_TBMV, TBMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(transSet), ValuesIn(diagSet), - ValuesIn(smallRange), ValuesIn(smallRange), Values(clMath::ExtraTestSizes(0,(int)22,(int)-20,9,10,0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_TBMV, TBMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), 
ValuesIn(transSet), ValuesIn(diagSet), - Values(2509, 2299), Values(1499,2199), Values(clMath::ExtraTestSizes(0,(int)3,(int)-2,9,0,6)), Values(1))); - -#endif // Correctness - -#endif // DO_TBMV - - -#ifdef DO_TBSV - -#if defined(SHORT_TESTS) -/* -INSTANTIATE_TEST_CASE_P(Short_TBSV, TBSV, Combine( - Values(clblasRowMajor), Values(clblasUpper), Values(clblasNoTrans), Values(clblasNonUnit), - ValuesIn(smallRange), ValuesIn(smallRange), Values(ExtraTestSizes(0, (int)1, (int)1, 0, 0, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedSmall0_TBSV, TBSV, Combine( - ValuesIn(orderSet), Values(clblasLower), Values(clblasTrans), Values(clblasUnit), - Values(14), Values(13), Values(ExtraTestSizes(0, (int)-1, (int)1, 9, 0, 0)), Values(1))); -*/ - -INSTANTIATE_TEST_CASE_P(Short_TBSV, TBSV, Combine( - Values(clblasRowMajor), Values(clblasLower), Values(clblasNoTrans), Values(clblasNonUnit), - ValuesIn(smallRange), ValuesIn(smallRange), Values(ExtraTestSizes(0, (int)1, (int)1, 0, 0, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedSmall0_TBSV, TBSV, Combine( - Values(clblasRowMajor), Values(clblasLower), Values(clblasNoTrans), Values(clblasUnit), - Values(14), Values(13), Values(ExtraTestSizes(0, (int)-2, (int)1, 9, 0, 0)), Values(1))); - -#elif defined(MEDIUM_TESTS) - -INSTANTIATE_TEST_CASE_P(Order_TBSV, TBSV, Combine( - Values(clblasRowMajor), Values(clblasLower), Values(clblasNoTrans), Values(clblasNonUnit), - ValuesIn(smallRange), ValuesIn(smallRange),Values(ExtraTestSizes(0, (int)1, (int)33, 10, 0, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(Uplo_TBSV, TBSV, Combine( - Values(clblasRowMajor), Values(clblasLower), Values(clblasNoTrans), Values(clblasNonUnit), - ValuesIn(smallRange), ValuesIn(smallRange), Values(ExtraTestSizes(0, (int)1, (int)1, 0, 0, 10)), Values(1))); - -INSTANTIATE_TEST_CASE_P(Trans_TBSV, TBSV, Combine( - Values(clblasRowMajor), Values(clblasLower), Values(clblasNoTrans), Values(clblasUnit), - ValuesIn(smallRange), ValuesIn(smallRange), 
Values(ExtraTestSizes(0, (int)-33, (int)1, 0, 10, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(Diag_TBSV, TBSV, Combine( - Values(clblasRowMajor), Values(clblasLower), Values(clblasNoTrans), ValuesIn(diagSet), - ValuesIn(smallRange), ValuesIn(smallRange), Values(ExtraTestSizes(0, (int)1, (int)1, 8, 0, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedVerySmall_TBSV, TBSV, Combine( - Values(clblasRowMajor), Values(clblasLower), Values(clblasNoTrans), ValuesIn(diagSet), - Values(1, 2, 4, 9), Values(3), Values(ExtraTestSizes(0, (int)-1, (int)1, 9, 0, 0)), Values(1))); -/* -INSTANTIATE_TEST_CASE_P(Order_TBSV, TBSV, Combine( - ValuesIn(orderSet), Values(clblasUpper), Values(clblasNoTrans), Values(clblasNonUnit), - ValuesIn(smallRange), ValuesIn(smallRange),Values(ExtraTestSizes(0, (int)1, (int)33, 10, 0, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(Uplo_TBSV, TBSV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), Values(clblasNoTrans), Values(clblasNonUnit), - ValuesIn(smallRange), ValuesIn(smallRange), Values(ExtraTestSizes(0, (int)1, (int)1, 0, 0, 10)), Values(1))); - -INSTANTIATE_TEST_CASE_P(Trans_TBSV, TBSV, Combine( - Values(clblasRowMajor), Values(clblasLower), ValuesIn(transSet), Values(clblasUnit), - ValuesIn(smallRange), ValuesIn(smallRange), Values(ExtraTestSizes(0, (int)-33, (int)1, 0, 10, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(Diag_TBSV, TBSV, Combine( - Values(clblasRowMajor), Values(clblasUpper), Values(clblasNoTrans), ValuesIn(diagSet), - ValuesIn(smallRange), ValuesIn(smallRange), Values(ExtraTestSizes(0, (int)1, (int)1, 8, 0, 0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedVerySmall_TBSV, TBSV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(transSet), ValuesIn(diagSet), - Values(1, 2, 4, 9), Values(3), Values(ExtraTestSizes(0, (int)-1, (int)1, 9, 0, 0)), Values(1))); -*/ -#else // Correctness -INSTANTIATE_TEST_CASE_P(ALL_TBSV, TBSV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(transSet), ValuesIn(diagSet), - 
ValuesIn(smallRange), ValuesIn(smallRange), Values(clMath::ExtraTestSizes(0,(int)22,(int)-20,9,10,0)), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_TBSV, TBSV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(transSet), ValuesIn(diagSet), - Values(2509, 2299), Values(1499,2199), Values(clMath::ExtraTestSizes(0,(int)3,(int)-2,9,0,6)), Values(1))); - -#endif // Correctness - -#endif // DO_TBSV - -//COPY - -#ifdef DO_COPY - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_COPY, COPY, Combine( - ValuesIn(smallRange), Values(1), Values(1), Values(1), Values(1), Values(1)) ); -INSTANTIATE_TEST_CASE_P(SelectedSmall0_COPY, COPY, Combine( - Values(61), Values(4, -11), Values(1), Values(0), Values(1), Values(1) ) ); - - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Medium_COPY, COPY, Combine( - ValuesIn(smallRange), Values(-10), Values(1), Values(1), Values(1), Values(1) ) ); - -INSTANTIATE_TEST_CASE_P(SelectedBig0_COPY, COPY, Combine( - Values(4900), Values(1), Values(1), Values(4), Values(1), Values(1) ) ); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(ALL_COPY, COPY, Combine( - ValuesIn(completeRange), ValuesIn(incs), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), Values(1) ) ); - -#endif // Correctness - -#endif - -//DOT - -#ifdef DO_DOT - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_DOT, DOT, Combine( - ValuesIn(smallRange), Values(1), Values(1), Values(1), Values(1), Values(1), Values(1)) ); -INSTANTIATE_TEST_CASE_P(SelectedSmall0_DOT, DOT, Combine( - Values(61), Values(4, -11), Values(1), Values(0), Values(1), Values(1) , Values(1)) ); - - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Medium_DOT, DOT, Combine( - ValuesIn(smallRange), Values(-10), Values(1), Values(1), Values(1), Values(1), Values(1) ) ); - -INSTANTIATE_TEST_CASE_P(SelectedBig0_DOT, DOT, Combine( - Values(4900), Values(1), Values(1), Values(4), Values(1), Values(1), Values(1) ) ); - -#else // Correctness 
-INSTANTIATE_TEST_CASE_P(ALL_DOT, DOT, Combine( - ValuesIn(completeRange), ValuesIn(incs), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(offsetRange), Values(1) ) ); - -#endif // Correctness - -#endif - -#ifdef DO_DOTC - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_DOTC, DOTC, Combine( - ValuesIn(smallRange), Values(1), Values(1), Values(1), Values(1), Values(1), Values(1)) ); -INSTANTIATE_TEST_CASE_P(SelectedSmall0_DOTC, DOTC, Combine( - Values(61), Values(4, -11), Values(1), Values(0), Values(1), Values(1) , Values(1)) ); - - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Medium_DOTC, DOTC, Combine( - ValuesIn(smallRange), Values(-10), Values(1), Values(1), Values(1), Values(1), Values(1) ) ); - -INSTANTIATE_TEST_CASE_P(SelectedBig0_DOTC, DOTC, Combine( - Values(4900), Values(1), Values(1), Values(4), Values(1), Values(1), Values(1) ) ); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(ALL_DOTC, DOTC, Combine( - ValuesIn(completeRange), ValuesIn(incs), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(offsetRange), Values(1) ) ); - -#endif // Correctness - -#endif - - -#ifdef DO_SCAL - -#if defined(SHORT_TESTS) -INSTANTIATE_TEST_CASE_P(Short_SCAL, SCAL, Combine( - ValuesIn(smallRange), ValuesIn(alphaBetaRange), Values(0), Values(1), Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedSmall0_SCAL, SCAL, Combine( - Values(61), ValuesIn(alphaBetaRange), Values(0), Values(4,-11), Values(1) ) ); - - -#elif defined(MEDIUM_TESTS) -INSTANTIATE_TEST_CASE_P(Medium_SCAL, SCAL, Combine( - ValuesIn(smallRange), ValuesIn(alphaBetaRange), Values(0), Values(-10), Values(1) ) ); - -INSTANTIATE_TEST_CASE_P(SelectedBig0_SCAL, SCAL, Combine( - Values(4900), ValuesIn(alphaBetaRange), Values(0), Values(4), Values(1) ) ); - -#else // Correctness -INSTANTIATE_TEST_CASE_P(ALL_SCAL, SCAL, Combine( - ValuesIn(completeRange), ValuesIn(alphaBetaRange), ValuesIn(offsetRange), ValuesIn(incs), Values(1) ) ); - -#endif // Correctness - 
-#endif - - -// Big matrices -#if !defined SHORT_TESTS - -#ifdef DO_TRMV -INSTANTIATE_TEST_CASE_P(SelectedBig_0TRMV, TRMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasTrans), ValuesIn(diagSet),Values(2800), - Values(0), ValuesIn(incs), Values(0, 10), Values(0, 9), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_1TRMV, TRMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasTrans), ValuesIn(diagSet),Values(4567), - Values(0), ValuesIn(incs), Values(0, 10), Values(0, 9), Values(1))); -#endif - -#ifdef DO_TRSV -INSTANTIATE_TEST_CASE_P(SelectedBig_0TRSV, TRSV, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), - Values(clblasTrans), ValuesIn(diagSet),Values(2800), - Values(0), Values(1), Values(0), Values(0), Values(1))); - -#endif - -#ifdef DO_TPSV -INSTANTIATE_TEST_CASE_P(SelectedBig_0TPSV, TPSV, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), - Values(clblasTrans), ValuesIn(diagSet),Values(2800), - Values(0), Values(1), Values(0), Values(0), Values(1))); -#endif - -#ifdef DO_HER -INSTANTIATE_TEST_CASE_P(SelectedBig_0HER, HER, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), Values(2800), Values((double)50), - Values(0), Values(1), Values(0), Values(0), - Values(1) ) ); -#endif - - -#ifdef DO_HER2 -INSTANTIATE_TEST_CASE_P(SelectedBig_0HER2, HER2, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), Values(2800), Values((cl_float2)floatComplex(0,1)), - Values(0), Values(1), Values(0), - Values(0), Values(0),Values(1) ) ); -#endif - - -#if !defined(MEDIUM_TESTS) - -#ifdef DO_TRMV -INSTANTIATE_TEST_CASE_P(SelectedBig_2TRMV, TRMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasNoTrans), ValuesIn(diagSet), Values(5567), - Values(0), ValuesIn(incs), Values(0, 10), Values(0, 9), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_3TRMV, TRMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasNoTrans), ValuesIn(diagSet), Values(6567), - Values(0), ValuesIn(incs), Values(0, 
10), Values(0, 9), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_4TRMV, TRMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasNoTrans), ValuesIn(diagSet), Values(7567), - Values(0), ValuesIn(incs), Values(0, 10), Values(0, 9), Values(1))); -#endif - -#ifdef DO_TPMV -INSTANTIATE_TEST_CASE_P(SelectedBig_2TPMV, TPMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasNoTrans), ValuesIn(diagSet), Values(5567),Values(0), - ValuesIn(incs), Values(0, 10), Values(0, 9), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_3TPMV, TPMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasNoTrans), ValuesIn(diagSet), Values(6567),Values(0), - ValuesIn(incs), Values(0, 10), Values(0, 9), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_4TPMV, TPMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasNoTrans), ValuesIn(diagSet), Values(7567),Values(0), - ValuesIn(incs), Values(0, 10), Values(0, 9), Values(1))); -#endif - - -#ifdef DO_TRSV -INSTANTIATE_TEST_CASE_P(SelectedBig_1TRSV, TRSV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasTrans), ValuesIn(diagSet),Values(4567), - Values(0), ValuesIn(incs), Values(0,10), Values(0,9), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_2TRSV, TRSV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasNoTrans), ValuesIn(diagSet), Values(5567), - Values(0), ValuesIn(incs), Values(0,10), Values(0,9), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_3TRSV, TRSV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasNoTrans), ValuesIn(diagSet), Values(6567), - Values(0), ValuesIn(incs), Values(0,10), Values(0,9), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_4TRSV, TRSV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasNoTrans), ValuesIn(diagSet), Values(7567), - Values(0), ValuesIn(incs), Values(0,10), Values(0,9), Values(1))); -#endif - -#ifdef DO_TPSV -INSTANTIATE_TEST_CASE_P(SelectedBig_1TPSV, TPSV, 
Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasTrans), ValuesIn(diagSet),Values(4567), - Values(0), ValuesIn(incs), Values(0,10), Values(0,9), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_2TPSV, TPSV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasNoTrans), ValuesIn(diagSet), Values(5567), - Values(0), ValuesIn(incs), Values(0,10), Values(0,9), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_3TPSV, TPSV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasNoTrans), ValuesIn(diagSet), Values(6567), - Values(0), ValuesIn(incs), Values(0,10), Values(0,9), Values(1))); - -INSTANTIATE_TEST_CASE_P(SelectedBig_4TPSV, TPSV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasNoTrans), ValuesIn(diagSet), Values(7567), - Values(0), ValuesIn(incs), Values(0,10), Values(0,9), Values(1))); -#endif - - -#ifdef DO_HER -INSTANTIATE_TEST_CASE_P(SelectedBig_1HER, HER, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(3192), ValuesIn(realAlphaRange), - ValuesIn(ldaRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), - Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedBig_2HER, HER, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(2048), ValuesIn(realAlphaRange), - ValuesIn(ldaRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), - Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedBig_3HER, HER, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(3192), ValuesIn(realAlphaRange), - ValuesIn(ldaRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), - Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedBig_4HER, HER, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(2055), ValuesIn(realAlphaRange), - ValuesIn(ldaRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), - Values(1) ) ); -#endif - -#ifdef DO_HER2 -INSTANTIATE_TEST_CASE_P(SelectedBig_1HER2, HER2, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(3192), 
ValuesIn(complexAlphaRange), - ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(offsetRange), - ValuesIn(offsetRange), ValuesIn(ldaRange),Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedBig_2HER2, HER2, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(2048), ValuesIn(complexAlphaRange), - ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), - ValuesIn(ldaRange),Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedBig_3HER2, HER2, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(3192), ValuesIn(complexAlphaRange), - ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), - ValuesIn(ldaRange),Values(1) ) ); -INSTANTIATE_TEST_CASE_P(SelectedBig_4HER2, HER2, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(2055), ValuesIn(complexAlphaRange), - ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(offsetRange),ValuesIn(offsetRange), - ValuesIn(ldaRange),Values(1) ) ); -#endif - - -#endif /* !MEDIUM_TESTS */ -#endif /* !SHORT_TESTS */ - -// Small matrices - -#ifdef DO_TRMV -INSTANTIATE_TEST_CASE_P(SelectedSmall_0TRMV, TRMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasNoTrans), ValuesIn(diagSet), Values(1), - Values(0), Values(1), Values(0, 10), Values(0, 9), Values(1))); -#endif - -#ifdef DO_TPMV -INSTANTIATE_TEST_CASE_P(SelectedSmall_0TPMV, TPMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasNoTrans), ValuesIn(diagSet), Values(1),Values(0), - Values(1), Values(0, 10), Values(0, 9), Values(1))); -#endif - -#ifdef DO_TRSV -INSTANTIATE_TEST_CASE_P(SelectedSmall_0TRSV, TRSV, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), - Values(clblasNoTrans), Values(clblasNonUnit), Values(1), - Values(0), Values(1), Values(0,10), Values(0,9), Values(1))); -#endif - -#ifdef DO_TPSV -INSTANTIATE_TEST_CASE_P(SelectedSmall_0TPSV, TPSV, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), - Values(clblasNoTrans), Values(clblasNonUnit), Values(1), - Values(0), 
Values(1), Values(0,10), Values(0,9), Values(1))); -#endif - - -#ifdef DO_HER -INSTANTIATE_TEST_CASE_P(SelectedSmall_0HER, HER, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), Values(4), ValuesIn(realAlphaRange), - Values(0), ValuesIn(incs), Values(0,9), Values(0,11), - Values(1) ) ); -#endif - -#ifdef DO_HER2 -INSTANTIATE_TEST_CASE_P(SelectedSmall_0HER2, HER2, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), Values(4), ValuesIn(complexAlphaRange), - Values(0,7), ValuesIn(incs), Values(0,9), Values(0,11), - Values(0),Values(1) ) ); -#endif - - -#if !defined SHORT_TESTS - -#ifdef DO_TRMV -INSTANTIATE_TEST_CASE_P(SelectedSmall_1TRMV, TRMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasNoTrans), ValuesIn(diagSet), Values(2), - Values(0), Values(1), Values(0, 10), Values(0, 9), Values(1))); -#endif - -#ifdef DO_TPMV -INSTANTIATE_TEST_CASE_P(SelectedSmall_1TPMV, TPMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasNoTrans), ValuesIn(diagSet), Values(2),Values(0), - Values(1), Values(0, 10), Values(0, 9), Values(1))); -#endif - -#ifdef DO_TRSV -INSTANTIATE_TEST_CASE_P(SelectedSmall_1TRSV, TRSV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), - Values(clblasNoTrans), Values(clblasUnit), Values(2), - Values(0), Values(1), Values(10), Values(9), Values(1))); -#endif - - -#ifdef DO_HER -INSTANTIATE_TEST_CASE_P(SelectedSmall_1HER, HER, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), Values(12), ValuesIn(realAlphaRange), - Values(0), ValuesIn(incs), Values(0), Values(1), - Values(1) ) ); - -#endif - -#ifdef DO_HER2 -INSTANTIATE_TEST_CASE_P(SelectedSmall_1HER2, HER2, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(12), ValuesIn(complexAlphaRange), - Values(0,1), ValuesIn(incs), Values(0),Values(9), - Values(0),Values(1) ) ); -#endif - - -#if !defined(MEDIUM_TESTS) - -#ifdef DO_TRMV -INSTANTIATE_TEST_CASE_P(SelectedSmall_2TRMV, TRMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - 
Values(clblasNoTrans), ValuesIn(diagSet), Values(13), - Values(0), Values(1), Values(0, 10), Values(0, 9), Values(1))); -#endif - -#ifdef DO_TPMV -INSTANTIATE_TEST_CASE_P(SelectedSmall_2TPMV, TPMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(clblasNoTrans), ValuesIn(diagSet), Values(13),Values(0), - Values(1), Values(0, 10), Values(0, 9), Values(1))); -#endif - -#ifdef DO_TRSV -INSTANTIATE_TEST_CASE_P(SelectedSmall_2TRSV, TRSV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), - Values(clblasNoTrans), Values(clblasNonUnit), Values(13), - Values(0), Values(1), Values(0,10), Values(0,9), Values(1))); -#endif - -#ifdef DO_TPSV -INSTANTIATE_TEST_CASE_P(SelectedSmall_2TPSV, TPSV, Combine( - Values(clblasRowMajor), ValuesIn(uploSet), - Values(clblasTrans), Values(clblasUnit), Values(13), - Values(0), Values(1), Values(0,10), Values(0,9), Values(1))); -#endif - - -#ifdef DO_HER -INSTANTIATE_TEST_CASE_P(SelectedSmallHER_2HER, HER, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(65), ValuesIn(realAlphaRange), - Values(0), ValuesIn(incs), Values(0), Values(0), - Values(1) ) ); - -#endif - -#ifdef DO_HER2 -INSTANTIATE_TEST_CASE_P(SelectedSmallHER2_2HER2, HER2, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(65), ValuesIn(complexAlphaRange), - Values(0), ValuesIn(incs), Values(0), Values(0), - Values(0), Values(1) ) ); -#endif - -#endif /* !MEDIUM_TESTS */ -#endif /* !SHORT_TESTS */ - -// Custom test - use command line arguments to tweak it -#if !defined SHORT_TESTS && !defined MEDIUM_TESTS -#ifdef DO_TRMV -INSTANTIATE_TEST_CASE_P(Custom, TRMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), Values(32), - Values(0), Values(1), Values(0, 10), Values(0, 9), Values(1))); -#endif - -#ifdef DO_TRSV -INSTANTIATE_TEST_CASE_P(Custom, TRSV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), Values(32), - Values(0), Values(1), Values(0,10), Values(0,9), Values(1))); 
-#endif - -#ifdef DO_TPSV -INSTANTIATE_TEST_CASE_P(Custom, TPSV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), Values(32), - Values(0), Values(1), Values(0,10), Values(0,9), Values(1))); -#endif - -#ifdef DO_GER -INSTANTIATE_TEST_CASE_P(Custom, GER, Combine( - ValuesIn(orderSet), - Values(32), Values(32), - Values(0), Values(1), Values(1), Values(0, 9), Values(0, 11), Values(0, 10), - Values(1) ) ); -#endif - -#ifdef DO_GERC -INSTANTIATE_TEST_CASE_P(Custom, GERC, Combine( - ValuesIn(orderSet), - Values(32), Values(32), - Values(0), Values(1), Values(1), Values(0, 9), Values(0, 11), Values(0, 10), - Values(1) ) ); -#endif - -#ifdef DO_HER -INSTANTIATE_TEST_CASE_P(Custom, HER, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(32), Values(99.0), - Values(0), Values(1), Values(6, 2), Values(0, 5), - Values(1) ) ); - -#endif - -#ifdef DO_HER2 -INSTANTIATE_TEST_CASE_P(Custom, HER2, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(32), ValuesIn(complexAlphaRange), - Values(0), Values(1), Values(0), Values(0),Values(40), Values(1) ) ); -#endif - -#endif /* !SHORT_TESTS */ -// Multiple command queues tests - -#if defined SHORT_TESTS -#define QUEUES_TEST_MATRIX_SIZES 257 -#elif defined MEDIUM_TESTS -#define QUEUES_TEST_MATRIX_SIZES 385 -#else -#define QUEUES_TEST_MATRIX_SIZES 513,1025 -#endif - -#if !defined(SHORT_TESTS) - -#ifdef DO_GEMM -INSTANTIATE_TEST_CASE_P(MultipleQueues, GEMM, Combine( - ValuesIn(orderSet), ValuesIn(transSet), ValuesIn(transSet), - Values(QUEUES_TEST_MATRIX_SIZES), - Values(QUEUES_TEST_MATRIX_SIZES), - Values(QUEUES_TEST_MATRIX_SIZES), - Values(clMath::ExtraTestSizes()), ValuesIn(numQueues))); -#endif - -#if !defined(MEDIUM_TESTS) - - -#ifdef DO_TRMM - -INSTANTIATE_TEST_CASE_P(MultipleQueues, TRMM, Combine( - ValuesIn(orderSet), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(QUEUES_TEST_MATRIX_SIZES), Values(QUEUES_TEST_MATRIX_SIZES), - 
Values(clMath::ExtraTestSizes()), ValuesIn(numQueues))); -#endif - -#ifdef DO_TRSM -INSTANTIATE_TEST_CASE_P(MultipleQueues, TRSM, Combine( - ValuesIn(orderSet), ValuesIn(sideSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet), - Values(QUEUES_TEST_MATRIX_SIZES), Values(QUEUES_TEST_MATRIX_SIZES), - Values(clMath::ExtraTestSizes()), ValuesIn(numQueues))); -#endif - -#endif /* MEDIUM_TESTS */ - - -#ifdef DO_GEMV -INSTANTIATE_TEST_CASE_P(MultipleQueues, GEMV, Combine( - ValuesIn(orderSet), ValuesIn(transSet), - Values(QUEUES_TEST_MATRIX_SIZES), Values(QUEUES_TEST_MATRIX_SIZES), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), ValuesIn(numQueues))); -#endif - -#ifdef DO_SYMV -INSTANTIATE_TEST_CASE_P(MultipleQueues, SYMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - Values(QUEUES_TEST_MATRIX_SIZES), - Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), ValuesIn(numQueues))); -#endif - -#ifdef DO_SYR2K -INSTANTIATE_TEST_CASE_P(MultipleQueues, SYR2K, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(transSet), - Values(QUEUES_TEST_MATRIX_SIZES), Values(QUEUES_TEST_MATRIX_SIZES), - Values(clMath::ExtraTestSizes()), ValuesIn(numQueues))); -#endif - -#ifdef DO_SYRK -INSTANTIATE_TEST_CASE_P(MultipleQueues, SYRK, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(transSet), - Values(QUEUES_TEST_MATRIX_SIZES), Values(QUEUES_TEST_MATRIX_SIZES), - Values(clMath::ExtraTestSizes()), ValuesIn(numQueues))); -#endif - -#if !defined MEDIUM_TESTS - -#ifdef DO_HERK -INSTANTIATE_TEST_CASE_P(MultipleQueues, HERK, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(transSet), - Values(QUEUES_TEST_MATRIX_SIZES), Values(QUEUES_TEST_MATRIX_SIZES), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), - Values(clMath::ExtraTestSizes()), ValuesIn(numQueues))); -#endif - -#ifdef DO_HER2K -INSTANTIATE_TEST_CASE_P(MultipleQueues, HER2K, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(transSet), - Values(QUEUES_TEST_MATRIX_SIZES), 
Values(QUEUES_TEST_MATRIX_SIZES), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), - Values(clMath::ExtraTestSizes()), ValuesIn(numQueues))); -#endif - -#ifdef DO_TRMV -INSTANTIATE_TEST_CASE_P(MultipleQueues, TRMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet),Values(QUEUES_TEST_MATRIX_SIZES), - Values(0), Values(1), Values(0, 10), Values(0, 9), ValuesIn(numQueues))); -#endif - -#ifdef DO_TPMV -INSTANTIATE_TEST_CASE_P(MultipleQueues, TPMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), - ValuesIn(transSet), ValuesIn(diagSet),Values(QUEUES_TEST_MATRIX_SIZES), - Values(0), Values(1), Values(0, 10), Values(0, 9), ValuesIn(numQueues))); -#endif - -#ifdef DO_HEMV -INSTANTIATE_TEST_CASE_P(MultipleQueues, HEMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(alphaBetaRange), - ValuesIn(alphaBetaRange), Values(0, 10), Values(0, 9), Values(0, 8), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), ValuesIn(numQueues))); -#endif - -#ifdef DO_HPMV -INSTANTIATE_TEST_CASE_P(MultipleQueues, HPMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(alphaBetaRange), - ValuesIn(alphaBetaRange), Values(0, 10), Values(0, 9), Values(0, 8), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), ValuesIn(numQueues))); -#endif - - -#ifdef DO_SPMV -INSTANTIATE_TEST_CASE_P(MultipleQueues, SPMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(alphaBetaRange), - ValuesIn(alphaBetaRange), Values(0, 10), Values(0, 9), Values(0, 8), Values(clMath::ExtraTestSizes(0, 1, 1, 0, 0, 0)), ValuesIn(numQueues))); -#endif - -#ifdef DO_TRSV -INSTANTIATE_TEST_CASE_P(MultipleQueues, TRSV, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), - Values(clblasConjTrans), ValuesIn(diagSet),Values(QUEUES_TEST_MATRIX_SIZES), - Values(0), Values(1), Values(0,10), Values(0,9), ValuesIn(numQueues))); -#endif - -#ifdef DO_TPSV 
-INSTANTIATE_TEST_CASE_P(MultipleQueues, TPSV, Combine( - Values(clblasColumnMajor), ValuesIn(uploSet), - Values(clblasTrans), ValuesIn(diagSet),Values(QUEUES_TEST_MATRIX_SIZES), - Values(0), Values(1), Values(0,10), Values(0,9), ValuesIn(numQueues))); -#endif - -#ifdef DO_SYR -INSTANTIATE_TEST_CASE_P(MultipleQueues, SYR, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(realAlphaRange), - ValuesIn(offsetRange), ValuesIn(incs), - ValuesIn(offsetRange), ValuesIn(ldaRange), ValuesIn(numQueues) ) ); - -#endif - -#ifdef DO_SPR -INSTANTIATE_TEST_CASE_P(MultipleQueues, SPR, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(realAlphaRange), - ValuesIn(offsetRange), ValuesIn(incs), - ValuesIn(offsetRange), ValuesIn(ldaRange), ValuesIn(numQueues) ) ); - -#endif - -#ifdef DO_GER -INSTANTIATE_TEST_CASE_P(MultipleQueues, GER, Combine( - ValuesIn(orderSet), - Values(QUEUES_TEST_MATRIX_SIZES), Values(QUEUES_TEST_MATRIX_SIZES), - Values(0), Values(1), Values(1), Values(0, 9), Values(0, 11), Values(0, 10), - ValuesIn(numQueues) ) ); -#endif - -#ifdef DO_GERC -INSTANTIATE_TEST_CASE_P(MultipleQueues, GERC, Combine( - ValuesIn(orderSet), - Values(QUEUES_TEST_MATRIX_SIZES), Values(QUEUES_TEST_MATRIX_SIZES), - Values(0), Values(1), Values(1), Values(0, 9), Values(0, 11), Values(0, 10), - ValuesIn(numQueues) ) ); -#endif - -#ifdef DO_HER -INSTANTIATE_TEST_CASE_P(MultipleQueues, HER, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(realAlphaRange), - ValuesIn(ldaRange), Values(1), Values(0), Values(0), - ValuesIn(numQueues) ) ); - -#endif - -#ifdef DO_HPR -INSTANTIATE_TEST_CASE_P(MultipleQueues, HPR, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(realAlphaRange), - ValuesIn(ldaRange), Values(1), Values(0), Values(0), - ValuesIn(numQueues) ) ); - -#endif - -#ifdef DO_HER2 -INSTANTIATE_TEST_CASE_P(MultipleQueues, 
HER2, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(complexAlphaRange), - Values(0), Values(1), Values(0),Values(1), ValuesIn(ldaRange), - ValuesIn(numQueues) ) ); -#endif - -#ifdef DO_HPR2 -INSTANTIATE_TEST_CASE_P(MultipleQueues, HPR2, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(complexAlphaRange), - Values(0), Values(1), Values(0),Values(1), ValuesIn(ldaRange), - ValuesIn(numQueues) ) ); -#endif - - -#ifdef DO_SYR2 -#endif - -#ifdef DO_SPR2 -INSTANTIATE_TEST_CASE_P(MultipleQueues, SPR2, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(realAlphaRange), - ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(offsetRange), - ValuesIn(offsetRange), ValuesIn(ldaRange), ValuesIn(numQueues) ) ); -#endif - -#ifdef DO_GBMV -INSTANTIATE_TEST_CASE_P(MultipleQueues, GBMV, Combine( - ValuesIn(orderSet), ValuesIn(transSet), Values(QUEUES_TEST_MATRIX_SIZES), Values(QUEUES_TEST_MATRIX_SIZES), - Values(QUEUES_TEST_MATRIX_SIZES), Values(QUEUES_TEST_MATRIX_SIZES),Values(clMath::ExtraTestSizes(0,(int)1,(int)1,0,0,0)), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), ValuesIn(numQueues))); -#endif - -#ifdef DO_TBMV -INSTANTIATE_TEST_CASE_P(MultipleQueues, TBMV, Combine( - ValuesIn(orderSet), ValuesIn(uploSet), ValuesIn(transSet), ValuesIn(diagSet), Values(QUEUES_TEST_MATRIX_SIZES), - Values(QUEUES_TEST_MATRIX_SIZES), Values(clMath::ExtraTestSizes(0,(int)1,(int)1,0,0,0)), ValuesIn(numQueues))); -#endif - -#ifdef DO_SCAL -INSTANTIATE_TEST_CASE_P(MultipleQueues, SCAL, Combine( - Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(alphaBetaRange), ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(numQueues))); -#endif - -#ifdef DO_COPY -INSTANTIATE_TEST_CASE_P(MultipleQueues, COPY, Combine( - Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(incs), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(numQueues))); -#endif - -#ifdef DO_SWAP 
-INSTANTIATE_TEST_CASE_P(MultipleQueues, SWAPXY, Combine( - Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(offsetRange), ValuesIn(incs), - ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(numQueues) ) ); -#endif - -#ifdef DO_DOT -INSTANTIATE_TEST_CASE_P(MultipleQueues, DOT, Combine( - Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(incs), ValuesIn(incs), - ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(numQueues) ) ); -#endif - -#ifdef DO_DOTC -INSTANTIATE_TEST_CASE_P(MultipleQueues, DOTC, Combine( - Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(incs), ValuesIn(incs), - ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(numQueues) ) ); -#endif - -#ifdef DO_AXPY -INSTANTIATE_TEST_CASE_P(MultipleQueues, AXPY, Combine( - Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(alphaBetaRange), ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(numQueues))); -#endif - -#ifdef DO_ROTG -INSTANTIATE_TEST_CASE_P(MultipleQueues, ROTG, Combine( - ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(numQueues))); -#endif - -#ifdef DO_ROTM -INSTANTIATE_TEST_CASE_P(MultipleQueues, ROTM, Combine( - Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(incs), - ValuesIn(offsetRange), ValuesIn(sflagRange), ValuesIn(numQueues))); -#endif - -#ifdef DO_ROT -INSTANTIATE_TEST_CASE_P(MultipleQueues, ROT, Combine( - Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(offsetRange), ValuesIn(incs), ValuesIn(offsetRange), ValuesIn(incs), - ValuesIn(alphaBetaRange), ValuesIn(alphaBetaRange), ValuesIn(numQueues))); -#endif - -#ifdef DO_ROTMG -INSTANTIATE_TEST_CASE_P(MultipleQueues, ROTMG, Combine( - ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(offsetRange), - ValuesIn(offsetRange), ValuesIn(sflagRange), ValuesIn(numQueues))); -#endif - -#ifdef DO_NRM2 -INSTANTIATE_TEST_CASE_P(MultipleQueues, NRM2, Combine( 
- Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(incs), - ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(numQueues) ) ); -#endif - -#ifdef DO_ASUM -INSTANTIATE_TEST_CASE_P(MultipleQueues, ASUM, Combine( - Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(incs), - ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(numQueues) ) ); -#endif - -#ifdef DO_iAMAX -INSTANTIATE_TEST_CASE_P(MultipleQueues, iAMAX, Combine( - Values(QUEUES_TEST_MATRIX_SIZES), ValuesIn(incs), - ValuesIn(offsetRange), ValuesIn(offsetRange), ValuesIn(numQueues) ) ); -#endif - -#endif /* !MEDIUM_TESTS */ -#endif /* SHORT_TESTS */ - -#undef QUEUES_TEST_MATRIX_SIZES - -/////////////////////////////////////////////////////////////////////////////// - -int -main(int argc, char *argv[]) -{ - ::clMath::BlasBase *base; - TestParams params; - int ret; - - if( (argc > 1) && ( !strcmp(argv[1], "--test-help") || !strcmp(argv[1], "-?") || !strcmp(argv[1], "-h") ) ) - { - printUsage("test-correctness"); - ::testing::InitGoogleTest(&argc, argv); - return 0; - } - - // The library takes an environment variable to control how to cache kernels; automate the setting of this - // environment variable in our different test programs to set it to reasonable values - // Read environmental variable to limit or disable ( 0 ) the size of the kernel cache in memory - char* kCacheEnv = getenv( "AMD_CLBLAS_KCACHE_LIMIT_MB" ); - if( kCacheEnv == NULL ) - { -#if defined( SHORT_TESTS ) -#else - putenv( (char*)"AMD_CLBLAS_KCACHE_LIMIT_MB=256" ); -#endif - } - - ::testing::InitGoogleTest(&argc, argv); - ::std::cerr << "Initialize OpenCL and clblas..." << ::std::endl; - base = ::clMath::BlasBase::getInstance(); - if (base == NULL) { - ::std::cerr << "Fatal error, OpenCL or clblas initialization failed! " - "Leaving the test." 
<< ::std::endl; - return -1; - } - - base->setSeed(DEFAULT_SEED); - - if (argc != 1) { - params.optFlags = NO_FLAGS; - params.devType = CL_DEVICE_TYPE_GPU; - params.devName = NULL; - if (parseBlasCmdLineArgs(argc, argv, ¶ms) != 0) { - printUsage(argv[0]); - return 1; - } - if (params.optFlags & SET_SEED) { - base->setSeed(params.seed); - } - if (params.optFlags & SET_ALPHA) { - base->setAlpha(params.alpha); - } - if (params.optFlags & SET_BETA) { - base->setBeta(params.beta); - } - if (params.optFlags & SET_M) { - base->setM(params.M); - } - if (params.optFlags & SET_N) { - base->setN(params.N); - } - if (params.optFlags & SET_K) { - base->setK(params.K); - } - if (params.optFlags & SET_INCX) { - base->setIncX(params.incx); - } - if (params.optFlags & SET_INCY) { - base->setIncY(params.incy); - } - if (params.optFlags & SET_DEVICE_TYPE) { - if (!base->setDeviceType(¶ms.devType, params.devName)) { - ::std::cerr << "Fatal error, OpenCL or clblas " - "initialization failed! Leaving the test." << - ::std::endl; - return -1; - } - } - if (params.optFlags & SET_NUM_COMMAND_QUEUES) { - base->setNumCommandQueues(params.numCommandQueues); - } - } - - parseEnv(¶ms); - if (params.optFlags & SET_USE_IMAGES) { - base->setUseImages(params.useImages); - } - - /* Use of image based buffers is deprecated - if (base->useImages()) { - if (base->addScratchImages()) { - std::cerr << "FATAL ERROR, CANNOT CREATE SCRATCH IMAGES!" << std::endl; - } - } - */ - - base->printEnvInfo(); - ret = RUN_ALL_TESTS(); - - if (base->useImages()) { - base->removeScratchImages(); - } - - /* - * Explicitely tell the singleton to release all resources, - * before we return from main. 
- */ - base->release( ); - - return ret; -} diff --git a/external/clBLAS/src/tests/correctness/trsm-delta.h b/external/clBLAS/src/tests/correctness/trsm-delta.h deleted file mode 100644 index 29673442..00000000 --- a/external/clBLAS/src/tests/correctness/trsm-delta.h +++ /dev/null @@ -1,240 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - - -#include <blas-math.h> -#include <common.h> - -// Type-dependant constants -template <class T> -static cl_double DELTA_0(); -template<> -__template_static cl_double DELTA_0<cl_float>() { return pow(2.0, -20); } -template<> -__template_static cl_double DELTA_0<cl_double>() { return pow(2.0, -50); } -template<> -__template_static cl_double DELTA_0<FloatComplex>() { return pow(2.0, -20); } -template<> -__template_static cl_double DELTA_0<DoubleComplex>() { return pow(2.0, -50); } - -size_t -trsmBlockSize(size_t elemSize) -{ - /* TODO: Right now TRSM generators use block size of 16 elements for the - * double complex type, and of 32 elements for another types. - * If this changes, we have to fetch block size from TRSM generator - * somehow. - */ - return (elemSize == sizeof(DoubleComplex)) ? 
16 : 32; -} - -template <typename T> -void -trsmDelta( - clblasOrder order, - clblasSide side, - clblasUplo uplo, - clblasTranspose transA, - clblasDiag diag, - size_t M, - size_t N, - T *A, - size_t lda, - T *B, - size_t ldb, - T alpha, - cl_double *delta) -{ - cl_double *deltaCLBLAS, s; - int i, k, j, jStart, jEnd, idx; - int zinc; - size_t z = 0; - size_t bsize; - bool isUpper; - T v; - - isUpper = ((uplo == clblasUpper) && (transA == clblasNoTrans)) || - ((uplo == clblasLower) && (transA != clblasNoTrans)); - - deltaCLBLAS = new cl_double[M * N]; - bsize = trsmBlockSize(sizeof(T)); - - if (side == clblasLeft) { - // Calculate delta of TRSM evaluated with the Gauss' method - - for (k = 0; k < (int)N; k++) { - if (isUpper) { - for (i = (int)M - 1; i >= 0; i--) { - v = getElement<T>(order, clblasNoTrans, i, k, B, ldb); - if (diag == clblasNonUnit) { - v = v / getElement<T>(order, transA, i, i, A, lda); - } - s = module(v) * DELTA_0<T>() * module(alpha); - if (i == (int)(M - 1)) { - delta[i * N + k] = s; - } - else { - delta[i * N + k] = s + delta[(i + 1) * N + k]; - } - assert(delta[i* N + k] >= 0); - } - } - else { - for (i = 0; i < (int)M; i++) { - v = getElement<T>(order, clblasNoTrans, i, k, B, ldb); - if (diag == clblasNonUnit) { - v = v / getElement<T>(order, transA, i, i, A, lda); - } - s = module(v) * DELTA_0<T>() * module(alpha); - if (i == 0) { - delta[i * N + k] = s; - } - else { - delta[i * N + k] = s + delta[(i - 1) * N + k]; - } - assert(delta[i* N + k] >= 0); - } - } - } - - // Calculate clblas TRSM delta - - for (k = 0; k < (int)N; k++) { - for (i = 0; i < (int)M; i++) { - s = 0.0; - - /* - * For the upper triangular matrix the solving process proceeds - * from the bottom to the top, and the bottommost block's - * delta influents most of all. For the lower triangular matrix - * the situation is opposite. 
- */ - if (isUpper) { - jStart = i / (int)bsize; - // index of the block just after the last matrix block - jEnd = ((int)M + (int)bsize - 1) / (int)bsize; - z = 1; - zinc = 1; - } - else { - jStart = 0; - jEnd = i / (int)bsize + 1; - z = jEnd - jStart; - zinc = -1; - } - - for (j = jStart; j < jEnd; j++) { - idx = j * (int)bsize + i % (int)bsize; - if (idx >= (int)M) { - continue; - } - s += z * delta[idx * N + k]; - z += zinc; - } - - deltaCLBLAS[i * N + k] = s * bsize; - assert(deltaCLBLAS[i* N + k] >= 0); - } - } - } - else { - // Calculate delta of TRSM evaluated with the Gauss' method - - for (i = 0; i < (int)M; i++) { - if (isUpper) { - for (k = 0; k < (int)N; k++) { - v = getElement<T>(order, clblasNoTrans, i, k, B, ldb); - if (diag == clblasNonUnit) { - v = v / getElement<T>(order, transA, k, k, A, lda); - } - s = module(v) * DELTA_0<T>() * module(alpha); - if (k == 0) { - delta[i * N + k] = s; - } - else { - delta[i * N + k] = s + delta[i * N + (k - 1)]; - } - assert(delta[i* N + k] >= 0); - } - } - else { - for (k = (int)N - 1; k >= 0; k--) { - v = getElement<T>(order, clblasNoTrans, i, k, B, ldb); - if (diag == clblasNonUnit) { - v = v / getElement<T>(order, transA, k, k, A, lda); - } - s = module(v) * DELTA_0<T>() * module(alpha); - if (k == (int)(N - 1)) { - delta[i * N + k] = s; - } - else { - delta[i * N + k] = s + delta[i * N + (k + 1)]; - } - assert(delta[i* N + k] >= 0); - } - } - } - - // Calculate clblas TRSM delta - - for (i = 0; i < (int)M; i++) { - for (k = 0; k < (int)N; k++) { - s = 0.0; - - /* - * Approach is the same as for the left side matrix, but delta - * is calculated over the rows rather than the columns. - * Now, since the matrices are swapped, the largest and - * tightest blocks are swapped as well. Therefore, pass - * direction for the upper and lower triangular matrix is also - * swapped. 
- */ - if (isUpper) { - jStart = 0; - jEnd = k / (int)bsize + 1; - z = jEnd - jStart; - zinc = -1; - } - else { - jStart = k / (int)bsize; - jEnd = (k + (int)bsize - 1) / (int)bsize; - z = 1; - zinc = 1; - } - - for (j = jStart; j < jEnd; j++) { - idx = j * (int)bsize + k % (int)bsize; - if (idx >= (int)N) { - continue; - } - s += z * delta[i * N + idx]; - z += zinc; - } - - deltaCLBLAS[i * N + k] = s * bsize; - assert(deltaCLBLAS[i* N + k] >= 0); - } - } - } - - for (k = 0; k < (int)N; k++) { - for (i = 0; i < (int)M; i++) { - delta[i * N + k] += deltaCLBLAS[i * N + k]; - } - } - - delete[] deltaCLBLAS; -} diff --git a/external/clBLAS/src/tests/correctness/trsv-delta.h b/external/clBLAS/src/tests/correctness/trsv-delta.h deleted file mode 100644 index 872fdba8..00000000 --- a/external/clBLAS/src/tests/correctness/trsv-delta.h +++ /dev/null @@ -1,296 +0,0 @@ -/* ************************************************************************ - * Copyright 2013 Advanced Micro Devices, Inc. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - * ************************************************************************/ - -#ifndef TRSV_DELTA_H_ -#define TRSV_DELTA_H_ - -#include "delta.h" - -static size_t -trsvBlockSize(size_t elemSize) -{ - /* TODO: Right now TRSV generators use block size of 16 elements for the - * double complex type, and of 32 elements for another types. - * If this changes, we have to fetch block size from TRSV generator - * somehow. 
- */ - return (elemSize == sizeof(DoubleComplex)) ? 16 : 32; -} - -template <typename T> -void -trsvDelta( - clblasOrder order, - clblasUplo uplo, - clblasTranspose transA, - clblasDiag diag, - size_t N, - T *A, - size_t lda, - T *X, - int incx, - cl_double *delta) -{ - cl_double *deltaCLBLAS, s; - int i, j, jStart, jEnd, idx; - int zinc; - size_t z = 0; - size_t bsize, lenX; - bool isUpper = false; - size_t previncxi=0; - T v; - - isUpper = ((uplo == clblasUpper) && (transA == clblasNoTrans)) || - ((uplo == clblasLower) && (transA != clblasNoTrans)); - // incx = abs(incx); - lenX = 1 + (N-1)*abs(incx); - deltaCLBLAS = new cl_double[lenX]; - bsize = trsvBlockSize(sizeof(T)); - - // Calculate delta of TRSV evaluated with the Gauss' method - - if (isUpper) { - for (i = (int)N - 1; i >= 0; i--) { - size_t incxi; - - incxi = (incx > 0) ? (i*incx) : (N-1-i)*abs(incx); - v = getElement<T>(clblasColumnMajor, clblasNoTrans, incxi, 0, X, lenX); - if (diag == clblasNonUnit) { - T tempA; - if(lda > 0) - { - tempA = getElement<T>(order, transA, i, i, A, lda); - } - else - { - tempA = getElementPacked(order, clblasNoTrans, uplo, i, i, A, N); - } - v = v / tempA; - } - s = module(v) * DELTA_0<T>(); - if (i == (int)(N - 1)) { - delta[ incxi ] = s; - } - else { - delta[ incxi ] = s + delta[ previncxi ]; - } - assert(delta[ incxi ] >= 0); - previncxi = incxi; - } - } - else { - for (i = 0; i < (int)N; i++) { - size_t incxi; - - incxi = (incx > 0) ? 
(i*incx) : (N-1-i)*abs(incx); - v = getElement<T>(clblasColumnMajor, clblasNoTrans, incxi, 0, X, lenX); - if (diag == clblasNonUnit) { - T tempA; - if(lda > 0) - { - tempA = getElement<T>(order, transA, i, i, A, lda); - } - else - { - tempA = getElementPacked(order, clblasNoTrans, uplo, i, i, A, N); - } - v = v / tempA; - } - s = module(v) * DELTA_0<T>(); - if (i == 0) { - delta[ incxi ] = s; - } - else { - delta[ incxi ] = s + delta[ previncxi ]; - } - assert(delta[ incxi ] >= 0); - previncxi = incxi; - } - } - - // Calculate clblas TRSV delta - - for (i = 0; i < (int)N; i++) { - size_t incxi; - s = 0.0; - - /* - * For the upper triangular matrix the solving process proceeds - * from the bottom to the top, and the bottommost block's - * delta influents most of all. For the lower triangular matrix - * the situation is opposite. - */ - if (isUpper) { - jStart = i / (int)bsize; - // index of the block just after the last matrix block - jEnd = ((int)N + (int)bsize - 1) / (int)bsize; - z = 1; - zinc = 1; - } - else { - jStart = 0; - jEnd = i / (int)bsize + 1; - z = jEnd - jStart; - zinc = -1; - } - - for (j = jStart; j < jEnd; j++) { - size_t incxi; - - idx = j * (int)bsize + i % (int)bsize; - if (idx >= (int)N) { - continue; - } - incxi = (incx > 0) ? (idx*incx) : (N-1-idx)*abs(incx); - s += z * delta[ incxi ]; - z += zinc; - } - - incxi = (incx > 0) ? (i*incx) : (N-1-i)*abs(incx); - deltaCLBLAS[ incxi ] = s * bsize; - assert(deltaCLBLAS[ incxi ] >= 0); - } - - for (i = 0; i < (int)N; i++) { - size_t incxi; - - incxi = (incx > 0) ? 
(i*incx) : (N-1-i)*abs(incx); - delta[ incxi ] += deltaCLBLAS[ incxi ]; - } - - delete[] deltaCLBLAS; -} - -template <typename T> -void -tbsvDelta( - clblasOrder order, - clblasUplo uplo, - clblasTranspose transA, - clblasDiag diag, - size_t N, - size_t K, - T *A, - size_t lda, - T *X, - int incx, - cl_double *delta) -{ - cl_double *deltaCLBLAS, s; - int i, j, jStart, jEnd, idx; - int zinc; - size_t z = 0; - size_t bsize, lenX; - bool isUpper = false; - size_t previncxi=0; - T v; - - isUpper = ((uplo == clblasUpper) && (transA == clblasNoTrans)) || - ((uplo == clblasLower) && (transA != clblasNoTrans)); - lenX = 1 + (N-1)*abs(incx); - deltaCLBLAS = new cl_double[lenX]; - bsize = trsvBlockSize(sizeof(T)); - - // Calculate delta of TRSV evaluated with the Gauss' method - - if (isUpper) { - for (i = (int)N - 1; i >= 0; i--) { - size_t incxi; - - incxi = (incx > 0) ? (i*incx) : (N-1-i)*abs(incx); - v = getElement<T>(clblasColumnMajor, clblasNoTrans, incxi, 0, X, lenX); - if (diag == clblasNonUnit) { - v = v / getElementBanded<T>(order, uplo, i, i, K, A, lda); - } - s = module(v) * DELTA_0<T>(); - if (i == (int)(N - 1)) { - delta[ incxi ] = s; - } - else { - delta[ incxi ] = s + delta[ previncxi ]; - } - assert(delta[ incxi ] >= 0); - previncxi = incxi; - } - } - else { - for (i = 0; i < (int)N; i++) { - size_t incxi; - - incxi = (incx > 0) ? 
(i*incx) : (N-1-i)*abs(incx); - v = getElement<T>(clblasColumnMajor, clblasNoTrans, incxi, 0, X, lenX); - if (diag == clblasNonUnit) { - v = v / getElementBanded<T>(order, uplo, i, i, K, A, lda); - } - s = module(v) * DELTA_0<T>(); - if (i == 0) { - delta[ incxi ] = s; - } - else { - delta[ incxi ] = s + delta[ previncxi ]; - } - assert(delta[ incxi ] >= 0); - previncxi = incxi; - } - } - - // Calculate clblas TRSV delta - - for (i = 0; i < (int)N; i++) { - size_t incxi; - s = 0.0; - if (isUpper) { - jStart = i / (int)bsize; - // index of the block just after the last matrix block - jEnd = ((int)N + (int)bsize - 1) / (int)bsize; - z = 1; - zinc = 1; - } - else { - jStart = 0; - jEnd = i / (int)bsize + 1; - z = jEnd - jStart; - zinc = -1; - } - - for (j = jStart; j < jEnd; j++) { - size_t incxi; - - idx = j * (int)bsize + i % (int)bsize; - if (idx >= (int)N) { - continue; - } - incxi = (incx > 0) ? (idx*incx) : (N-1-idx)*abs(incx); - s += z * delta[ incxi ]; - z += zinc; - } - - incxi = (incx > 0) ? (i*incx) : (N-1-i)*abs(incx); - deltaCLBLAS[ incxi ] = s * bsize; - assert(deltaCLBLAS[ incxi ] >= 0); - } - - for (i = 0; i < (int)N; i++) { - size_t incxi; - - incxi = (incx > 0) ? (i*incx) : (N-1-i)*abs(incx); - delta[ incxi ] += deltaCLBLAS[ incxi ]; - } - - delete[] deltaCLBLAS; -} -#endif - |