summaryrefslogtreecommitdiff
path: root/external/clBLAS/src/client/clfunc_xger.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'external/clBLAS/src/client/clfunc_xger.hpp')
-rw-r--r--external/clBLAS/src/client/clfunc_xger.hpp425
1 files changed, 0 insertions, 425 deletions
diff --git a/external/clBLAS/src/client/clfunc_xger.hpp b/external/clBLAS/src/client/clfunc_xger.hpp
deleted file mode 100644
index d2f36dbc..00000000
--- a/external/clBLAS/src/client/clfunc_xger.hpp
+++ /dev/null
@@ -1,425 +0,0 @@
-/* ************************************************************************
- * Copyright 2013 Advanced Micro Devices, Inc.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- * ************************************************************************/
-
-
-// $Id
-
-#ifndef CLBLAS_BENCHMARK_XGER_HXX__
-#define CLBLAS_BENCHMARK_XGER_HXX__
-
-#include "clfunc_common.hpp"
-
-template <typename T>
-struct xGerBuffer
-{
- clblasOrder order_;
- size_t m_;
- size_t n_;
- T alpha;
- T* X;
- cl_mem x_;
- size_t offX;
- int incx_;
- T* Y;
- cl_mem y_;
- size_t offY;
- int incy_;
- T* A;
- cl_mem a_;
- size_t a_num_vectors_;
- size_t offA;
- size_t lda_;
-}; // struct buffer
-
-template <typename T>
-class xGer : public clblasFunc
-{
-public:
- xGer(StatisticalTimer& timer, cl_device_type devType) : clblasFunc(timer, devType)
- {
- timer.getUniqueID("clGer", 0);
- }
-
- ~xGer()
- {
- delete buffer_.X;
- delete buffer_.Y;
- delete buffer_.A;
- OPENCL_V_THROW( clReleaseMemObject(buffer_.x_), "releasing buffer X");
- OPENCL_V_THROW( clReleaseMemObject(buffer_.y_), "releasing buffer Y");
- OPENCL_V_THROW( clReleaseMemObject(buffer_.a_), "releasing buffer A");
- }
-
- //void call_func() {}
-
- double gflops()
- {
- return (buffer_.m_*(buffer_.m_+1))/time_in_ns();
- }
-
- std::string gflops_formula()
- {
- return "M*(M+1)/time";
- }
-
- void setup_buffer(int order_option, int side_option, int
- uplo_option, int diag_option, int transA_option, int
- transB_option, size_t M, size_t N, size_t K,
- size_t lda, size_t ldb, size_t ldc,size_t offA,
- size_t offB, size_t offC, double alpha,
- double beta)
- {
- initialize_scalars(alpha, beta);
-
- buffer_.m_ = M;
- buffer_.n_ = N;
- buffer_.incx_ = 1;
- buffer_.incy_ = 1;
-
- if (order_option == 0)
- {
- buffer_.order_ = clblasRowMajor;
- }
- else
- {
- buffer_.order_ = clblasColumnMajor;
- }
-
-
- if (lda == 0)
- {
- buffer_.lda_ = M;
- }
- else
- {
- if( lda < M )
- {
- std::cerr << "ERROR: lda must be set to 0 or a value >= M" << std::endl;
- }
- else if (lda >= M)
- {
- buffer_.lda_ = lda;
- }
- }
- buffer_.offA = offA;
- buffer_.offX = offB;
- buffer_.offY = offC;
-
-
- buffer_.a_num_vectors_ = buffer_.n_;
- size_t sizeA = buffer_.lda_*buffer_.a_num_vectors_;
- size_t sizeX = buffer_.m_;
- size_t sizeY = buffer_.n_;
- buffer_.A = new T[sizeA];
- buffer_.X = new T[sizeX];
- buffer_.Y = new T[sizeY];
-
-
- cl_int err;
- buffer_.a_ = clCreateBuffer(ctx_, CL_MEM_READ_ONLY,
- sizeA*sizeof(T),
- NULL, &err);
-
- buffer_.x_ = clCreateBuffer(ctx_, CL_MEM_READ_WRITE,
- sizeX*sizeof(T),
- NULL, &err);
- buffer_.y_ = clCreateBuffer(ctx_, CL_MEM_READ_WRITE,
- sizeY*sizeof(T),
- NULL, &err);
- }
-
- void initialize_cpu_buffer()
- {
- srand(10);
-
- for (size_t i = 0; i < buffer_.m_; ++i)
- {
- buffer_.X[i] = static_cast<T>(rand())/static_cast<T>(RAND_MAX);
- }
- for (size_t i = 0; i < buffer_.n_; ++i)
- {
- buffer_.Y[i] = static_cast<T>(rand())/static_cast<T>(RAND_MAX);
- }
-
- for (size_t i = 0; i < buffer_.a_num_vectors_; ++i)
- {
- for (size_t j = 0; j < buffer_.lda_; ++j)
- {
- if (i == j)
- {
- /*if (buffer_.diag_ == clblasUnit)
- {
- buffer_.a_[i*buffer_.lda_+j] = static_cast<T>(1.0);
- }
- else
- {*/
- buffer_.A[i*buffer_.lda_+j] =
- static_cast<T>(rand())/static_cast<T>(RAND_MAX);
- //}
- }
- else
- {
- buffer_.A[i*buffer_.lda_+j] = static_cast<T>(0.0);
- }
- }
- }
- }
-
- void initialize_gpu_buffer()
- {
- cl_int err;
-
- err = clEnqueueWriteBuffer(queue_, buffer_.a_, CL_TRUE, 0,
- buffer_.lda_*buffer_.a_num_vectors_*sizeof(T),
- buffer_.A, 0, NULL, NULL);
-
- err = clEnqueueWriteBuffer(queue_, buffer_.x_, CL_TRUE, 0,
- buffer_.m_*sizeof(T),
- buffer_.X, 0, NULL, NULL);
- err = clEnqueueWriteBuffer(queue_, buffer_.y_, CL_TRUE, 0,
- buffer_.n_*sizeof(T),
- buffer_.Y, 0, NULL, NULL);
- }
-
- void reset_gpu_write_buffer()
- {
- cl_int err;
- err = clEnqueueWriteBuffer(queue_, buffer_.x_, CL_TRUE, 0,
- buffer_.m_,
- buffer_.x_, 0, NULL, NULL);
- }
- void call_func();
-
- void read_gpu_buffer()
- {
- //cl_int err;
- //to-do need to fill up
- }
- void roundtrip_func()
- {//to-do need to fill up
- }
- void roundtrip_setup_buffer(int order_option, int side_option, int uplo_option,
- int diag_option, int transA_option, int transB_option,
- size_t M, size_t N, size_t K, size_t lda, size_t ldb,
- size_t ldc, size_t offA, size_t offBX, size_t offCY,
- double alpha, double beta)
- {}
- void releaseGPUBuffer_deleteCPUBuffer()
- {
- //this is necessary since we are running a iteration of tests and calculate the average time. (in client.cpp)
- //need to do this before we eventually hit the destructor
- //to-do
- }
-
-protected:
- void initialize_scalars(double alpha, double beta)
- {
- buffer_.alpha = alpha;
- }
-
-private:
- xGerBuffer<T> buffer_;
-
-}; // class xger
-
-//template<>
-//void
-//xGer<cl_float2>::
-//initialize_scalars(double alpha, double beta)
-//{
-// buffer_.alpha = alpha;
-//}
-
-//template<>
-//void
-//xGer<cl_double2>::
-//initialize_scalars(double alpha, double beta)
-//{
-//}
-
-template<>
-void
-xGer<cl_float>::
-call_func()
-{
- timer.Start(timer_id);
- clblasSger(buffer_.order_, buffer_.m_, buffer_.n_, buffer_.alpha, buffer_.x_, buffer_.offX, 1, buffer_.y_, buffer_.offY,
- 1, buffer_.a_, buffer_.offA, buffer_.lda_, 1, &queue_, 0, NULL,
- &event_);
- clWaitForEvents(1, &event_);
- timer.Stop(timer_id);
-}
-
-template<>
-void
-xGer<cl_double>::
-call_func()
-{
- timer.Start(timer_id);
- clblasDger(buffer_.order_, buffer_.m_, buffer_.n_, buffer_.alpha, buffer_.x_, buffer_.offX, 1, buffer_.y_, buffer_.offY,
- 1, buffer_.a_, buffer_.offA, buffer_.lda_, 1, &queue_, 0, NULL,
- &event_);
- clWaitForEvents(1, &event_);
- timer.Stop(timer_id);
-}
-
-//template<>
-//void
-//xGer<cl_float2>::
-//call_func()
-//{
-// timer.Start(timer_id);
-// clblasCger(order_, buffer_.m_, buffer_.n, buffer_a_, 0,
-// buffer_.lda_, buffer_x_, 0, 1, 1, &queue_, 0, NULL,
-// &event_);
-// clWaitForEvents(1, &event_);
-// timer.Stop(timer_id);
-//}
-//
-//template<>
-//void
-//xGer<cl_double2>::
-//call_func()
-//{
-// timer.Start(timer_id);
-// clblasZger(order_, buffer_.uplo_, buffer_.trans_a_,
-// buffer_.diag_, buffer_.m_, buffer_a_, 0,
-// buffer_.lda_, buffer_x_, 0, 1, 1, &queue_, 0, NULL,
-// &event_);
-// clWaitForEvents(1, &event_);
-// timer.Stop(timer_id);
-//}
-
-//template<>
-//void
-//xGer<cl_float2>::
-//initialize_cpu_buffer()
-//{
-// srand(10);
-// for (size_t i = 0; i < buffer_.m_; ++i)
-// {
-// buffer_x_[i].s[0] =
-// static_cast<cl_float>(rand())/static_cast<cl_float>(RAND_MAX);
-// buffer_.x_[i].s[1] =
-// static_cast<cl_float>(rand())/static_cast<cl_float>(RAND_MAX);
-// }
-//
-// for (size_t i = 0; i < buffer_.a_num_vectors_; ++i)
-// {
-// for (size_t j = 0; j < buffer_.lda_; ++j)
-// {
-// if (i == j)
-// {
-// if (buffer_.diag_ == clblasUnit)
-// {
-// buffer_.a_[i*buffer_.lda_+j].s[0] = 1.0f;
-// buffer_.a_[i*buffer_.lda_+j].s[1] = 0.0f;
-// }
-// else
-// {
-// buffer_.a_[i*buffer_.lda_+j].s[0] =
-// static_cast<cl_float>(rand())/static_cast<cl_float>(RAND_MAX);
-// buffer_.a_[i*buffer_.lda_+j].s[1] =
-// static_cast<cl_float>(rand())/static_cast<cl_float>(RAND_MAX);
-// }
-// }
-// else
-// {
-// buffer_.a_[i*buffer_.lda_+j].s[0] = 0.0f;
-// buffer_.a_[i*buffer_.lda_+j].s[1] = 0.0f;
-// }
-// }
-// }
-//
-//
-//}
-
-//template<>
-//void
-//xGer<cl_double2>::
-//initialize_cpu_buffer()
-//{
-// srand(10);
-// for (size_t i = 0; i < buffer_.m_; ++i)
-// {
-// buffer_.x_[i].s[0] =
-// static_cast<cl_double>(rand())/static_cast<cl_double>(RAND_MAX);
-// buffer_.x_[i].s[1] =
-// static_cast<cl_double>(rand())/static_cast<cl_double>(RAND_MAX);
-// }
-//
-// for (size_t i = 0; i < buffer_.a_num_vectors_; ++i)
-// {
-// for (size_t j = 0; j < buffer_.lda_; ++j)
-// {
-// if (i == j)
-// {
-// if (buffer_.diag_ == clblasUnit)
-// {
-// buffer_.a_[i*buffer_.lda_+j].s[0] = 1.0;
-// buffer_.a_[i*buffer_.lda_+j].s[1] = 0.0;
-// }
-// else
-// {
-// buffer_.a_[i*buffer_.lda_+j].s[0] =
-// static_cast<cl_double>(rand())/static_cast<cl_double>(RAND_MAX);
-// buffer_.a_[i*buffer_.lda_+j].s[1] =
-// static_cast<cl_double>(rand())/static_cast<cl_double>(RAND_MAX);
-// }
-// }
-// else
-// {
-// buffer_.a_[i*buffer_.lda_+j].s[0] = 0.0;
-// buffer_.a_[i*buffer_.lda_+j].s[1] = 0.0;
-// }
-// }
-// }
-//}
-
-//template<>
-//double
-//xGer<cl_float2>::
-//gflops()
-//{
-// return 2.0*buffer_.m_*(buffer_.m_+1)/time_in_ns();
-//}
-//
-//template<>
-//double
-//xGer<cl_double2>::
-//gflops()
-//{
-// return 2.0*buffer_.m_*(buffer_.m_+1)/time_in_ns();
-//}
-//
-//template<>
-//std::string
-//xGer<cl_float2>::
-//gflops_formula()
-//{
-// return "2.0*M*(M+1)/time";
-//}
-//
-//template<>
-//std::string
-//xGer<cl_double2>::
-//gflops_formula()
-//{
-// return "2.0*M*(M+1)/time";
-//}
-
-
-#endif // ifndef CLBLAS_BENCHMARK_XGER_HXX__