summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
author cnugteren <web@cedricnugteren.nl> 2016-03-30 21:37:56 -0700
committer cnugteren <web@cedricnugteren.nl> 2016-03-30 21:37:56 -0700
commit 8c3c6db7d07adaacb702fdaabfdf18f74fbfea13 (patch)
tree f6dcd3f9d4f987ec74f87b1939c4b3600a7d42d0 /include
parent 6578102ae996ce0aa52b45704f38c1cd5a10d3c0 (diff)
parent 5409f349a17f60ba68133fd0cc9789fb2918f790 (diff)
Merge branch 'level1_routines' into development
Diffstat (limited to 'include')
-rw-r--r-- include/clblast.h 7
-rw-r--r-- include/clblast_c.h 18
-rw-r--r-- include/internal/routines/level1/xnrm2.h 55
-rw-r--r-- include/internal/utilities.h 6
4 files changed, 84 insertions, 2 deletions
diff --git a/include/clblast.h b/include/clblast.h
index ac16188f..5e5c5a46 100644
--- a/include/clblast.h
+++ b/include/clblast.h
@@ -174,6 +174,13 @@ StatusCode Dotc(const size_t n,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event = nullptr);
+// Euclidean norm of a vector: SNRM2/DNRM2/ScNRM2/DzNRM2
+template <typename T>
+StatusCode Nrm2(const size_t n,
+ cl_mem nrm2_buffer, const size_t nrm2_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event = nullptr);
+
// =================================================================================================
// BLAS level-2 (matrix-vector) routines
// =================================================================================================
diff --git a/include/clblast_c.h b/include/clblast_c.h
index a5563951..dcb3ae3a 100644
--- a/include/clblast_c.h
+++ b/include/clblast_c.h
@@ -260,6 +260,24 @@ StatusCode PUBLIC_API CLBlastZdotc(const size_t n,
const cl_mem y_buffer, const size_t y_offset, const size_t y_inc,
cl_command_queue* queue, cl_event* event);
+// Euclidean norm of a vector: SNRM2/DNRM2/ScNRM2/DzNRM2
+StatusCode PUBLIC_API CLBlastSnrm2(const size_t n,
+ cl_mem nrm2_buffer, const size_t nrm2_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event);
+StatusCode PUBLIC_API CLBlastDnrm2(const size_t n,
+ cl_mem nrm2_buffer, const size_t nrm2_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event);
+StatusCode PUBLIC_API CLBlastScnrm2(const size_t n,
+ cl_mem nrm2_buffer, const size_t nrm2_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event);
+StatusCode PUBLIC_API CLBlastDznrm2(const size_t n,
+ cl_mem nrm2_buffer, const size_t nrm2_offset,
+ const cl_mem x_buffer, const size_t x_offset, const size_t x_inc,
+ cl_command_queue* queue, cl_event* event);
+
// =================================================================================================
// BLAS level-2 (matrix-vector) routines
// =================================================================================================
diff --git a/include/internal/routines/level1/xnrm2.h b/include/internal/routines/level1/xnrm2.h
new file mode 100644
index 00000000..b3fffef6
--- /dev/null
+++ b/include/internal/routines/level1/xnrm2.h
@@ -0,0 +1,55 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+// Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xnrm2 routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XNRM2_H_
+#define CLBLAST_ROUTINES_XNRM2_H_
+
+#include "internal/routine.h"
+
+namespace clblast {
+// =================================================================================================
+
+// Routine class for NRM2 (Euclidean norm of a vector); the template argument T sets the precision
+template <typename T>
+class Xnrm2: public Routine<T> {
+ public:
+
+ // Names from the dependent base class Routine<T>, made visible here via using-declarations
+ using Routine<T>::db_;
+ using Routine<T>::source_string_;
+ using Routine<T>::queue_;
+ using Routine<T>::context_;
+ using Routine<T>::GetProgramFromCache;
+ using Routine<T>::TestVectorX;
+ using Routine<T>::TestVectorDot;
+ using Routine<T>::RunKernel;
+ using Routine<T>::ErrorIn;
+
+ // Constructor: takes the queue and event by reference; the routine name defaults to "NRM2"
+ Xnrm2(Queue &queue, Event &event, const std::string &name = "NRM2");
+
+ // Templated-precision implementation of the routine; returns a StatusCode
+ StatusCode DoNrm2(const size_t n,
+ const Buffer<T> &nrm2_buffer, const size_t nrm2_offset,
+ const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+
+ private:
+ // Static constant for the precision; only declared here, its definition is not in this header
+ const static Precision precision_;
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XNRM2_H_
+#endif
diff --git a/include/internal/utilities.h b/include/internal/utilities.h
index b6307a85..35f76722 100644
--- a/include/internal/utilities.h
+++ b/include/internal/utilities.h
@@ -61,6 +61,7 @@ constexpr auto kArgBOffset = "offb";
constexpr auto kArgCOffset = "offc";
constexpr auto kArgAPOffset = "offap";
constexpr auto kArgDotOffset = "offdot";
+constexpr auto kArgNrm2Offset = "offnrm2";
constexpr auto kArgAlpha = "alpha";
constexpr auto kArgBeta = "beta";
@@ -113,6 +114,7 @@ struct Arguments {
size_t c_offset = 0;
size_t ap_offset = 0;
size_t dot_offset = 0;
+ size_t nrm2_offset = 0;
T alpha = T{1.0};
T beta = T{1.0};
size_t x_size = 1;
@@ -121,7 +123,7 @@ struct Arguments {
size_t b_size = 1;
size_t c_size = 1;
size_t ap_size = 1;
- size_t dot_size = 1;
+ size_t scalar_size = 1;
// Tuner-specific arguments
double fraction = 1.0;
// Client-specific arguments
@@ -149,7 +151,7 @@ struct Buffers {
Buffer<T> b_mat;
Buffer<T> c_mat;
Buffer<T> ap_mat;
- Buffer<T> dot;
+ Buffer<T> scalar;
};
// =================================================================================================