23 files changed, 566 insertions, 10 deletions
diff --git a/src/routines/level1/xamax.cc b/src/routines/level1/xamax.cc
index b4add2a3..6b6e7f9e 100644
--- a/src/routines/level1/xamax.cc
+++ b/src/routines/level1/xamax.cc
@@ -11,7 +11,7 @@
 //
 // =================================================================================================
 
-#include "internal/routines/level1/xamax.h"
+#include "routines/level1/xamax.hpp"
 
 #include <string>
 #include <vector>
diff --git a/src/routines/level1/xamax.hpp b/src/routines/level1/xamax.hpp
new file mode 100644
index 00000000..aa45a8e4
--- /dev/null
+++ b/src/routines/level1/xamax.hpp
@@ -0,0 +1,40 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xamax routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XAMAX_H_
+#define CLBLAST_ROUTINES_XAMAX_H_
+
+#include "routine.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Declares Xamax<T>, a Routine subclass for the AMAX routine; T is the element type of x_buffer
+template <typename T>
+class Xamax: public Routine {
+ public:
+
+  // Constructor: takes the queue and event; the routine name defaults to "AMAX"
+  Xamax(Queue &queue, EventPointer event, const std::string &name = "AMAX");
+
+  // Templated-precision implementation of the routine; the imax buffer holds unsigned integers
+  StatusCode DoAmax(const size_t n,
+                    const Buffer<unsigned int> &imax_buffer, const size_t imax_offset,
+                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XAMAX_H_
+#endif
diff --git a/src/routines/level1/xasum.cc b/src/routines/level1/xasum.cc
index 80f04829..0c1ce903 100644
--- a/src/routines/level1/xasum.cc
+++ b/src/routines/level1/xasum.cc
@@ -11,7 +11,7 @@
 //
 // =================================================================================================
 
-#include "internal/routines/level1/xasum.h"
+#include "routines/level1/xasum.hpp"
 
 #include <string>
 #include <vector>
diff --git a/src/routines/level1/xasum.hpp b/src/routines/level1/xasum.hpp
new file mode 100644
index 00000000..5a253f4d
--- /dev/null
+++ b/src/routines/level1/xasum.hpp
@@ -0,0 +1,40 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xasum routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XASUM_H_
+#define CLBLAST_ROUTINES_XASUM_H_
+
+#include "routine.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Declares Xasum<T>, a Routine subclass for the ASUM routine; T is the buffers' element type
+template <typename T>
+class Xasum: public Routine {
+ public:
+
+  // Constructor: takes the queue and event; the routine name defaults to "ASUM"
+  Xasum(Queue &queue, EventPointer event, const std::string &name = "ASUM");
+
+  // Templated-precision implementation of the routine; both buffers hold elements of type T
+  StatusCode DoAsum(const size_t n,
+                    const Buffer<T> &asum_buffer, const size_t asum_offset,
+                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XASUM_H_
+#endif
diff --git a/src/routines/level1/xaxpy.cc b/src/routines/level1/xaxpy.cc
index 4a548757..5b6c9e77 100644
--- a/src/routines/level1/xaxpy.cc
+++ b/src/routines/level1/xaxpy.cc
@@ -11,7 +11,7 @@
 //
 // =================================================================================================
 
-#include "internal/routines/level1/xaxpy.h"
+#include "routines/level1/xaxpy.hpp"
 
 #include <string>
 #include <vector>
diff --git a/src/routines/level1/xaxpy.hpp b/src/routines/level1/xaxpy.hpp
new file mode 100644
index 00000000..caac871e
--- /dev/null
+++ b/src/routines/level1/xaxpy.hpp
@@ -0,0 +1,40 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xaxpy routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XAXPY_H_
+#define CLBLAST_ROUTINES_XAXPY_H_
+
+#include "routine.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Declares Xaxpy<T>, a Routine subclass for the AXPY routine; T is the buffers' element type
+template <typename T>
+class Xaxpy: public Routine {
+ public:
+
+  // Constructor: takes the queue and event; the routine name defaults to "AXPY"
+  Xaxpy(Queue &queue, EventPointer event, const std::string &name = "AXPY");
+
+  // Templated-precision implementation of the routine; alpha is passed by value as a T
+  StatusCode DoAxpy(const size_t n, const T alpha,
+                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XAXPY_H_
+#endif
diff --git a/src/routines/level1/xcopy.cc b/src/routines/level1/xcopy.cc
index 92d31786..673ef349 100644
--- a/src/routines/level1/xcopy.cc
+++ b/src/routines/level1/xcopy.cc
@@ -11,7 +11,7 @@
 //
 // =================================================================================================
 
-#include "internal/routines/level1/xcopy.h"
+#include "routines/level1/xcopy.hpp"
 
 #include <string>
 #include <vector>
diff --git a/src/routines/level1/xcopy.hpp b/src/routines/level1/xcopy.hpp
new file mode 100644
index 00000000..0c424ba3
--- /dev/null
+++ b/src/routines/level1/xcopy.hpp
@@ -0,0 +1,40 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xcopy routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XCOPY_H_
+#define CLBLAST_ROUTINES_XCOPY_H_
+
+#include "routine.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Declares Xcopy<T>, a Routine subclass for the COPY routine; T is the buffers' element type
+template <typename T>
+class Xcopy: public Routine {
+ public:
+
+  // Constructor: takes the queue and event; the routine name defaults to "COPY"
+  Xcopy(Queue &queue, EventPointer event, const std::string &name = "COPY");
+
+  // Templated-precision implementation of the routine; x and y each take an offset and an inc
+  StatusCode DoCopy(const size_t n,
+                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XCOPY_H_
+#endif
diff --git a/src/routines/level1/xdot.cc b/src/routines/level1/xdot.cc
index 8709c541..bafea157 100644
--- a/src/routines/level1/xdot.cc
+++ b/src/routines/level1/xdot.cc
@@ -11,7 +11,7 @@
 //
 // =================================================================================================
 
-#include "internal/routines/level1/xdot.h"
+#include "routines/level1/xdot.hpp"
 
 #include <string>
 #include <vector>
diff --git a/src/routines/level1/xdot.hpp b/src/routines/level1/xdot.hpp
new file mode 100644
index 00000000..02c1efaa
--- /dev/null
+++ b/src/routines/level1/xdot.hpp
@@ -0,0 +1,42 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xdot routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XDOT_H_
+#define CLBLAST_ROUTINES_XDOT_H_
+
+#include "routine.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Declares Xdot<T>, a Routine subclass for the DOT routine; T is the buffers' element type
+template <typename T>
+class Xdot: public Routine {
+ public:
+
+  // Constructor: takes the queue and event; the routine name defaults to "DOT"
+  Xdot(Queue &queue, EventPointer event, const std::string &name = "DOT");
+
+  // Templated-precision implementation of the routine; do_conjugate defaults to false
+  StatusCode DoDot(const size_t n,
+                   const Buffer<T> &dot_buffer, const size_t dot_offset,
+                   const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+                   const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc,
+                   const bool do_conjugate = false);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XDOT_H_
+#endif
diff --git a/src/routines/level1/xdotc.cc b/src/routines/level1/xdotc.cc
index b3a01079..27cf2bab 100644
--- a/src/routines/level1/xdotc.cc
+++ b/src/routines/level1/xdotc.cc
@@ -11,7 +11,7 @@
 //
 // =================================================================================================
 
-#include "internal/routines/level1/xdotc.h"
+#include "routines/level1/xdotc.hpp"
 
 #include <string>
 #include <vector>
diff --git a/src/routines/level1/xdotc.hpp b/src/routines/level1/xdotc.hpp
new file mode 100644
index 00000000..b8cbdaf5
--- /dev/null
+++ b/src/routines/level1/xdotc.hpp
@@ -0,0 +1,44 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xdotc routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XDOTC_H_
+#define CLBLAST_ROUTINES_XDOTC_H_
+
+#include "routines/level1/xdot.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Declares Xdotc<T>, which derives from Xdot<T> and adds a DoDotc entry point
+template <typename T>
+class Xdotc: public Xdot<T> {
+ public:
+
+  // Makes the dependent base-class DoDot visible by unqualified name within this template
+  using Xdot<T>::DoDot;
+
+  // Constructor: takes the queue and event; the routine name defaults to "DOTC"
+  Xdotc(Queue &queue, EventPointer event, const std::string &name = "DOTC");
+
+  // Templated-precision implementation of the routine (declared only; defined outside this header)
+  StatusCode DoDotc(const size_t n,
+                    const Buffer<T> &dot_buffer, const size_t dot_offset,
+                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XDOTC_H_
+#endif
diff --git a/src/routines/level1/xdotu.cc b/src/routines/level1/xdotu.cc
index 8dded6e0..0bce70b7 100644
--- a/src/routines/level1/xdotu.cc
+++ b/src/routines/level1/xdotu.cc
@@ -11,7 +11,7 @@
 //
 // =================================================================================================
 
-#include "internal/routines/level1/xdotu.h"
+#include "routines/level1/xdotu.hpp"
 
 #include <string>
 
diff --git a/src/routines/level1/xdotu.hpp b/src/routines/level1/xdotu.hpp
new file mode 100644
index 00000000..b3f73086
--- /dev/null
+++ b/src/routines/level1/xdotu.hpp
@@ -0,0 +1,44 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xdotu routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XDOTU_H_
+#define CLBLAST_ROUTINES_XDOTU_H_
+
+#include "routines/level1/xdot.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Declares Xdotu<T>, which derives from Xdot<T> and adds a DoDotu entry point
+template <typename T>
+class Xdotu: public Xdot<T> {
+ public:
+
+  // Makes the dependent base-class DoDot visible by unqualified name within this template
+  using Xdot<T>::DoDot;
+
+  // Constructor: takes the queue and event; the routine name defaults to "DOTU"
+  Xdotu(Queue &queue, EventPointer event, const std::string &name = "DOTU");
+
+  // Templated-precision implementation of the routine (declared only; defined outside this header)
+  StatusCode DoDotu(const size_t n,
+                    const Buffer<T> &dot_buffer, const size_t dot_offset,
+                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XDOTU_H_
+#endif
diff --git a/src/routines/level1/xmax.hpp b/src/routines/level1/xmax.hpp
new file mode 100644
index 00000000..5a0236f2
--- /dev/null
+++ b/src/routines/level1/xmax.hpp
@@ -0,0 +1,49 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xmax routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XMAX_H_
+#define CLBLAST_ROUTINES_XMAX_H_
+
+#include "routine.hpp"
+#include "routines/level1/xamax.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xmax<T>: header-only wrapper around Xamax<T> whose default routine name is "MAX"
+template <typename T>
+class Xmax: public Xamax<T> {
+ public:
+
+  // Makes the dependent base-class DoAmax callable by unqualified name (used in DoMax below)
+  using Xamax<T>::DoAmax;
+
+  // Constructor: forwards the queue, event and name (default "MAX") to the Xamax<T> constructor
+  Xmax(Queue &queue, EventPointer event, const std::string &name = "MAX"):
+    Xamax<T>(queue, event, name) {
+  }
+
+  // Passes every argument unchanged to DoAmax of the absolute version. The implementation
+  // difference is realised in the kernel through a pre-processor macro based on the routine name.
+  StatusCode DoMax(const size_t n,
+                   const Buffer<unsigned int> &imax_buffer, const size_t imax_offset,
+                   const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
+    return DoAmax(n, imax_buffer, imax_offset, x_buffer, x_offset, x_inc);
+  }
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XMAX_H_
+#endif
diff --git a/src/routines/level1/xmin.hpp b/src/routines/level1/xmin.hpp
new file mode 100644
index 00000000..6befec64
--- /dev/null
+++ b/src/routines/level1/xmin.hpp
@@ -0,0 +1,49 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xmin routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XMIN_H_
+#define CLBLAST_ROUTINES_XMIN_H_
+
+#include "routine.hpp"
+#include "routines/level1/xamax.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Xmin<T>: header-only wrapper around Xamax<T> whose default routine name is "MIN"
+template <typename T>
+class Xmin: public Xamax<T> {
+ public:
+
+  // Makes the dependent base-class DoAmax callable by unqualified name (used in DoMin below)
+  using Xamax<T>::DoAmax;
+
+  // Constructor: forwards the queue, event and name (default "MIN") to the Xamax<T> constructor
+  Xmin(Queue &queue, EventPointer event, const std::string &name = "MIN"):
+    Xamax<T>(queue, event, name) {
+  }
+
+  // Passes every argument unchanged to DoAmax of the max-absolute version. The implementation
+  // difference is realised in the kernel through a pre-processor macro based on the routine name.
+  StatusCode DoMin(const size_t n,
+                   const Buffer<unsigned int> &imin_buffer, const size_t imin_offset,
+                   const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
+    return DoAmax(n, imin_buffer, imin_offset, x_buffer, x_offset, x_inc);
+  }
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XMIN_H_
+#endif
diff --git a/src/routines/level1/xnrm2.cc b/src/routines/level1/xnrm2.cc
index 105f991c..97615d8b 100644
--- a/src/routines/level1/xnrm2.cc
+++ b/src/routines/level1/xnrm2.cc
@@ -11,7 +11,7 @@
 //
 // =================================================================================================
 
-#include "internal/routines/level1/xnrm2.h"
+#include "routines/level1/xnrm2.hpp"
 
 #include <string>
 #include <vector>
diff --git a/src/routines/level1/xnrm2.hpp b/src/routines/level1/xnrm2.hpp
new file mode 100644
index 00000000..7baf07f5
--- /dev/null
+++ b/src/routines/level1/xnrm2.hpp
@@ -0,0 +1,40 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xnrm2 routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XNRM2_H_
+#define CLBLAST_ROUTINES_XNRM2_H_
+
+#include "routine.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Declares Xnrm2<T>, a Routine subclass for the NRM2 routine; T is the buffers' element type
+template <typename T>
+class Xnrm2: public Routine {
+ public:
+
+  // Constructor: takes the queue and event; the routine name defaults to "NRM2"
+  Xnrm2(Queue &queue, EventPointer event, const std::string &name = "NRM2");
+
+  // Templated-precision implementation of the routine; both buffers hold elements of type T
+  StatusCode DoNrm2(const size_t n,
+                    const Buffer<T> &nrm2_buffer, const size_t nrm2_offset,
+                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XNRM2_H_
+#endif
diff --git a/src/routines/level1/xscal.cc b/src/routines/level1/xscal.cc
index 3c1b5257..bcc43c3b 100644
--- a/src/routines/level1/xscal.cc
+++ b/src/routines/level1/xscal.cc
@@ -11,7 +11,7 @@
 //
 // =================================================================================================
 
-#include "internal/routines/level1/xscal.h"
+#include "routines/level1/xscal.hpp"
 
 #include <string>
 #include <vector>
diff --git a/src/routines/level1/xscal.hpp b/src/routines/level1/xscal.hpp
new file mode 100644
index 00000000..6c585cb2
--- /dev/null
+++ b/src/routines/level1/xscal.hpp
@@ -0,0 +1,39 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xscal routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XSCAL_H_
+#define CLBLAST_ROUTINES_XSCAL_H_
+
+#include "routine.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Declares Xscal<T>, a Routine subclass for the SCAL routine; T is the element type of x_buffer
+template <typename T>
+class Xscal: public Routine {
+ public:
+
+  // Constructor: takes the queue and event; the routine name defaults to "SCAL"
+  Xscal(Queue &queue, EventPointer event, const std::string &name = "SCAL");
+
+  // Templated-precision implementation of the routine; alpha is passed by value as a T
+  StatusCode DoScal(const size_t n, const T alpha,
+                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XSCAL_H_
+#endif
diff --git a/src/routines/level1/xsum.hpp b/src/routines/level1/xsum.hpp
new file mode 100644
index 00000000..84e20bea
--- /dev/null
+++ b/src/routines/level1/xsum.hpp
@@ -0,0 +1,49 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xsum routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XSUM_H_
+#define CLBLAST_ROUTINES_XSUM_H_
+
+#include "routine.hpp"
+#include "routines/level1/xasum.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// See comment at top of file for a description of the class
+template <typename T>
+class Xsum: public Xasum<T> {
+ public:
+
+  // Members and methods from the base class
+  using Xasum<T>::DoAsum;
+
+  // Constructor
+  Xsum(Queue &queue, EventPointer event, const std::string &name = "SUM"):
+    Xasum<T>(queue, event, name) {
+  }
+
+  // Forwards to the regular absolute version. The implementation difference is realised in the
+  // kernel through a pre-processor macro based on the name of the routine.
+  StatusCode DoSum(const size_t n,
+                   const Buffer<T> &sum_buffer, const size_t sum_offset,
+                   const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc) {
+    return DoAsum(n, sum_buffer, sum_offset, x_buffer, x_offset, x_inc);
+  }
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XSUM_H_
+#endif
diff --git a/src/routines/level1/xswap.cc b/src/routines/level1/xswap.cc
index 27eb9b13..03907cbd 100644
--- a/src/routines/level1/xswap.cc
+++ b/src/routines/level1/xswap.cc
@@ -11,7 +11,7 @@
 //
 // =================================================================================================
 
-#include "internal/routines/level1/xswap.h"
+#include "routines/level1/xswap.hpp"
 
 #include <string>
 #include <vector>
diff --git a/src/routines/level1/xswap.hpp b/src/routines/level1/xswap.hpp
new file mode 100644
index 00000000..4f9ea36d
--- /dev/null
+++ b/src/routines/level1/xswap.hpp
@@ -0,0 +1,40 @@
+
+// =================================================================================================
+// This file is part of the CLBlast project. The project is licensed under Apache Version 2.0. This
+// project loosely follows the Google C++ styleguide and uses a tab-size of two spaces and a max-
+// width of 100 characters per line.
+//
+// Author(s):
+//   Cedric Nugteren <www.cedricnugteren.nl>
+//
+// This file implements the Xswap routine. The precision is implemented using a template argument.
+//
+// =================================================================================================
+
+#ifndef CLBLAST_ROUTINES_XSWAP_H_
+#define CLBLAST_ROUTINES_XSWAP_H_
+
+#include "routine.hpp"
+
+namespace clblast {
+// =================================================================================================
+
+// Declares Xswap<T>, a Routine subclass for the SWAP routine; T is the buffers' element type
+template <typename T>
+class Xswap: public Routine {
+ public:
+
+  // Constructor: takes the queue and event; the routine name defaults to "SWAP"
+  Xswap(Queue &queue, EventPointer event, const std::string &name = "SWAP");
+
+  // Templated-precision implementation of the routine; x and y each take an offset and an inc
+  StatusCode DoSwap(const size_t n,
+                    const Buffer<T> &x_buffer, const size_t x_offset, const size_t x_inc,
+                    const Buffer<T> &y_buffer, const size_t y_offset, const size_t y_inc);
+};
+
+// =================================================================================================
+} // namespace clblast
+
+// CLBLAST_ROUTINES_XSWAP_H_
+#endif