summaryrefslogtreecommitdiff
path: root/ot/gpu/cudamat/cudamat/learn.cu
diff options
context:
space:
mode:
Diffstat (limited to 'ot/gpu/cudamat/cudamat/learn.cu')
-rw-r--r--ot/gpu/cudamat/cudamat/learn.cu34
1 files changed, 34 insertions, 0 deletions
diff --git a/ot/gpu/cudamat/cudamat/learn.cu b/ot/gpu/cudamat/cudamat/learn.cu
new file mode 100644
index 0000000..3d9260c
--- /dev/null
+++ b/ot/gpu/cudamat/cudamat/learn.cu
@@ -0,0 +1,34 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <cublas.h>
+#include "learn_kernels.cuh"
+#include "cudamat.cuh"
+
+extern "C" {
+
+inline bool checkCUDAError() {
+ cudaError_t err = cudaGetLastError();
+
+ if (cudaSuccess != err)
+ printf("%s\n", cudaGetErrorString( err));
+ return cudaSuccess != err;
+}
+
+EXPORT int mult_by_sigmoid_deriv(cudamat* target, cudamat* acts) {
+ int len = acts->size[0]*acts->size[1];
+
+ if (acts->is_trans != target->is_trans)
+ return ERROR_TRANSPOSED;
+
+ if (acts->size[0] != target->size[0] || acts->size[1] != target->size[1])
+ return ERROR_INCOMPATIBLE_DIMENSIONS;
+
+ kMultiplyBySigmoidGrad<<<NUM_VECTOR_OP_BLOCKS,NUM_VECTOR_OP_THREADS_PER_BLOCK>>>(acts->data_device, target->data_device, len);
+
+ if (checkCUDAError())
+ return CUDA_ERROR;
+
+ return 0;
+}
+
+}