summaryrefslogtreecommitdiff
path: root/ot/gpu/cudamat/cudamat/learn.cu
blob: 3d9260cac56bbfe610c2907b2d4c03b6b1e62151 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
#include <stdio.h>
#include <stdlib.h>
#include <cublas.h>
#include "learn_kernels.cuh"
#include "cudamat.cuh"

extern "C" {

/*
 * Queries the CUDA runtime for the most recent error via cudaGetLastError()
 * (which reads and clears the stored error). If one is pending, its
 * human-readable description is printed to stdout.
 *
 * Returns true when an error was reported, false when the runtime
 * returned cudaSuccess.
 */
inline bool checkCUDAError() {
    const cudaError_t status = cudaGetLastError();
    const bool failed = (status != cudaSuccess);

    if (failed) {
        printf("%s\n", cudaGetErrorString(status));
    }

    return failed;
}

/*
 * Launches kMultiplyBySigmoidGrad on the device buffers of `acts` and
 * `target`, covering acts->size[0] * acts->size[1] elements.
 * NOTE(review): judging by the kernel name, this presumably scales `target`
 * in place by the sigmoid derivative evaluated from `acts` -- confirm
 * against the kernel definition in learn_kernels.cuh.
 *
 * Parameters:
 *   target - matrix whose device data is passed as the kernel's second argument.
 *   acts   - matrix whose device data is passed as the kernel's first argument.
 *
 * Returns:
 *   ERROR_TRANSPOSED              if the two matrices differ in is_trans,
 *   ERROR_INCOMPATIBLE_DIMENSIONS if their sizes differ in either dimension,
 *   CUDA_ERROR                    if checkCUDAError() reports an error after
 *                                 the launch,
 *   0                             otherwise.
 */
EXPORT int mult_by_sigmoid_deriv(cudamat* target, cudamat* acts) {
    // Both operands must agree on their transposition flag.
    if (target->is_trans != acts->is_trans) {
        return ERROR_TRANSPOSED;
    }

    // Both operands must have identical extents in each dimension.
    if (!(target->size[0] == acts->size[0] && target->size[1] == acts->size[1])) {
        return ERROR_INCOMPATIBLE_DIMENSIONS;
    }

    // Total number of elements handed to the kernel.
    int num_elements = acts->size[0] * acts->size[1];

    kMultiplyBySigmoidGrad<<<NUM_VECTOR_OP_BLOCKS, NUM_VECTOR_OP_THREADS_PER_BLOCK>>>(
        acts->data_device, target->data_device, num_elements);

    // Surface any error the runtime recorded for the launch.
    return checkCUDAError() ? CUDA_ERROR : 0;
}

}