void caffe_gpu_abs(const int n, const Dtype* a, Dtype* y);
template <typename Dtype>
+void caffe_gpu_exp(const int n, const Dtype* a, Dtype* y);  // y[i] = exp(a[i])
+
+template <typename Dtype>
void caffe_gpu_powx(const int n, const Dtype* a, const Dtype b, Dtype* y);  // presumably y[i] = a[i]^b; definition not visible here - TODO confirm
// caffe_gpu_rng_uniform with two arguments generates integers in the range
+// Element-wise exponential kernel: for every index that CUDA_KERNEL_LOOP
+// assigns to this thread (bounded by n), reads a[idx] and stores exp of that
+// value into y[idx]. Each element is read before it is written, so the
+// per-element result is the same whether or not a and y alias.
template <typename Dtype>
+__global__ void exp_kernel(const int n, const Dtype* a, Dtype* y) {
+  CUDA_KERNEL_LOOP(idx, n) {
+    const Dtype x = a[idx];
+    y[idx] = exp(x);
+  }
+}
+
+// float specialization of caffe_gpu_exp: launches exp_kernel<float> over N
+// elements so that y[i] receives exp(a[i]). a and y are forwarded to the
+// kernel unchanged (presumably device pointers -- confirm with callers).
+// NOTE(review): nothing in this function checks the launch for errors.
+template <>
+void caffe_gpu_exp<float>(const int N, const float* a, float* y) {
+  // NOLINT_NEXT_LINE(whitespace/operators)
+  exp_kernel<float><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(N, a, y);
+}
+
+// double specialization of caffe_gpu_exp: launches exp_kernel<double> over N
+// elements so that y[i] receives exp(a[i]). a and y are forwarded to the
+// kernel unchanged (presumably device pointers -- confirm with callers).
+// NOTE(review): nothing in this function checks the launch for errors.
+template <>
+void caffe_gpu_exp<double>(const int N, const double* a, double* y) {
+  // NOLINT_NEXT_LINE(whitespace/operators)
+  exp_kernel<double><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(N, a, y);
+}
+
+template <typename Dtype>
__global__ void powx_kernel(const int n, const Dtype* a,
const Dtype alpha, Dtype* y) {
CUDA_KERNEL_LOOP(index, n) {