add caffe_gpu_add() and caffe_gpu_sub()

author Evan Shelhamer <shelhamer@imaginarynumber.net>

Thu, 22 May 2014 01:57:33 +0000 (18:57 -0700)

committer Evan Shelhamer <shelhamer@imaginarynumber.net>

Thu, 22 May 2014 02:48:29 +0000 (19:48 -0700)
author Evan Shelhamer <shelhamer@imaginarynumber.net>
Thu, 22 May 2014 01:57:33 +0000 (18:57 -0700)
committer Evan Shelhamer <shelhamer@imaginarynumber.net>
Thu, 22 May 2014 02:48:29 +0000 (19:48 -0700)
diff --git a/include/caffe/util/math_functions.hpp b/include/caffe/util/math_functions.hpp

index d9c7835..25ed470 100644
--- a/include/caffe/util/math_functions.hpp
+++ b/include/caffe/util/math_functions.hpp
@@ -87,9 +87,15 @@ template <typename Dtype>
  void caffe_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);
  
  template <typename Dtype>
+void caffe_gpu_add(const int N, const Dtype* a, const Dtype* b, Dtype* y);
+
+template <typename Dtype>
  void caffe_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y);
  
  template <typename Dtype>
+void caffe_gpu_sub(const int N, const Dtype* a, const Dtype* b, Dtype* y);
+
+template <typename Dtype>
  void caffe_mul(const int N, const Dtype* a, const Dtype* b, Dtype* y);
  
  template <typename Dtype>
diff --git a/src/caffe/util/math_functions.cu b/src/caffe/util/math_functions.cu

index 184613c..63c8fac 100644
--- a/src/caffe/util/math_functions.cu
+++ b/src/caffe/util/math_functions.cu
@@ -64,6 +64,54 @@ void caffe_gpu_add_scalar(const int N, const double alpha, double* Y) {
  }
  
  template <typename Dtype>
+__global__ void add_kernel(const int n, const Dtype* a,
+    const Dtype* b, Dtype* y) {
+  CUDA_KERNEL_LOOP(index, n) {
+    y[index] = a[index] + b[index];
+  }
+}
+
+template <>
+void caffe_gpu_add<float>(const int N, const float* a, const float* b,
+    float* y) {
+  // NOLINT_NEXT_LINE(whitespace/operators)
+  add_kernel<float><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
+      N, a, b, y);
+}
+
+template <>
+void caffe_gpu_add<double>(const int N, const double* a, const double* b,
+    double* y) {
+  // NOLINT_NEXT_LINE(whitespace/operators)
+  add_kernel<double><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
+      N, a, b, y);
+}
+
+template <typename Dtype>
+__global__ void sub_kernel(const int n, const Dtype* a,
+    const Dtype* b, Dtype* y) {
+  CUDA_KERNEL_LOOP(index, n) {
+    y[index] = a[index] - b[index];
+  }
+}
+
+template <>
+void caffe_gpu_sub<float>(const int N, const float* a, const float* b,
+    float* y) {
+  // NOLINT_NEXT_LINE(whitespace/operators)
+  sub_kernel<float><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
+      N, a, b, y);
+}
+
+template <>
+void caffe_gpu_sub<double>(const int N, const double* a, const double* b,
+    double* y) {
+  // NOLINT_NEXT_LINE(whitespace/operators)
+  sub_kernel<double><<<CAFFE_GET_BLOCKS(N), CAFFE_CUDA_NUM_THREADS>>>(
+      N, a, b, y);
+}
+
+template <typename Dtype>
  __global__ void mul_kernel(const int n, const Dtype* a,
      const Dtype* b, Dtype* y) {
    CUDA_KERNEL_LOOP(index, n) {
author	Evan Shelhamer <shelhamer@imaginarynumber.net>
	Thu, 22 May 2014 01:57:33 +0000 (18:57 -0700)
committer	Evan Shelhamer <shelhamer@imaginarynumber.net>
	Thu, 22 May 2014 02:48:29 +0000 (19:48 -0700)
include/caffe/util/math_functions.hpp		patch | blob | history
src/caffe/util/math_functions.cu		patch | blob | history