namespace caffe {
template <typename Dtype>
-__global__ void LRNFillScale(const int nthreads, const Dtype* in,
+__global__ void LRNFillScale(const int nthreads, const Dtype* const in,
const int num, const int channels, const int height,
const int width, const int size, const Dtype alpha_over_size,
- const Dtype k, Dtype* scale) {
+ const Dtype k, Dtype* const scale) {
CUDA_KERNEL_LOOP(index, nthreads) {
// find out the local offset
- int w = index % width;
- int h = (index / width) % height;
- int n = index / width / height;
- int offset = (n * channels * height + h) * width + w;
- int step = height * width;
+ const int w = index % width;
+ const int h = (index / width) % height;
+ const int n = index / width / height;
+ const int offset = (n * channels * height + h) * width + w;
+ const int step = height * width;
const Dtype* const in_off = in + offset;
Dtype* const scale_off = scale + offset;
int head = 0;
- int pre_pad = (size - 1) / 2;
- int post_pad = size - pre_pad - 1;
+ const int pre_pad = (size - 1) / 2;
+ const int post_pad = size - pre_pad - 1;
Dtype accum_scale = 0;
// fill the scale at [n, :, h, w]
// accumulate values
// TODO: check if it would be faster to just put it into the previous kernel.
template <typename Dtype>
// Computes the LRN forward output given a precomputed scale map:
//   out[i] = in[i] * scale[i]^(-beta)
// where `negative_beta` is -beta and `scale` holds k + alpha/size * sum(sq)
// as filled by LRNFillScale. `in`, `scale`, and `out` are flat device
// buffers of `nthreads` elements; CUDA_KERNEL_LOOP is a grid-stride loop,
// so any 1-D launch configuration covering nthreads is valid.
// NOTE: pow (not powf) is used so the kernel stays correct for
// Dtype == double; for Dtype == float this promotes through double.
__global__ void LRNComputeOutput(const int nthreads, const Dtype* const in,
    const Dtype* const scale, const Dtype negative_beta, Dtype* const out) {
  CUDA_KERNEL_LOOP(index, nthreads) {
    out[index] = in[index] * pow(scale[index], negative_beta);
  }
}
template <typename Dtype>
-__global__ void LRNComputeDiff(const int nthreads, const Dtype* bottom_data,
- const Dtype* top_data, const Dtype* scale, const Dtype* top_diff,
+__global__ void LRNComputeDiff(const int nthreads,
+ const Dtype* const bottom_data, const Dtype* const top_data,
+ const Dtype* const scale, const Dtype* const top_diff,
const int num, const int channels, const int height,
const int width, const int size, const Dtype negative_beta,
- const Dtype cache_ratio,
- Dtype* bottom_diff) {
+ const Dtype cache_ratio, Dtype* const bottom_diff) {
CUDA_KERNEL_LOOP(index, nthreads) {
// find out the local offset
- int w = index % width;
- int h = (index / width) % height;
- int n = index / width / height;
- int offset = (n * channels * height + h) * width + w;
- int step = height * width;
+ const int w = index % width;
+ const int h = (index / width) % height;
+ const int n = index / width / height;
+ const int offset = (n * channels * height + h) * width + w;
+ const int step = height * width;
const Dtype* const bottom_off = bottom_data + offset;
const Dtype* const top_off = top_data + offset;
const Dtype* const scale_off = scale + offset;
const Dtype* const top_diff_off = top_diff + offset;
Dtype* const bottom_diff_off = bottom_diff + offset;
int head = 0;
- int pre_pad = size - (size + 1) / 2;
- int post_pad = size - pre_pad - 1;
+ const int pre_pad = size - (size + 1) / 2;
+ const int post_pad = size - pre_pad - 1;
Dtype accum_ratio = 0;
// accumulate values
while (head < post_pad && head < channels) {