++head;
}
// subtract only
- while (head < size + post_pad) {
+ while (head < channels + post_pad) {
accum_scale -= in[(head - size) * step] * in[(head - size) * step];
scale[(head - post_pad) * step] = 1. + accum_scale * alpha_over_size;
++head;
}
}
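+// The loops above leave scale[c] = 1 + accum_scale * alpha_over_size for
+// every channel c of this (n, h, w) column, where accum_scale is the running
+// sum of squared inputs over the local channel window around c.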
+
+// TODO: check if it would be faster to just put it into the previous kernel.
template <typename Dtype>
__global__ void LRNComputeOutput(const int nthreads, const Dtype* in,
    const Dtype* scale, const Dtype negative_beta, Dtype* out) {
  int index = threadIdx.x + blockIdx.x * blockDim.x;
  if (index < nthreads) {
    out[index] = in[index] * pow(scale[index], negative_beta);
  }
}
+
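+// Gradient of LRN, assuming the standard across-channel definition used by
+// the forward kernels above:
+//   top_i   = bottom_i * scale_i^(-beta)
+//   scale_i = 1 + alpha_over_size * sum_{j in window(i)} bottom_j^2
+// Differentiating gives
+//   bottom_diff_i = top_diff_i * scale_i^(-beta)
+//       - (2 * alpha * beta / size) * bottom_i
+//         * sum_{j : i in window(j)} top_diff_j * top_j / scale_j.
+// The kernel below accumulates that sum with the same sliding-window trick as
+// the forward pass (accum_ratio), and cache_ratio is passed in as
+// 2 * alpha * beta / size.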
+template <typename Dtype>
+__global__ void LRNComputeDiff(const int nthreads, const Dtype* bottom_data,
+ const Dtype* top_data, const Dtype* scale, const Dtype* top_diff,
+ const int num, const int channels, const int height,
+ const int width, const int size, const Dtype negative_beta,
+ const Dtype cache_ratio,
+ Dtype* bottom_diff) {
+ int index = threadIdx.x + blockIdx.x * blockDim.x;
+ if (index < nthreads) {
+ // find out the local offset
+ int w = index % width;
+ int h = (index / width) % height;
+ int n = index / width / height;
+ int offset = (n * channels * height + h) * width + w;
+ int step = height * width;
+ bottom_data += offset;
+ top_data += offset;
+ scale += offset;
+ top_diff += offset;
+ bottom_diff += offset;
+ int head = 0;
+ int pre_pad = size - (size + 1) / 2;
+ int post_pad = size - pre_pad - 1;
+ Dtype accum_ratio = 0;
+ // accumulate values
+ while (head < post_pad) {
+ accum_ratio += top_diff[head * step] * top_data[head * step] /
+ scale[head * step];
+ ++head;
+ }
+ // until we reach size, nothing needs to be subtracted
+ while (head < size) {
+ accum_ratio += top_diff[head * step] * top_data[head * step] /
+ scale[head * step];
+ bottom_diff[(head - post_pad) * step] = top_diff[(head - post_pad) * step]
+ * pow(scale[(head - post_pad) * step], negative_beta) - cache_ratio *
+ bottom_data[(head - post_pad) * step] * accum_ratio;
+ ++head;
+ }
+ // both add and subtract
+ while (head < channels) {
+ accum_ratio += top_diff[head * step] * top_data[head * step] /
+ scale[head * step];
+ accum_ratio -= top_diff[(head - size) * step] *
+ top_data[(head - size) * step] / scale[(head - size) * step];
+ bottom_diff[(head - post_pad) * step] = top_diff[(head - post_pad) * step]
+ * pow(scale[(head - post_pad) * step], negative_beta) - cache_ratio *
+ bottom_data[(head - post_pad) * step] * accum_ratio;
+ ++head;
+ }
+ // subtract only
+ while (head < channels + post_pad) {
+ accum_ratio -= top_diff[(head - size) * step] *
+ top_data[(head - size) * step] / scale[(head - size) * step];
+ bottom_diff[(head - post_pad) * step] = top_diff[(head - post_pad) * step]
+ * pow(scale[(head - post_pad) * step], negative_beta) - cache_ratio *
+ bottom_data[(head - post_pad) * step] * accum_ratio;
+ ++head;
+ }
+ }
+}
+
template <typename Dtype>
Dtype LRNLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
const bool propagate_down, vector<Blob<Dtype>*>* bottom) {
- NOT_IMPLEMENTED;
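+ // One CUDA thread per (n, h, w) position; each thread sweeps the channel
+ // axis with the sliding window implemented in LRNComputeDiff.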
+ int n_threads = num_ * height_ * width_;
+ LRNComputeDiff<<<CAFFEINE_GET_BLOCKS(n_threads), CAFFEINE_CUDA_NUM_THREADS>>>(
+ n_threads, (*bottom)[0]->gpu_data(), top[0]->gpu_data(),
+ scale_.gpu_data(), top[0]->gpu_diff(), num_, channels_, height_, width_,
+ size_, -beta_, Dtype(2. * alpha_ * beta_ / size_),
+ (*bottom)[0]->mutable_gpu_diff());
return Dtype(0.);
}
+
INSTANTIATE_CLASS(LRNLayer);
} // namespace caffeine
: blob_bottom_(new Blob<Dtype>()),
blob_top_(new Blob<Dtype>()) {};
virtual void SetUp() {
- blob_bottom_->Reshape(2,7,3,3);
+ Caffeine::set_random_seed(1701);
+ blob_bottom_->Reshape(2, 7, 3, 3);
// fill the values
FillerParameter filler_param;
GaussianFiller<Dtype> filler(filler_param);
EXPECT_EQ(this->blob_top_->width(), 3);
}
-TYPED_TEST(LRNLayerTest, TestCPU) {
+TYPED_TEST(LRNLayerTest, TestCPUForward) {
LayerParameter layer_param;
LRNLayer<TypeParam> layer(layer_param);
Caffeine::set_mode(Caffeine::CPU);
EXPECT_LE(this->blob_top_->cpu_data()[i],
top_reference.cpu_data()[i] + 1e-5);
}
+}
+TYPED_TEST(LRNLayerTest, TestGPUForward) {
+ LayerParameter layer_param;
+ LRNLayer<TypeParam> layer(layer_param);
Caffeine::set_mode(Caffeine::GPU);
+ layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ Blob<TypeParam> top_reference;
+ this->ReferenceLRNForward(*(this->blob_bottom_), layer_param,
+ &top_reference);
for (int i = 0; i < this->blob_bottom_->count(); ++i) {
EXPECT_GE(this->blob_top_->cpu_data()[i],
top_reference.cpu_data()[i] - 1e-5);
TYPED_TEST(LRNLayerTest, TestCPUGradient) {
LayerParameter layer_param;
LRNLayer<TypeParam> layer(layer_param);
+ GradientChecker<TypeParam> checker(1e-2, 1e-2);
Caffeine::set_mode(Caffeine::CPU);
- // when testing the GPU gradient, let's do a small shape.
- this->blob_bottom_->Reshape(2, 7, 3, 3);
- FillerParameter filler_param;
- GaussianFiller<TypeParam> filler(filler_param);
- filler.Fill(this->blob_bottom_);
+ layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
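+ // Seed the top diff with ones and run Backward once by hand; the exhaustive
+ // gradient check below recomputes its own gradients.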
+ for (int i = 0; i < this->blob_top_->count(); ++i) {
+ this->blob_top_->mutable_cpu_diff()[i] = 1.;
+ }
+ layer.Backward(this->blob_top_vec_, true, &(this->blob_bottom_vec_));
+ checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_, this->blob_top_vec_);
+}
+
+TYPED_TEST(LRNLayerTest, TestGPUGradient) {
+ LayerParameter layer_param;
+ LRNLayer<TypeParam> layer(layer_param);
GradientChecker<TypeParam> checker(1e-2, 1e-2);
+ Caffeine::set_mode(Caffeine::GPU);
+ layer.SetUp(this->blob_bottom_vec_, &(this->blob_top_vec_));
+ layer.Forward(this->blob_bottom_vec_, &(this->blob_top_vec_));
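+ // Same manual Backward pass as the CPU test, now exercising the GPU kernel.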
+ for (int i = 0; i < this->blob_top_->count(); ++i) {
+ this->blob_top_->mutable_cpu_diff()[i] = 1.;
+ }
+ layer.Backward(this->blob_top_vec_, true, &(this->blob_bottom_vec_));
checker.CheckGradientExhaustive(layer, this->blob_bottom_vec_, this->blob_top_vec_);
}