Make ConvolutionLayer and InnerProductLayer abide by param_propagate_down_
Author:    Jeff Donahue <jeff.donahue@gmail.com>
           Wed, 9 Jul 2014 22:05:57 +0000 (15:05 -0700)
Committer: Jeff Donahue <jeff.donahue@gmail.com>
           Tue, 15 Jul 2014 20:48:09 +0000 (13:48 -0700)
src/caffe/layers/conv_layer.cpp
src/caffe/layers/conv_layer.cu
src/caffe/layers/inner_product_layer.cpp
src/caffe/layers/inner_product_layer.cu

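Note: the same pattern is applied in all four files. Each per-parameter gradient computation (and the zero-initialization of its diff buffer) is wrapped in a check of this->param_propagate_down_[i], which SetUp now resizes to one flag per parameter blob, defaulting to true. A minimal standalone sketch of that gating pattern follows; it is illustrative only (ToyLayer and its members are made-up names, not Caffe code):

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Toy "layer" with one weight blob and one bias blob, each with a diff buffer.
    struct ToyLayer {
      std::vector<float> weight_diff{0.f, 0.f, 0.f};
      std::vector<float> bias_diff{0.f};
      // One flag per parameter blob; true means "compute this parameter's gradient".
      std::vector<bool> param_propagate_down{true, true};

      void Backward(const std::vector<float>& top_diff) {
        if (param_propagate_down[0]) {
          // Zero and accumulate the weight gradient only when it is wanted.
          std::fill(weight_diff.begin(), weight_diff.end(), 0.f);
          for (std::size_t i = 0; i < weight_diff.size(); ++i) {
            weight_diff[i] += top_diff[i];   // stand-in for the real gemm
          }
        }
        if (param_propagate_down[1]) {
          // Same gating for the bias gradient.
          std::fill(bias_diff.begin(), bias_diff.end(), 0.f);
          for (float d : top_diff) bias_diff[0] += d;  // stand-in for the real gemv
        }
      }
    };

    int main() {
      ToyLayer layer;
      layer.param_propagate_down[1] = false;   // freeze the bias
      layer.Backward({1.f, 2.f, 3.f});
      std::printf("weight_diff: %g %g %g   bias_diff: %g\n",
                  layer.weight_diff[0], layer.weight_diff[1],
                  layer.weight_diff[2], layer.bias_diff[0]);
      return 0;
    }

With param_propagate_down[1] set to false, the bias diff above is left untouched; that is exactly how a frozen parameter blob is skipped in the Backward_* implementations in the diffs below.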
diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp
index 963dc68..0d6871d 100644
--- a/src/caffe/layers/conv_layer.cpp
+++ b/src/caffe/layers/conv_layer.cpp
@@ -85,6 +85,7 @@ void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
         bias_multiplier_data[i] = 1.;
     }
   }
+  this->param_propagate_down_.resize(this->blobs_.size(), true);
 }
 
 
@@ -124,11 +125,15 @@ Dtype ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 template <typename Dtype>
 void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
       const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
-  const Dtype* weight = this->blobs_[0]->cpu_data();
-  Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
-  caffe_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
+  const Dtype* weight = NULL;
+  Dtype* weight_diff = NULL;
+  if (this->param_propagate_down_[0]) {
+    weight = this->blobs_[0]->cpu_data();
+    weight_diff = this->blobs_[0]->mutable_cpu_diff();
+    caffe_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
+  }
   Dtype* bias_diff = NULL;
-  if (bias_term_) {
+  if (bias_term_ && this->param_propagate_down_[1]) {
     bias_diff = this->blobs_[1]->mutable_cpu_diff();
     caffe_set(this->blobs_[1]->count(), Dtype(0), bias_diff);
   }
@@ -136,14 +141,10 @@ void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
   const int col_offset = K_ * N_;
   const int top_offset = M_ * N_;
   for (int i = 0; i < top.size(); ++i) {
-    const Dtype* top_diff = top[i]->cpu_diff();
-    const Dtype* bottom_data = (*bottom)[i]->cpu_data();
-    Dtype* bottom_diff = (*bottom)[i]->mutable_cpu_diff();
-    Dtype* col_data = col_buffer_.mutable_cpu_data();
-    Dtype* col_diff = col_buffer_.mutable_cpu_diff();
-
+    const Dtype* top_diff = NULL;
     // Bias gradient, if necessary.
-    if (bias_term_) {
+    if (bias_term_ && this->param_propagate_down_[1]) {
+      top_diff = top[i]->cpu_diff();
       for (int n = 0; n < num_; ++n) {
         caffe_cpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
             1., top_diff + top[0]->offset(n),
@@ -151,29 +152,40 @@ void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
             bias_diff);
       }
     }
-    for (int n = 0; n < num_; ++n) {
-      // Since we saved memory in the forward pass by not storing all col data,
-      // we will need to recompute them.
-      im2col_cpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
-                 width_, kernel_size_, pad_, stride_, col_data);
-      // gradient w.r.t. weight. Note that we will accumulate diffs.
-      for (int g = 0; g < group_; ++g) {
-        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
-          (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
-          col_data + col_offset * g, (Dtype)1.,
-          weight_diff + weight_offset * g);
+    if (this->param_propagate_down_[0] || propagate_down[i]) {
+      if (!top_diff) {
+        top_diff = top[i]->cpu_diff();
       }
-      // gradient w.r.t. bottom data, if necessary
-      if (propagate_down[i]) {
-        for (int g = 0; g < group_; ++g) {
-          caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
-            (Dtype)1., weight + weight_offset * g,
-            top_diff + top[i]->offset(n) + top_offset * g,
-            (Dtype)0., col_diff + col_offset * g);
+      Dtype* col_data = col_buffer_.mutable_cpu_data();
+      Dtype* col_diff = col_buffer_.mutable_cpu_diff();
+      const Dtype* bottom_data = (*bottom)[i]->cpu_data();
+      Dtype* bottom_diff = (*bottom)[i]->mutable_cpu_diff();
+      for (int n = 0; n < num_; ++n) {
+        // Since we saved memory in the forward pass by not storing all col
+        // data, we will need to recompute them.
+        im2col_cpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
+                   width_, kernel_size_, pad_, stride_, col_data);
+        // gradient w.r.t. weight. Note that we will accumulate diffs.
+        if (this->param_propagate_down_[0]) {
+          for (int g = 0; g < group_; ++g) {
+            caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
+                (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
+                col_data + col_offset * g, (Dtype)1.,
+                weight_diff + weight_offset * g);
+          }
+        }
+        // gradient w.r.t. bottom data, if necessary
+        if (propagate_down[i]) {
+          for (int g = 0; g < group_; ++g) {
+            caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
+                (Dtype)1., weight + weight_offset * g,
+                top_diff + top[i]->offset(n) + top_offset * g,
+                (Dtype)0., col_diff + col_offset * g);
+          }
+          // col2im back to the data
+          col2im_cpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
+              stride_, bottom_diff + (*bottom)[i]->offset(n));
         }
-        // col2im back to the data
-        col2im_cpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
-            stride_, bottom_diff + (*bottom)[i]->offset(n));
       }
     }
   }
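
For reference, per sample n and per group g, the BLAS calls in the hunk above compute (with dY the top diff, col(X) the im2col'd input, W the weights, and 1 the all-ones bias multiplier):

    dW_g  += dY_{n,g} * col(X_n)_g^T      // gemm(NoTrans, Trans, M_, K_, N_), accumulated over n
    db_c  += sum_spatial(dY_{n,c})        // gemv against the all-ones bias multiplier
    dcol_g = W_g^T * dY_{n,g}             // gemm(Trans, NoTrans, K_, N_, M_)
    dX_n   = col2im(dcol)                 // gradient w.r.t. the bottom data

Each of these is now executed only when the corresponding param_propagate_down_ or propagate_down flag asks for it.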
diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu
index 59ec58d..7406788 100644
--- a/src/caffe/layers/conv_layer.cu
+++ b/src/caffe/layers/conv_layer.cu
@@ -46,13 +46,15 @@ Dtype ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
 template <typename Dtype>
 void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
       const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
-  const Dtype* weight = this->blobs_[0]->gpu_data();
-  Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff();
-  caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
-  Dtype* col_data = col_buffer_.mutable_gpu_data();
-  Dtype* col_diff = col_buffer_.mutable_gpu_diff();
+  const Dtype* weight = NULL;
+  Dtype* weight_diff = NULL;
+  if (this->param_propagate_down_[0]) {
+    weight = this->blobs_[0]->gpu_data();
+    weight_diff = this->blobs_[0]->mutable_gpu_diff();
+    caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
+  }
   Dtype* bias_diff = NULL;
-  if (bias_term_) {
+  if (bias_term_ && this->param_propagate_down_[1]) {
     bias_diff = this->blobs_[1]->mutable_gpu_diff();
     caffe_gpu_set(this->blobs_[1]->count(), Dtype(0), bias_diff);
   }
@@ -60,41 +62,51 @@ void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
   const int col_offset = K_ * N_;
   const int top_offset = M_ * N_;
   for (int i = 0; i < top.size(); ++i) {
-    const Dtype* top_diff = top[i]->gpu_diff();
-    const Dtype* bottom_data = (*bottom)[i]->gpu_data();
-    Dtype* bottom_diff = (*bottom)[i]->mutable_gpu_diff();
+    const Dtype* top_diff = NULL;
     // Bias gradient, if necessary.
-    if (bias_term_) {
+    if (bias_term_ && this->param_propagate_down_[1]) {
+      top_diff = top[i]->gpu_diff();
       for (int n = 0; n < num_; ++n) {
         caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
             1., top_diff + top[0]->offset(n),
-            static_cast<const Dtype*>(bias_multiplier_->gpu_data()),
-            1., bias_diff);
+            static_cast<const Dtype*>(bias_multiplier_->gpu_data()), 1.,
+            bias_diff);
       }
     }
-    for (int n = 0; n < num_; ++n) {
-      // since we saved memory in the forward pass by not storing all col data,
-      // we will need to recompute them.
-      im2col_gpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
-                        width_, kernel_size_, pad_, stride_, col_data);
-      // gradient w.r.t. weight. Note that we will accumulate diffs.
-      for (int g = 0; g < group_; ++g) {
-        caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
-          (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
-          col_data + col_offset * g, (Dtype)1.,
-          weight_diff + weight_offset * g);
+    if (this->param_propagate_down_[0] || propagate_down[i]) {
+      if (!top_diff) {
+        top_diff = top[i]->gpu_diff();
       }
-      // gradient w.r.t. bottom data, if necessary
-      if (propagate_down[i]) {
-        for (int g = 0; g < group_; ++g) {
-          caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
-            (Dtype)1., weight + weight_offset * g,
-            top_diff + top[i]->offset(n) + top_offset * g,
-            (Dtype)0., col_diff + col_offset * g);
+      Dtype* col_data = col_buffer_.mutable_gpu_data();
+      Dtype* col_diff = col_buffer_.mutable_gpu_diff();
+      const Dtype* bottom_data = (*bottom)[i]->gpu_data();
+      Dtype* bottom_diff = (*bottom)[i]->mutable_gpu_diff();
+      for (int n = 0; n < num_; ++n) {
+        // Since we saved memory in the forward pass by not storing all col
+        // data, we will need to recompute them.
+        im2col_gpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
+                   width_, kernel_size_, pad_, stride_, col_data);
+        // gradient w.r.t. weight. Note that we will accumulate diffs.
+        if (this->param_propagate_down_[0]) {
+          for (int g = 0; g < group_; ++g) {
+            caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
+                (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
+                col_data + col_offset * g, (Dtype)1.,
+                weight_diff + weight_offset * g);
+          }
+        }
+        // gradient w.r.t. bottom data, if necessary
+        if (propagate_down[i]) {
+          for (int g = 0; g < group_; ++g) {
+            caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
+                (Dtype)1., weight + weight_offset * g,
+                top_diff + top[i]->offset(n) + top_offset * g,
+                (Dtype)0., col_diff + col_offset * g);
+          }
+          // col2im back to the data
+          col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
+              stride_, bottom_diff + (*bottom)[i]->offset(n));
         }
-        // col2im back to the data
-        col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
-            stride_, bottom_diff + (*bottom)[i]->offset(n));
       }
     }
   }
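
Both backward paths also fetch top[i]'s diff lazily: the bias branch reads it first if it runs, and the weight/bottom branch reads it only when needed and not already fetched, so a top blob whose gradients are all switched off is never touched (in Caffe, cpu_diff()/gpu_diff() can trigger a host/device copy through SyncedMemory). A compact sketch of that pattern, with placeholder names (need_*, fetch_diff) that are not Caffe API:

    #include <cstdio>

    // Placeholder for top[i]->cpu_diff() / gpu_diff(); prints so the number of
    // fetches is visible when the sketch is run.
    const float* fetch_diff() {
      static const float diff[3] = {1.f, 2.f, 3.f};
      std::puts("fetching top diff");
      return diff;
    }

    int main() {
      const bool need_bias_grad = false;
      const bool need_weight_grad = true;
      const bool need_bottom_grad = false;

      const float* top_diff = nullptr;
      if (need_bias_grad) {
        top_diff = fetch_diff();
        // ... bias gradient would be accumulated here ...
      }
      if (need_weight_grad || need_bottom_grad) {
        if (!top_diff) {          // fetch only if the bias branch did not
          top_diff = fetch_diff();
        }
        // ... weight / bottom gradients would be accumulated here ...
        std::printf("first diff value: %g\n", top_diff[0]);
      }
      return 0;
    }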
diff --git a/src/caffe/layers/inner_product_layer.cpp b/src/caffe/layers/inner_product_layer.cpp
index ddf55e4..29df6bf 100644
--- a/src/caffe/layers/inner_product_layer.cpp
+++ b/src/caffe/layers/inner_product_layer.cpp
@@ -54,6 +54,7 @@ void InnerProductLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
         bias_multiplier_data[i] = 1.;
     }
   }
+  this->param_propagate_down_.resize(this->blobs_.size(), true);
 }
 
 template <typename Dtype>
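
With M_ the batch size, K_ the input dimension and N_ the number of outputs, so that the top diff dY is M_ x N_, the bottom data X is M_ x K_ and the weights W are N_ x K_, the Backward pass in the hunk below computes:

    dW = dY^T * X    // gemm(Trans, NoTrans, N_, K_, M_)
    db = dY^T * 1    // gemv against the all-ones bias multiplier, summing over the batch
    dX = dY * W      // gemm(NoTrans, NoTrans, M_, K_, N_)

The first two are now skipped unless the corresponding param_propagate_down_ flag is set, while the last remains guarded by propagate_down[0].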
@@ -76,18 +77,22 @@ template <typename Dtype>
 void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
     const vector<bool>& propagate_down,
     vector<Blob<Dtype>*>* bottom) {
-  const Dtype* top_diff = top[0]->cpu_diff();
-  const Dtype* bottom_data = (*bottom)[0]->cpu_data();
-  // Gradient with respect to weight
-  caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1.,
-      top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_cpu_diff());
-  if (bias_term_) {
+  if (this->param_propagate_down_[0]) {
+    const Dtype* top_diff = top[0]->cpu_diff();
+    const Dtype* bottom_data = (*bottom)[0]->cpu_data();
+    // Gradient with respect to weight
+    caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1.,
+        top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_cpu_diff());
+  }
+  if (bias_term_ && this->param_propagate_down_[1]) {
+    const Dtype* top_diff = top[0]->cpu_diff();
     // Gradient with respect to bias
     caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
         reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()), (Dtype)0.,
         this->blobs_[1]->mutable_cpu_diff());
   }
   if (propagate_down[0]) {
+    const Dtype* top_diff = top[0]->cpu_diff();
     // Gradient with respect to bottom data
     caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, K_, N_, (Dtype)1.,
         top_diff, this->blobs_[0]->cpu_data(), (Dtype)0.,
diff --git a/src/caffe/layers/inner_product_layer.cu b/src/caffe/layers/inner_product_layer.cu
index 5b95a57..f3e4946 100644
--- a/src/caffe/layers/inner_product_layer.cu
+++ b/src/caffe/layers/inner_product_layer.cu
@@ -33,18 +33,22 @@ template <typename Dtype>
 void InnerProductLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
     const vector<bool>& propagate_down,
     vector<Blob<Dtype>*>* bottom) {
-  const Dtype* top_diff = top[0]->gpu_diff();
-  const Dtype* bottom_data = (*bottom)[0]->gpu_data();
-  // Gradient with respect to weight
-  caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1.,
-      top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_gpu_diff());
-  if (bias_term_) {
+  if (this->param_propagate_down_[0]) {
+    const Dtype* top_diff = top[0]->gpu_diff();
+    const Dtype* bottom_data = (*bottom)[0]->gpu_data();
+    // Gradient with respect to weight
+    caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1.,
+        top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_gpu_diff());
+  }
+  if (bias_term_ && this->param_propagate_down_[1]) {
+    const Dtype* top_diff = top[0]->gpu_diff();
     // Gradient with respect to bias
     caffe_gpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
-        reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
-        (Dtype)0., this->blobs_[1]->mutable_gpu_diff());
+        reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()), (Dtype)0.,
+        this->blobs_[1]->mutable_gpu_diff());
   }
   if (propagate_down[0]) {
+    const Dtype* top_diff = top[0]->gpu_diff();
     // Gradient with respect to bottom data
     caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, K_, N_, (Dtype)1.,
         top_diff, this->blobs_[0]->gpu_data(), (Dtype)0.,