Make ConvolutionLayer and InnerProductLayer abide by param_propagate_down_
Author:    Jeff Donahue <jeff.donahue@gmail.com>
           Wed, 9 Jul 2014 22:05:57 +0000 (15:05 -0700)
Committer: Jeff Donahue <jeff.donahue@gmail.com>
           Tue, 15 Jul 2014 20:48:09 +0000 (13:48 -0700)
src/caffe/layers/conv_layer.cpp
src/caffe/layers/conv_layer.cu
src/caffe/layers/inner_product_layer.cpp
src/caffe/layers/inner_product_layer.cu

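Note: the same pattern is applied in all four files. Each per-parameter gradient computation (and the zero-initialization of its diff buffer) is wrapped in a check of this->param_propagate_down_[i], which SetUp now resizes to one flag per parameter blob, defaulting to true. A minimal standalone sketch of that gating pattern follows; it is illustrative only (ToyLayer and its members are made-up names, not Caffe code):

    #include <algorithm>
    #include <cstddef>
    #include <cstdio>
    #include <vector>

    // Toy "layer" with one weight blob and one bias blob, each with a diff buffer.
    struct ToyLayer {
      std::vector<float> weight_diff{0.f, 0.f, 0.f};
      std::vector<float> bias_diff{0.f};
      // One flag per parameter blob; true means "compute this parameter's gradient".
      std::vector<bool> param_propagate_down{true, true};

      void Backward(const std::vector<float>& top_diff) {
        if (param_propagate_down[0]) {
          // Zero and accumulate the weight gradient only when it is wanted.
          std::fill(weight_diff.begin(), weight_diff.end(), 0.f);
          for (std::size_t i = 0; i < weight_diff.size(); ++i) {
            weight_diff[i] += top_diff[i];   // stand-in for the real gemm
          }
        }
        if (param_propagate_down[1]) {
          // Same gating for the bias gradient.
          std::fill(bias_diff.begin(), bias_diff.end(), 0.f);
          for (float d : top_diff) bias_diff[0] += d;  // stand-in for the real gemv
        }
      }
    };

    int main() {
      ToyLayer layer;
      layer.param_propagate_down[1] = false;   // freeze the bias
      layer.Backward({1.f, 2.f, 3.f});
      std::printf("weight_diff: %g %g %g   bias_diff: %g\n",
                  layer.weight_diff[0], layer.weight_diff[1],
                  layer.weight_diff[2], layer.bias_diff[0]);
      return 0;
    }

With param_propagate_down[1] set to false, the bias diff above is left untouched; that is exactly how a frozen parameter blob is skipped in the Backward_* implementations in the diffs below.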
diff --git a/src/caffe/layers/conv_layer.cpp b/src/caffe/layers/conv_layer.cpp
index 963dc68..0d6871d 100644
--- a/src/caffe/layers/conv_layer.cpp
+++ b/src/caffe/layers/conv_layer.cpp
@@ -85,6 +85,7 @@ void ConvolutionLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
         bias_multiplier_data[i] = 1.;
     }
   }
+  this->param_propagate_down_.resize(this->blobs_.size(), true);
 }
 
 
@@ -124,11 +125,15 @@ Dtype ConvolutionLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 template <typename Dtype>
 void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
       const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
-  const Dtype* weight = this->blobs_[0]->cpu_data();
-  Dtype* weight_diff = this->blobs_[0]->mutable_cpu_diff();
-  caffe_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
+  const Dtype* weight = NULL;
+  Dtype* weight_diff = NULL;
+  if (this->param_propagate_down_[0]) {
+    weight = this->blobs_[0]->cpu_data();
+    weight_diff = this->blobs_[0]->mutable_cpu_diff();
+    caffe_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
+  }
   Dtype* bias_diff = NULL;
-  if (bias_term_) {
+  if (bias_term_ && this->param_propagate_down_[1]) {
     bias_diff = this->blobs_[1]->mutable_cpu_diff();
     caffe_set(this->blobs_[1]->count(), Dtype(0), bias_diff);
   }
@@ -136,14 +141,10 @@ void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
   const int col_offset = K_ * N_;
   const int top_offset = M_ * N_;
   for (int i = 0; i < top.size(); ++i) {
-    const Dtype* top_diff = top[i]->cpu_diff();
-    const Dtype* bottom_data = (*bottom)[i]->cpu_data();
-    Dtype* bottom_diff = (*bottom)[i]->mutable_cpu_diff();
-    Dtype* col_data = col_buffer_.mutable_cpu_data();
-    Dtype* col_diff = col_buffer_.mutable_cpu_diff();
-
+    const Dtype* top_diff = NULL;
     // Bias gradient, if necessary.
-    if (bias_term_) {
+    if (bias_term_ && this->param_propagate_down_[1]) {
+      top_diff = top[i]->cpu_diff();
       for (int n = 0; n < num_; ++n) {
         caffe_cpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
             1., top_diff + top[0]->offset(n),
@@ -151,29 +152,40 @@ void ConvolutionLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
             bias_diff);
       }
     }
-    for (int n = 0; n < num_; ++n) {
-      // Since we saved memory in the forward pass by not storing all col data,
-      // we will need to recompute them.
-      im2col_cpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
-                 width_, kernel_size_, pad_, stride_, col_data);
-      // gradient w.r.t. weight. Note that we will accumulate diffs.
-      for (int g = 0; g < group_; ++g) {
-        caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
-          (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
-          col_data + col_offset * g, (Dtype)1.,
-          weight_diff + weight_offset * g);
+    if (this->param_propagate_down_[0] || propagate_down[i]) {
+      if (!top_diff) {
+        top_diff = top[i]->cpu_diff();
       }
-      // gradient w.r.t. bottom data, if necessary
-      if (propagate_down[i]) {
-        for (int g = 0; g < group_; ++g) {
-          caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
-            (Dtype)1., weight + weight_offset * g,
-            top_diff + top[i]->offset(n) + top_offset * g,
-            (Dtype)0., col_diff + col_offset * g);
+      Dtype* col_data = col_buffer_.mutable_cpu_data();
+      Dtype* col_diff = col_buffer_.mutable_cpu_diff();
+      const Dtype* bottom_data = (*bottom)[i]->cpu_data();
+      Dtype* bottom_diff = (*bottom)[i]->mutable_cpu_diff();
+      for (int n = 0; n < num_; ++n) {
+        // Since we saved memory in the forward pass by not storing all col
+        // data, we will need to recompute them.
+        im2col_cpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
+                   width_, kernel_size_, pad_, stride_, col_data);
+        // gradient w.r.t. weight. Note that we will accumulate diffs.
+        if (this->param_propagate_down_[0]) {
+          for (int g = 0; g < group_; ++g) {
+            caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
+                (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
+                col_data + col_offset * g, (Dtype)1.,
+                weight_diff + weight_offset * g);
+          }
+        }
+        // gradient w.r.t. bottom data, if necessary
+        if (propagate_down[i]) {
+          for (int g = 0; g < group_; ++g) {
+            caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
+                (Dtype)1., weight + weight_offset * g,
+                top_diff + top[i]->offset(n) + top_offset * g,
+                (Dtype)0., col_diff + col_offset * g);
+          }
+          // col2im back to the data
+          col2im_cpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
+              stride_, bottom_diff + (*bottom)[i]->offset(n));
         }
-        // col2im back to the data
-        col2im_cpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
-            stride_, bottom_diff + (*bottom)[i]->offset(n));
       }
     }
   }
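
For reference, per sample n and per group g, the BLAS calls in the hunk above compute (with dY the top diff, col(X) the im2col'd input, W the weights, and 1 the all-ones bias multiplier):

    dW_g  += dY_{n,g} * col(X_n)_g^T      // gemm(NoTrans, Trans, M_, K_, N_), accumulated over n
    db_c  += sum_spatial(dY_{n,c})        // gemv against the all-ones bias multiplier
    dcol_g = W_g^T * dY_{n,g}             // gemm(Trans, NoTrans, K_, N_, M_)
    dX_n   = col2im(dcol)                 // gradient w.r.t. the bottom data

Each of these is now executed only when the corresponding param_propagate_down_ or propagate_down flag asks for it.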
diff --git a/src/caffe/layers/conv_layer.cu b/src/caffe/layers/conv_layer.cu
index 59ec58d..7406788 100644
--- a/src/caffe/layers/conv_layer.cu
+++ b/src/caffe/layers/conv_layer.cu
@@ -46,13 +46,15 @@ Dtype ConvolutionLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
 template <typename Dtype>
 void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
       const vector<bool>& propagate_down, vector<Blob<Dtype>*>* bottom) {
-  const Dtype* weight = this->blobs_[0]->gpu_data();
-  Dtype* weight_diff = this->blobs_[0]->mutable_gpu_diff();
-  caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
-  Dtype* col_data = col_buffer_.mutable_gpu_data();
-  Dtype* col_diff = col_buffer_.mutable_gpu_diff();
+  const Dtype* weight = NULL;
+  Dtype* weight_diff = NULL;
+  if (this->param_propagate_down_[0]) {
+    weight = this->blobs_[0]->gpu_data();
+    weight_diff = this->blobs_[0]->mutable_gpu_diff();
+    caffe_gpu_set(this->blobs_[0]->count(), Dtype(0), weight_diff);
+  }
   Dtype* bias_diff = NULL;
-  if (bias_term_) {
+  if (bias_term_ && this->param_propagate_down_[1]) {
     bias_diff = this->blobs_[1]->mutable_gpu_diff();
     caffe_gpu_set(this->blobs_[1]->count(), Dtype(0), bias_diff);
   }
@@ -60,41 +62,51 @@ void ConvolutionLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
   const int col_offset = K_ * N_;
   const int top_offset = M_ * N_;
   for (int i = 0; i < top.size(); ++i) {
-    const Dtype* top_diff = top[i]->gpu_diff();
-    const Dtype* bottom_data = (*bottom)[i]->gpu_data();
-    Dtype* bottom_diff = (*bottom)[i]->mutable_gpu_diff();
+    const Dtype* top_diff = NULL;
     // Bias gradient, if necessary.
-    if (bias_term_) {
+    if (bias_term_ && this->param_propagate_down_[1]) {
+      top_diff = top[i]->gpu_diff();
       for (int n = 0; n < num_; ++n) {
         caffe_gpu_gemv<Dtype>(CblasNoTrans, num_output_, N_,
             1., top_diff + top[0]->offset(n),
-            static_cast<const Dtype*>(bias_multiplier_->gpu_data()),
-            1., bias_diff);
+            static_cast<const Dtype*>(bias_multiplier_->gpu_data()), 1.,
+            bias_diff);
       }
     }
-    for (int n = 0; n < num_; ++n) {
-      // since we saved memory in the forward pass by not storing all col data,
-      // we will need to recompute them.
-      im2col_gpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
-                        width_, kernel_size_, pad_, stride_, col_data);
-      // gradient w.r.t. weight. Note that we will accumulate diffs.
-      for (int g = 0; g < group_; ++g) {
-        caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
-          (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
-          col_data + col_offset * g, (Dtype)1.,
-          weight_diff + weight_offset * g);
+    if (this->param_propagate_down_[0] || propagate_down[i]) {
+      if (!top_diff) {
+        top_diff = top[i]->gpu_diff();
       }
-      // gradient w.r.t. bottom data, if necessary
-      if (propagate_down[i]) {
-        for (int g = 0; g < group_; ++g) {
-          caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
-            (Dtype)1., weight + weight_offset * g,
-            top_diff + top[i]->offset(n) + top_offset * g,
-            (Dtype)0., col_diff + col_offset * g);
+      Dtype* col_data = col_buffer_.mutable_gpu_data();
+      Dtype* col_diff = col_buffer_.mutable_gpu_diff();
+      const Dtype* bottom_data = (*bottom)[i]->gpu_data();
+      Dtype* bottom_diff = (*bottom)[i]->mutable_gpu_diff();
+      for (int n = 0; n < num_; ++n) {
+        // Since we saved memory in the forward pass by not storing all col
+        // data, we will need to recompute them.
+        im2col_gpu(bottom_data + (*bottom)[i]->offset(n), channels_, height_,
+                   width_, kernel_size_, pad_, stride_, col_data);
+        // gradient w.r.t. weight. Note that we will accumulate diffs.
+        if (this->param_propagate_down_[0]) {
+          for (int g = 0; g < group_; ++g) {
+            caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasTrans, M_, K_, N_,
+                (Dtype)1., top_diff + top[i]->offset(n) + top_offset * g,
+                col_data + col_offset * g, (Dtype)1.,
+                weight_diff + weight_offset * g);
+          }
+        }
+        // gradient w.r.t. bottom data, if necessary
+        if (propagate_down[i]) {
+          for (int g = 0; g < group_; ++g) {
+            caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, K_, N_, M_,
+                (Dtype)1., weight + weight_offset * g,
+                top_diff + top[i]->offset(n) + top_offset * g,
+                (Dtype)0., col_diff + col_offset * g);
+          }
+          // col2im back to the data
+          col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
+              stride_, bottom_diff + (*bottom)[i]->offset(n));
         }
-        // col2im back to the data
-        col2im_gpu(col_diff, channels_, height_, width_, kernel_size_, pad_,
-            stride_, bottom_diff + (*bottom)[i]->offset(n));
       }
     }
   }
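
Both backward paths also fetch top[i]'s diff lazily: the bias branch reads it first if it runs, and the weight/bottom branch reads it only when needed and not already fetched, so a top blob whose gradients are all switched off is never touched (in Caffe, cpu_diff()/gpu_diff() can trigger a host/device copy through SyncedMemory). A compact sketch of that pattern, with placeholder names (need_*, fetch_diff) that are not Caffe API:

    #include <cstdio>

    // Placeholder for top[i]->cpu_diff() / gpu_diff(); prints so the number of
    // fetches is visible when the sketch is run.
    const float* fetch_diff() {
      static const float diff[3] = {1.f, 2.f, 3.f};
      std::puts("fetching top diff");
      return diff;
    }

    int main() {
      const bool need_bias_grad = false;
      const bool need_weight_grad = true;
      const bool need_bottom_grad = false;

      const float* top_diff = nullptr;
      if (need_bias_grad) {
        top_diff = fetch_diff();
        // ... bias gradient would be accumulated here ...
      }
      if (need_weight_grad || need_bottom_grad) {
        if (!top_diff) {          // fetch only if the bias branch did not
          top_diff = fetch_diff();
        }
        // ... weight / bottom gradients would be accumulated here ...
        std::printf("first diff value: %g\n", top_diff[0]);
      }
      return 0;
    }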
diff --git a/src/caffe/layers/inner_product_layer.cpp b/src/caffe/layers/inner_product_layer.cpp
index ddf55e4..29df6bf 100644
--- a/src/caffe/layers/inner_product_layer.cpp
+++ b/src/caffe/layers/inner_product_layer.cpp
@@ -54,6 +54,7 @@ void InnerProductLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
         bias_multiplier_data[i] = 1.;
     }
   }
+  this->param_propagate_down_.resize(this->blobs_.size(), true);
 }
 
 template <typename Dtype>
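
With M_ the batch size, K_ the input dimension and N_ the number of outputs, so that the top diff dY is M_ x N_, the bottom data X is M_ x K_ and the weights W are N_ x K_, the Backward pass in the hunk below computes:

    dW = dY^T * X    // gemm(Trans, NoTrans, N_, K_, M_)
    db = dY^T * 1    // gemv against the all-ones bias multiplier, summing over the batch
    dX = dY * W      // gemm(NoTrans, NoTrans, M_, K_, N_)

The first two are now skipped unless the corresponding param_propagate_down_ flag is set, while the last remains guarded by propagate_down[0].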
@@ -76,18 +77,22 @@ template <typename Dtype>
 void InnerProductLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
     const vector<bool>& propagate_down,
     vector<Blob<Dtype>*>* bottom) {
-  const Dtype* top_diff = top[0]->cpu_diff();
-  const Dtype* bottom_data = (*bottom)[0]->cpu_data();
-  // Gradient with respect to weight
-  caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1.,
-      top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_cpu_diff());
-  if (bias_term_) {
+  if (this->param_propagate_down_[0]) {
+    const Dtype* top_diff = top[0]->cpu_diff();
+    const Dtype* bottom_data = (*bottom)[0]->cpu_data();
+    // Gradient with respect to weight
+    caffe_cpu_gemm<Dtype>(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1.,
+        top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_cpu_diff());
+  }
+  if (bias_term_ && this->param_propagate_down_[1]) {
+    const Dtype* top_diff = top[0]->cpu_diff();
     // Gradient with respect to bias
     caffe_cpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
         reinterpret_cast<const Dtype*>(bias_multiplier_->cpu_data()), (Dtype)0.,
         this->blobs_[1]->mutable_cpu_diff());
   }
   if (propagate_down[0]) {
+    const Dtype* top_diff = top[0]->cpu_diff();
     // Gradient with respect to bottom data
     caffe_cpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, K_, N_, (Dtype)1.,
         top_diff, this->blobs_[0]->cpu_data(), (Dtype)0.,
diff --git a/src/caffe/layers/inner_product_layer.cu b/src/caffe/layers/inner_product_layer.cu
index 5b95a57..f3e4946 100644
--- a/src/caffe/layers/inner_product_layer.cu
+++ b/src/caffe/layers/inner_product_layer.cu
@@ -33,18 +33,22 @@ template <typename Dtype>
 void InnerProductLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
     const vector<bool>& propagate_down,
     vector<Blob<Dtype>*>* bottom) {
-  const Dtype* top_diff = top[0]->gpu_diff();
-  const Dtype* bottom_data = (*bottom)[0]->gpu_data();
-  // Gradient with respect to weight
-  caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1.,
-      top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_gpu_diff());
-  if (bias_term_) {
+  if (this->param_propagate_down_[0]) {
+    const Dtype* top_diff = top[0]->gpu_diff();
+    const Dtype* bottom_data = (*bottom)[0]->gpu_data();
+    // Gradient with respect to weight
+    caffe_gpu_gemm<Dtype>(CblasTrans, CblasNoTrans, N_, K_, M_, (Dtype)1.,
+        top_diff, bottom_data, (Dtype)0., this->blobs_[0]->mutable_gpu_diff());
+  }
+  if (bias_term_ && this->param_propagate_down_[1]) {
+    const Dtype* top_diff = top[0]->gpu_diff();
     // Gradient with respect to bias
     caffe_gpu_gemv<Dtype>(CblasTrans, M_, N_, (Dtype)1., top_diff,
-        reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()),
-        (Dtype)0., this->blobs_[1]->mutable_gpu_diff());
+        reinterpret_cast<const Dtype*>(bias_multiplier_->gpu_data()), (Dtype)0.,
+        this->blobs_[1]->mutable_gpu_diff());
   }
   if (propagate_down[0]) {
+    const Dtype* top_diff = top[0]->gpu_diff();
     // Gradient with respect to bottom data
     caffe_gpu_gemm<Dtype>(CblasNoTrans, CblasNoTrans, M_, K_, N_, (Dtype)1.,
         top_diff, this->blobs_[0]->gpu_data(), (Dtype)0.,