optionally output the mask to a top blob instead of storing internally

author Jeff Donahue <jeff.donahue@gmail.com>

Sun, 25 May 2014 01:09:05 +0000 (18:09 -0700)

committer Jeff Donahue <jeff.donahue@gmail.com>

Sun, 25 May 2014 01:43:17 +0000 (18:43 -0700)
author Jeff Donahue <jeff.donahue@gmail.com>
Sun, 25 May 2014 01:09:05 +0000 (18:09 -0700)
committer Jeff Donahue <jeff.donahue@gmail.com>
Sun, 25 May 2014 01:43:17 +0000 (18:43 -0700)
diff --git a/src/caffe/layers/pooling_layer.cpp b/src/caffe/layers/pooling_layer.cpp

index 3d216de..928c8c7 100644 (file)
--- a/src/caffe/layers/pooling_layer.cpp
+++ b/src/caffe/layers/pooling_layer.cpp
@@ -19,7 +19,15 @@ template <typename Dtype>
  void PoolingLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
        vector<Blob<Dtype>*>* top) {
    CHECK_EQ(bottom.size(), 1) << "PoolingLayer takes a single blob as input.";
-  CHECK_EQ(top->size(), 1) << "PoolingLayer takes a single blob as output.";
+  if (this->layer_param_.pooling_param().pool() ==
+      PoolingParameter_PoolMethod_MAX) {
+    CHECK_GE(top->size(), 1)
+        << "MaxPoolingLayer takes at least one blob as output.";
+    CHECK_LE(top->size(), 2)
+        << "MaxPoolingLayer takes at most two blobs as output.";
+  } else {
+    CHECK_EQ(top->size(), 1) << "PoolingLayer takes a single blob as output.";
+  }
    kernel_size_ = this->layer_param_.pooling_param().kernel_size();
    stride_ = this->layer_param_.pooling_param().stride();
    pad_ = this->layer_param_.pooling_param().pad();
@@ -37,9 +45,12 @@ void PoolingLayer<Dtype>::SetUp(const vector<Blob<Dtype>*>& bottom,
        width_ + 2 * pad_ - kernel_size_) / stride_)) + 1;
    (*top)[0]->Reshape(bottom[0]->num(), channels_, pooled_height_,
        pooled_width_);
+  if (top->size() > 1) {
+    (*top)[1]->ReshapeLike(*(*top)[0]);
+  }
    // If max pooling, we will initialize the vector index part.
    if (this->layer_param_.pooling_param().pool() ==
-      PoolingParameter_PoolMethod_MAX) {
+      PoolingParameter_PoolMethod_MAX && top->size() == 1) {
      max_idx_.reset(new Blob<int>(bottom[0]->num(), channels_,
                                   pooled_height_, pooled_width_));
    }
@@ -61,15 +72,23 @@ Dtype PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
    // Different pooling methods. We explicitly do the switch outside the for
    // loop to save time, although this results in more codes.
    int top_count = (*top)[0]->count();
+  // We'll output the mask to top[1] if it's of size >1.
+  const bool use_top_mask = top->size() > 1;
    int* mask;
+  Dtype* top_mask;
    switch (this->layer_param_.pooling_param().pool()) {
    case PoolingParameter_PoolMethod_MAX:
-  // Initialize
-    mask = max_idx_->mutable_cpu_data();
-    for (int i = 0; i < top_count; ++i) {
-      top_data[i] = -FLT_MAX;
-      mask[i] = -1;
+    // Initialize
+    if (use_top_mask) {
+      top_mask = (*top)[1]->mutable_cpu_data();
+      caffe_set(top_count, Dtype(-1), top_mask);
+    } else {
+      mask = max_idx_->mutable_cpu_data();
+      for (int i = 0; i < top_count; ++i) {
+        mask[i] = -1;
+      }
      }
+    caffe_set(top_count, Dtype(-FLT_MAX), top_data);
      // The main loop
      for (int n = 0; n < bottom[0]->num(); ++n) {
        for (int c = 0; c < channels_; ++c) {
@@ -85,7 +104,11 @@ Dtype PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
                  const int index = h * width_ + w;
                  if (bottom_data[index] > top_data[pool_index]) {
                    top_data[pool_index] = bottom_data[index];
-                  mask[pool_index] = index;
+                  if (use_top_mask) {
+                    top_mask[pool_index] = static_cast<Dtype>(index);
+                  } else {
+                    mask[pool_index] = index;
+                  }
                  }
                }
              }
@@ -94,7 +117,11 @@ Dtype PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
          // compute offset
          bottom_data += bottom[0]->offset(0, 1);
          top_data += (*top)[0]->offset(0, 1);
-        mask += (*top)[0]->offset(0, 1);
+        if (use_top_mask) {
+          top_mask += (*top)[0]->offset(0, 1);
+        } else {
+          mask += (*top)[0]->offset(0, 1);
+        }
        }
      }
      break;
@@ -150,23 +177,36 @@ void PoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
    Dtype* bottom_diff = (*bottom)[0]->mutable_cpu_diff();
    // Different pooling methods. We explicitly do the switch outside the for
    // loop to save time, although this results in more codes.
-  memset(bottom_diff, 0, (*bottom)[0]->count() * sizeof(Dtype));
+  caffe_set((*bottom)[0]->count(), Dtype(0), bottom_diff);
+  // We'll output the mask to top[1] if it's of size >1.
+  const bool use_top_mask = top.size() > 1;
    const int* mask;
+  const Dtype* top_mask;
    switch (this->layer_param_.pooling_param().pool()) {
    case PoolingParameter_PoolMethod_MAX:
      // The main loop
-    mask = max_idx_->cpu_data();
+    if (use_top_mask) {
+      top_mask = top[1]->cpu_data();
+    } else {
+      mask = max_idx_->cpu_data();
+    }
      for (int n = 0; n < top[0]->num(); ++n) {
        for (int c = 0; c < channels_; ++c) {
          for (int ph = 0; ph < pooled_height_; ++ph) {
            for (int pw = 0; pw < pooled_width_; ++pw) {
              const int index = ph * pooled_width_ + pw;
-            bottom_diff[mask[index]] += top_diff[index];
+            const int bottom_index =
+                use_top_mask ? top_mask[index] : mask[index];
+            bottom_diff[bottom_index] += top_diff[index];
            }
          }
          bottom_diff += (*bottom)[0]->offset(0, 1);
          top_diff += top[0]->offset(0, 1);
-        mask += top[0]->offset(0, 1);
+        if (use_top_mask) {
+          top_mask += top[0]->offset(0, 1);
+        } else {
+          mask += top[0]->offset(0, 1);
+        }
        }
      }
      break;
diff --git a/src/caffe/layers/pooling_layer.cu b/src/caffe/layers/pooling_layer.cu

index 49e5d77..31be47e 100644 (file)
--- a/src/caffe/layers/pooling_layer.cu
+++ b/src/caffe/layers/pooling_layer.cu
@@ -17,7 +17,8 @@ template <typename Dtype>
  __global__ void MaxPoolForward(const int nthreads, const Dtype* bottom_data,
      const int num, const int channels, const int height,
      const int width, const int pooled_height, const int pooled_width,
-    const int kernel_size, const int stride, Dtype* top_data, int* mask) {
+    const int kernel_size, const int stride, Dtype* top_data,
+    int* mask, Dtype* top_mask) {
    CUDA_KERNEL_LOOP(index, nthreads) {
      int pw = index % pooled_width;
      int ph = (index / pooled_width) % pooled_height;
@@ -39,7 +40,11 @@ __global__ void MaxPoolForward(const int nthreads, const Dtype* bottom_data,
        }
      }
      top_data[index] = maxval;
-    mask[index] = maxidx;
+    if (mask) {
+      mask[index] = maxidx;
+    } else {
+      top_mask[index] = maxidx;
+    }
    }
  }
  
@@ -150,15 +155,22 @@ Dtype PoolingLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
    const Dtype* bottom_data = bottom[0]->gpu_data();
    Dtype* top_data = (*top)[0]->mutable_gpu_data();
    int count = (*top)[0]->count();
-  int* mask;
+  // We'll output the mask to top[1] if it's of size >1.
+  const bool use_top_mask = top->size() > 1;
+  int* mask = NULL;
+  Dtype* top_mask = NULL;
    switch (this->layer_param_.pooling_param().pool()) {
    case PoolingParameter_PoolMethod_MAX:
-    mask = max_idx_->mutable_gpu_data();
+    if (use_top_mask) {
+      top_mask = (*top)[1]->mutable_gpu_data();
+    } else {
+      mask = max_idx_->mutable_gpu_data();
+    }
      // NOLINT_NEXT_LINE(whitespace/operators)
      MaxPoolForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
          count, bottom_data, bottom[0]->num(), channels_,
          height_, width_, pooled_height_, pooled_width_, kernel_size_, stride_,
-        top_data, mask);
+        top_data, mask, top_mask);
      break;
    case PoolingParameter_PoolMethod_AVE:
      // NOLINT_NEXT_LINE(whitespace/operators)
@@ -197,9 +209,10 @@ Dtype PoolingLayer<Dtype>::Forward_gpu(const vector<Blob<Dtype>*>& bottom,
  
  template <typename Dtype>
  __global__ void MaxPoolBackward(const int nthreads, const Dtype* top_diff,
-    const int* mask, const int num, const int channels, const int height,
-    const int width, const int pooled_height, const int pooled_width,
-    const int kernel_size, const int stride, Dtype* bottom_diff) {
+    const int* mask, const Dtype* top_mask, const int num, const int channels,
+    const int height, const int width, const int pooled_height,
+    const int pooled_width, const int kernel_size, const int stride,
+    Dtype* bottom_diff) {
    CUDA_KERNEL_LOOP(index, nthreads) {
      // find out the local index
      // find out the local offset
@@ -212,12 +225,25 @@ __global__ void MaxPoolBackward(const int nthreads, const Dtype* top_diff,
      int pwstart = (w < kernel_size) ? 0 : (w - kernel_size) / stride + 1;
      int pwend = min(w / stride + 1, pooled_width);
      Dtype gradient = 0;
-    top_diff += (n * channels + c) * pooled_height * pooled_width;
-    mask += (n * channels + c) * pooled_height * pooled_width;
-    for (int ph = phstart; ph < phend; ++ph) {
-      for (int pw = pwstart; pw < pwend; ++pw) {
-        if (mask[ph * pooled_width + pw] == h * width + w)
-          gradient += top_diff[ph * pooled_width + pw];
+    int offset = (n * channels + c) * pooled_height * pooled_width;
+    top_diff += offset;
+    if (mask) {
+      mask += offset;
+      for (int ph = phstart; ph < phend; ++ph) {
+        for (int pw = pwstart; pw < pwend; ++pw) {
+          if (mask[ph * pooled_width + pw] == h * width + w) {
+            gradient += top_diff[ph * pooled_width + pw];
+          }
+        }
+      }
+    } else {
+      top_mask += offset;
+      for (int ph = phstart; ph < phend; ++ph) {
+        for (int pw = pwstart; pw < pwend; ++pw) {
+          if (top_mask[ph * pooled_width + pw] == h * width + w) {
+            gradient += top_diff[ph * pooled_width + pw];
+          }
+        }
        }
      }
      bottom_diff[index] = gradient;
@@ -300,14 +326,21 @@ void PoolingLayer<Dtype>::Backward_gpu(const vector<Blob<Dtype>*>& top,
    Dtype* bottom_diff = (*bottom)[0]->mutable_gpu_diff();
    int count = (*bottom)[0]->count();
    CUDA_CHECK(cudaMemset(bottom_diff, 0, sizeof(Dtype) * count));
-  const int* mask;
+  // We'll output the mask to top[1] if it's of size >1.
+  const bool use_top_mask = top.size() > 1;
+  const int* mask = NULL;
+  const Dtype* top_mask = NULL;
    switch (this->layer_param_.pooling_param().pool()) {
    case PoolingParameter_PoolMethod_MAX:
-    mask = max_idx_->gpu_data();
+    if (use_top_mask) {
+      top_mask = top[1]->gpu_data();
+    } else {
+      mask = max_idx_->gpu_data();
+    }
      caffe_gpu_set(count, Dtype(0.), bottom_diff);
      // NOLINT_NEXT_LINE(whitespace/operators)
      MaxPoolBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
-        count, top_diff, mask, top[0]->num(), channels_,
+        count, top_diff, mask, top_mask, top[0]->num(), channels_,
          height_, width_, pooled_height_, pooled_width_,
          kernel_size_, stride_, bottom_diff);
      break;
author	Jeff Donahue <jeff.donahue@gmail.com>
	Sun, 25 May 2014 01:09:05 +0000 (18:09 -0700)
committer	Jeff Donahue <jeff.donahue@gmail.com>
	Sun, 25 May 2014 01:43:17 +0000 (18:43 -0700)
src/caffe/layers/pooling_layer.cpp		patch \| blob \| history
src/caffe/layers/pooling_layer.cu		patch \| blob \| history