stride_ = this->layer_param_.pooling_param().stride();
pad_ = this->layer_param_.pooling_param().pad();
if (pad_ != 0) {
- CHECK_EQ(this->layer_param_.pooling_param().pool(),
- PoolingParameter_PoolMethod_AVE)
- << "Padding implemented only for average pooling.";
+ CHECK(this->layer_param_.pooling_param().pool()
+ == PoolingParameter_PoolMethod_AVE
+ || this->layer_param_.pooling_param().pool()
+ == PoolingParameter_PoolMethod_MAX)
+ << "Padding implemented only for average and max pooling.";
}
channels_ = bottom[0]->channels();
height_ = bottom[0]->height();
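pooled_height_ and pooled_width_ (used below) have to account for the implicit padding. A minimal sketch of the usual derivation, with pooled_extent a hypothetical helper; the patch's actual setup code is not shown in this excerpt:

  #include <cmath>

  // Output extent of one pooling dimension when the input is implicitly
  // padded by `pad` on both sides; windows that begin inside the padding
  // still produce an output, hence the ceil.
  inline int pooled_extent(int size, int kernel_size, int stride, int pad) {
    return static_cast<int>(std::ceil(
        static_cast<float>(size + 2 * pad - kernel_size) / stride)) + 1;
  }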
for (int c = 0; c < channels_; ++c) {
for (int ph = 0; ph < pooled_height_; ++ph) {
for (int pw = 0; pw < pooled_width_; ++pw) {
- int hstart = ph * stride_;
- int wstart = pw * stride_;
+ int hstart = ph * stride_ - pad_;
+ int wstart = pw * stride_ - pad_;
int hend = min(hstart + kernel_size_, height_);
int wend = min(wstart + kernel_size_, width_);
+ hstart = max(hstart, 0);
+ wstart = max(wstart, 0);
const int pool_index = ph * pooled_width_ + pw;
for (int h = hstart; h < hend; ++h) {
for (int w = wstart; w < wend; ++w) {
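The two changes above shift each window by -pad_ and then clamp it to the valid input range, so the padded border is skipped rather than read. A standalone sketch of the resulting forward pass for one channel (illustrative only, not part of the patch):

  #include <algorithm>
  #include <cfloat>

  // Padded max pooling over a single channel, mirroring the window logic
  // in the CPU loop above.
  void max_pool_channel(const float* bottom, int height, int width,
                        int kernel_size, int stride, int pad,
                        int pooled_height, int pooled_width, float* top) {
    for (int ph = 0; ph < pooled_height; ++ph) {
      for (int pw = 0; pw < pooled_width; ++pw) {
        int hstart = std::max(ph * stride - pad, 0);  // clamp top edge
        int wstart = std::max(pw * stride - pad, 0);  // clamp left edge
        int hend = std::min(ph * stride - pad + kernel_size, height);
        int wend = std::min(pw * stride - pad + kernel_size, width);
        float maxval = -FLT_MAX;
        for (int h = hstart; h < hend; ++h) {
          for (int w = wstart; w < wend; ++w) {
            maxval = std::max(maxval, bottom[h * width + w]);
          }
        }
        top[ph * pooled_width + pw] = maxval;
      }
    }
  }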
__global__ void MaxPoolForward(const int nthreads, const Dtype* bottom_data,
const int num, const int channels, const int height,
const int width, const int pooled_height, const int pooled_width,
- const int kernel_size, const int stride, Dtype* top_data,
+ const int kernel_size, const int stride, const int pad, Dtype* top_data,
int* mask, Dtype* top_mask) {
CUDA_KERNEL_LOOP(index, nthreads) {
int pw = index % pooled_width;
int ph = (index / pooled_width) % pooled_height;
int c = (index / pooled_width / pooled_height) % channels;
int n = index / pooled_width / pooled_height / channels;
- int hstart = ph * stride;
+ int hstart = ph * stride - pad;
+ int wstart = pw * stride - pad;
int hend = min(hstart + kernel_size, height);
- int wstart = pw * stride;
int wend = min(wstart + kernel_size, width);
+ hstart = max(hstart, 0);
+ wstart = max(wstart, 0);
Dtype maxval = -FLT_MAX;
int maxidx = -1;
bottom_data += (n * channels + c) * height * width;
MaxPoolForward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, bottom_data, bottom[0]->num(), channels_,
height_, width_, pooled_height_, pooled_width_, kernel_size_, stride_,
- top_data, mask, top_mask);
+ pad_, top_data, mask, top_mask);
break;
case PoolingParameter_PoolMethod_AVE:
// NOLINT_NEXT_LINE(whitespace/operators)
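For context, the launches above and below use Caffe's one-thread-per-output pattern. A sketch of how CUDA_KERNEL_LOOP and CAFFE_GET_BLOCKS are commonly defined in Caffe's CUDA utilities (paraphrased, not part of this patch):

  // Each thread starts at its global index and strides by the total number
  // of launched threads until all nthreads elements are processed.
  #define CUDA_KERNEL_LOOP(i, n) \
    for (int i = blockIdx.x * blockDim.x + threadIdx.x; \
         i < (n); \
         i += blockDim.x * gridDim.x)

  // Enough blocks so that N elements get (at least) one thread each.
  inline int CAFFE_GET_BLOCKS(const int N) {
    return (N + CAFFE_CUDA_NUM_THREADS - 1) / CAFFE_CUDA_NUM_THREADS;
  }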
const int* mask, const Dtype* top_mask, const int num, const int channels,
const int height, const int width, const int pooled_height,
const int pooled_width, const int kernel_size, const int stride,
- Dtype* bottom_diff) {
+ const int pad, Dtype* bottom_diff) {
CUDA_KERNEL_LOOP(index, nthreads) {
// find out the local index
// find out the local offset
int w = index % width;
int h = (index / width) % height;
int c = (index / width / height) % channels;
int n = index / width / height / channels;
- int phstart = (h < kernel_size) ? 0 : (h - kernel_size) / stride + 1;
- int phend = min(h / stride + 1, pooled_height);
- int pwstart = (w < kernel_size) ? 0 : (w - kernel_size) / stride + 1;
- int pwend = min(w / stride + 1, pooled_width);
+ int phstart =
+ (h + pad < kernel_size) ? 0 : (h + pad - kernel_size) / stride + 1;
+ int phend = min((h + pad) / stride + 1, pooled_height);
+ int pwstart =
+ (w + pad < kernel_size) ? 0 : (w + pad - kernel_size) / stride + 1;
+ int pwend = min((w + pad) / stride + 1, pooled_width);
Dtype gradient = 0;
int offset = (n * channels + c) * pooled_height * pooled_width;
top_diff += offset;
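The index math above inverts the forward windows: pooled row ph reads input rows ph * stride - pad through ph * stride - pad + kernel_size - 1, so input row h receives gradient from pooled rows phstart through phend - 1 (and likewise for columns). A standalone check with arbitrary example values (illustrative only, not part of the patch):

  #include <algorithm>
  #include <cassert>

  int main() {
    const int height = 7, kernel_size = 3, stride = 2, pad = 1;
    const int pooled_height =
        (height + 2 * pad - kernel_size) / stride + 1;  // = 4 here
    for (int h = 0; h < height; ++h) {
      // Range of pooled rows the backward pass credits for input row h.
      int phstart =
          (h + pad < kernel_size) ? 0 : (h + pad - kernel_size) / stride + 1;
      int phend = std::min((h + pad) / stride + 1, pooled_height);
      for (int ph = 0; ph < pooled_height; ++ph) {
        // Forward window of pooled row ph, clamped to the image.
        int hstart = std::max(ph * stride - pad, 0);
        int hend = std::min(ph * stride - pad + kernel_size, height);
        bool covered = (h >= hstart && h < hend);
        bool in_range = (ph >= phstart && ph < phend);
        assert(covered == in_range);  // the two descriptions must agree
      }
    }
    return 0;
  }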
MaxPoolBackward<Dtype><<<CAFFE_GET_BLOCKS(count), CAFFE_CUDA_NUM_THREADS>>>(
count, top_diff, mask, top_mask, top[0]->num(), channels_,
height_, width_, pooled_height_, pooled_width_,
- kernel_size_, stride_, bottom_diff);
+ kernel_size_, stride_, pad_, bottom_diff);
break;
case PoolingParameter_PoolMethod_AVE:
// NOLINT_NEXT_LINE(whitespace/operators)