src/caffe/layers/pooling_layer.cpp

   1 #include <algorithm>
   2 #include <cfloat>
   3 #include <vector>
   4
   5 #include "caffe/layers/pooling_layer.hpp"
   6 #include "caffe/util/math_functions.hpp"
   7
   8 namespace caffe {
   9
  10 using std::min;
  11 using std::max;
  12
  13 template <typename Dtype>
  14 void PoolingLayer<Dtype>::LayerSetUp(const vector<Blob<Dtype>*>& bottom,
  15       const vector<Blob<Dtype>*>& top) {
  16   PoolingParameter pool_param = this->layer_param_.pooling_param();
  17   if (pool_param.global_pooling()) {
  18     CHECK(!(pool_param.has_kernel_size() ||
  19       pool_param.has_kernel_h() || pool_param.has_kernel_w()))
  20       << "With Global_pooling: true Filter size cannot specified";
  21   } else {
  22     CHECK(!pool_param.has_kernel_size() !=
  23       !(pool_param.has_kernel_h() && pool_param.has_kernel_w()))
  24       << "Filter size is kernel_size OR kernel_h and kernel_w; not both";
  25     CHECK(pool_param.has_kernel_size() ||
  26       (pool_param.has_kernel_h() && pool_param.has_kernel_w()))
  27       << "For non-square filters both kernel_h and kernel_w are required.";
  28   }
  29   CHECK((!pool_param.has_pad() && pool_param.has_pad_h()
  30       && pool_param.has_pad_w())
  31       || (!pool_param.has_pad_h() && !pool_param.has_pad_w()))
  32       << "pad is pad OR pad_h and pad_w are required.";
  33   CHECK((!pool_param.has_stride() && pool_param.has_stride_h()
  34       && pool_param.has_stride_w())
  35       || (!pool_param.has_stride_h() && !pool_param.has_stride_w()))
  36       << "Stride is stride OR stride_h and stride_w are required.";
  37   global_pooling_ = pool_param.global_pooling();
  38   round_mode_ = pool_param.round_mode();
  39   if (global_pooling_) {
  40     kernel_h_ = bottom[0]->height();
  41     kernel_w_ = bottom[0]->width();
  42   } else {
  43     if (pool_param.has_kernel_size()) {
  44       kernel_h_ = kernel_w_ = pool_param.kernel_size();
  45     } else {
  46       kernel_h_ = pool_param.kernel_h();
  47       kernel_w_ = pool_param.kernel_w();
  48     }
  49   }
  50   CHECK_GT(kernel_h_, 0) << "Filter dimensions cannot be zero.";
  51   CHECK_GT(kernel_w_, 0) << "Filter dimensions cannot be zero.";
  52   if (!pool_param.has_pad_h()) {
  53     pad_h_ = pad_w_ = pool_param.pad();
  54   } else {
  55     pad_h_ = pool_param.pad_h();
  56     pad_w_ = pool_param.pad_w();
  57   }
  58   if (!pool_param.has_stride_h()) {
  59     stride_h_ = stride_w_ = pool_param.stride();
  60   } else {
  61     stride_h_ = pool_param.stride_h();
  62     stride_w_ = pool_param.stride_w();
  63   }
  64   if (global_pooling_) {
  65     CHECK(pad_h_ == 0 && pad_w_ == 0 && stride_h_ == 1 && stride_w_ == 1)
  66       << "With Global_pooling: true; only pad = 0 and stride = 1";
  67   }
  68   if (pad_h_ != 0 || pad_w_ != 0) {
  69     CHECK(this->layer_param_.pooling_param().pool()
  70         == PoolingParameter_PoolMethod_AVE
  71         || this->layer_param_.pooling_param().pool()
  72         == PoolingParameter_PoolMethod_MAX)
  73         << "Padding implemented only for average and max pooling.";
  74     CHECK_LT(pad_h_, kernel_h_);
  75     CHECK_LT(pad_w_, kernel_w_);
  76   }
  77 }
  78
  79 template <typename Dtype>
  80 void PoolingLayer<Dtype>::Reshape(const vector<Blob<Dtype>*>& bottom,
  81       const vector<Blob<Dtype>*>& top) {
  82   CHECK_EQ(4, bottom[0]->num_axes()) << "Input must have 4 axes, "
  83       << "corresponding to (num, channels, height, width)";
  84   channels_ = bottom[0]->channels();
  85   height_ = bottom[0]->height();
  86   width_ = bottom[0]->width();
  87   if (global_pooling_) {
  88     kernel_h_ = bottom[0]->height();
  89     kernel_w_ = bottom[0]->width();
  90   }
  91   switch (round_mode_) {
  92   case PoolingParameter_RoundMode_CEIL:
  93     pooled_height_ = static_cast<int>(ceil(static_cast<float>(
  94         height_ + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1;
  95     pooled_width_ = static_cast<int>(ceil(static_cast<float>(
  96         width_ + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1;
  97     break;
  98   case PoolingParameter_RoundMode_FLOOR:
  99     pooled_height_ = static_cast<int>(floor(static_cast<float>(
 100         height_ + 2 * pad_h_ - kernel_h_) / stride_h_)) + 1;
 101     pooled_width_ = static_cast<int>(floor(static_cast<float>(
 102         width_ + 2 * pad_w_ - kernel_w_) / stride_w_)) + 1;
 103     break;
 104   default:
 105     LOG(FATAL) << "Unknown rounding mode.";
 106   }
 107   if (pad_h_ || pad_w_) {
 108     // If we have padding, ensure that the last pooling starts strictly
 109     // inside the image (instead of at the padding); otherwise clip the last.
 110     if ((pooled_height_ - 1) * stride_h_ >= height_ + pad_h_) {
 111       --pooled_height_;
 112     }
 113     if ((pooled_width_ - 1) * stride_w_ >= width_ + pad_w_) {
 114       --pooled_width_;
 115     }
 116     CHECK_LT((pooled_height_ - 1) * stride_h_, height_ + pad_h_);
 117     CHECK_LT((pooled_width_ - 1) * stride_w_, width_ + pad_w_);
 118   }
 119   top[0]->Reshape(bottom[0]->num(), channels_, pooled_height_,
 120       pooled_width_);
 121   if (top.size() > 1) {
 122     top[1]->ReshapeLike(*top[0]);
 123   }
 124   // If max pooling, we will initialize the vector index part.
 125   if (this->layer_param_.pooling_param().pool() ==
 126       PoolingParameter_PoolMethod_MAX && top.size() == 1) {
 127     max_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
 128         pooled_width_);
 129   }
 130   // If stochastic pooling, we will initialize the random index part.
 131   if (this->layer_param_.pooling_param().pool() ==
 132       PoolingParameter_PoolMethod_STOCHASTIC) {
 133     rand_idx_.Reshape(bottom[0]->num(), channels_, pooled_height_,
 134       pooled_width_);
 135   }
 136 }
 137
 138 // TODO(Yangqing): Is there a faster way to do pooling in the channel-first
 139 // case?
 140 template <typename Dtype>
 141 void PoolingLayer<Dtype>::Forward_cpu(const vector<Blob<Dtype>*>& bottom,
 142       const vector<Blob<Dtype>*>& top) {
 143   const Dtype* bottom_data = bottom[0]->cpu_data();
 144   Dtype* top_data = top[0]->mutable_cpu_data();
 145   const int top_count = top[0]->count();
 146   // We'll output the mask to top[1] if it's of size >1.
 147   const bool use_top_mask = top.size() > 1;
 148   int* mask = NULL;  // suppress warnings about uninitalized variables
 149   Dtype* top_mask = NULL;
 150   // Different pooling methods. We explicitly do the switch outside the for
 151   // loop to save time, although this results in more code.
 152   switch (this->layer_param_.pooling_param().pool()) {
 153   case PoolingParameter_PoolMethod_MAX:
 154     // Initialize
 155     if (use_top_mask) {
 156       top_mask = top[1]->mutable_cpu_data();
 157       caffe_set(top_count, Dtype(-1), top_mask);
 158     } else {
 159       mask = max_idx_.mutable_cpu_data();
 160       caffe_set(top_count, -1, mask);
 161     }
 162     caffe_set(top_count, Dtype(-FLT_MAX), top_data);
 163     // The main loop
 164     for (int n = 0; n < bottom[0]->num(); ++n) {
 165       for (int c = 0; c < channels_; ++c) {
 166         for (int ph = 0; ph < pooled_height_; ++ph) {
 167           for (int pw = 0; pw < pooled_width_; ++pw) {
 168             int hstart = ph * stride_h_ - pad_h_;
 169             int wstart = pw * stride_w_ - pad_w_;
 170             int hend = min(hstart + kernel_h_, height_);
 171             int wend = min(wstart + kernel_w_, width_);
 172             hstart = max(hstart, 0);
 173             wstart = max(wstart, 0);
 174             const int pool_index = ph * pooled_width_ + pw;
 175             for (int h = hstart; h < hend; ++h) {
 176               for (int w = wstart; w < wend; ++w) {
 177                 const int index = h * width_ + w;
 178                 if (bottom_data[index] > top_data[pool_index]) {
 179                   top_data[pool_index] = bottom_data[index];
 180                   if (use_top_mask) {
 181                     top_mask[pool_index] = static_cast<Dtype>(index);
 182                   } else {
 183                     mask[pool_index] = index;
 184                   }
 185                 }
 186               }
 187             }
 188           }
 189         }
 190         // compute offset
 191         bottom_data += bottom[0]->offset(0, 1);
 192         top_data += top[0]->offset(0, 1);
 193         if (use_top_mask) {
 194           top_mask += top[0]->offset(0, 1);
 195         } else {
 196           mask += top[0]->offset(0, 1);
 197         }
 198       }
 199     }
 200     break;
 201   case PoolingParameter_PoolMethod_AVE:
 202     for (int i = 0; i < top_count; ++i) {
 203       top_data[i] = 0;
 204     }
 205     // The main loop
 206     for (int n = 0; n < bottom[0]->num(); ++n) {
 207       for (int c = 0; c < channels_; ++c) {
 208         for (int ph = 0; ph < pooled_height_; ++ph) {
 209           for (int pw = 0; pw < pooled_width_; ++pw) {
 210             int hstart = ph * stride_h_ - pad_h_;
 211             int wstart = pw * stride_w_ - pad_w_;
 212             int hend = min(hstart + kernel_h_, height_ + pad_h_);
 213             int wend = min(wstart + kernel_w_, width_ + pad_w_);
 214             int pool_size = (hend - hstart) * (wend - wstart);
 215             hstart = max(hstart, 0);
 216             wstart = max(wstart, 0);
 217             hend = min(hend, height_);
 218             wend = min(wend, width_);
 219             for (int h = hstart; h < hend; ++h) {
 220               for (int w = wstart; w < wend; ++w) {
 221                 top_data[ph * pooled_width_ + pw] +=
 222                     bottom_data[h * width_ + w];
 223               }
 224             }
 225             top_data[ph * pooled_width_ + pw] /= pool_size;
 226           }
 227         }
 228         // compute offset
 229         bottom_data += bottom[0]->offset(0, 1);
 230         top_data += top[0]->offset(0, 1);
 231       }
 232     }
 233     break;
 234   case PoolingParameter_PoolMethod_STOCHASTIC:
 235     NOT_IMPLEMENTED;
 236     break;
 237   default:
 238     LOG(FATAL) << "Unknown pooling method.";
 239   }
 240 }
 241
 242 template <typename Dtype>
 243 void PoolingLayer<Dtype>::Backward_cpu(const vector<Blob<Dtype>*>& top,
 244       const vector<bool>& propagate_down, const vector<Blob<Dtype>*>& bottom) {
 245   if (!propagate_down[0]) {
 246     return;
 247   }
 248   const Dtype* top_diff = top[0]->cpu_diff();
 249   Dtype* bottom_diff = bottom[0]->mutable_cpu_diff();
 250   // Different pooling methods. We explicitly do the switch outside the for
 251   // loop to save time, although this results in more codes.
 252   caffe_set(bottom[0]->count(), Dtype(0), bottom_diff);
 253   // We'll output the mask to top[1] if it's of size >1.
 254   const bool use_top_mask = top.size() > 1;
 255   const int* mask = NULL;  // suppress warnings about uninitialized variables
 256   const Dtype* top_mask = NULL;
 257   switch (this->layer_param_.pooling_param().pool()) {
 258   case PoolingParameter_PoolMethod_MAX:
 259     // The main loop
 260     if (use_top_mask) {
 261       top_mask = top[1]->cpu_data();
 262     } else {
 263       mask = max_idx_.cpu_data();
 264     }
 265     for (int n = 0; n < top[0]->num(); ++n) {
 266       for (int c = 0; c < channels_; ++c) {
 267         for (int ph = 0; ph < pooled_height_; ++ph) {
 268           for (int pw = 0; pw < pooled_width_; ++pw) {
 269             const int index = ph * pooled_width_ + pw;
 270             const int bottom_index =
 271                 use_top_mask ? top_mask[index] : mask[index];
 272             bottom_diff[bottom_index] += top_diff[index];
 273           }
 274         }
 275         bottom_diff += bottom[0]->offset(0, 1);
 276         top_diff += top[0]->offset(0, 1);
 277         if (use_top_mask) {
 278           top_mask += top[0]->offset(0, 1);
 279         } else {
 280           mask += top[0]->offset(0, 1);
 281         }
 282       }
 283     }
 284     break;
 285   case PoolingParameter_PoolMethod_AVE:
 286     // The main loop
 287     for (int n = 0; n < top[0]->num(); ++n) {
 288       for (int c = 0; c < channels_; ++c) {
 289         for (int ph = 0; ph < pooled_height_; ++ph) {
 290           for (int pw = 0; pw < pooled_width_; ++pw) {
 291             int hstart = ph * stride_h_ - pad_h_;
 292             int wstart = pw * stride_w_ - pad_w_;
 293             int hend = min(hstart + kernel_h_, height_ + pad_h_);
 294             int wend = min(wstart + kernel_w_, width_ + pad_w_);
 295             int pool_size = (hend - hstart) * (wend - wstart);
 296             hstart = max(hstart, 0);
 297             wstart = max(wstart, 0);
 298             hend = min(hend, height_);
 299             wend = min(wend, width_);
 300             for (int h = hstart; h < hend; ++h) {
 301               for (int w = wstart; w < wend; ++w) {
 302                 bottom_diff[h * width_ + w] +=
 303                   top_diff[ph * pooled_width_ + pw] / pool_size;
 304               }
 305             }
 306           }
 307         }
 308         // offset
 309         bottom_diff += bottom[0]->offset(0, 1);
 310         top_diff += top[0]->offset(0, 1);
 311       }
 312     }
 313     break;
 314   case PoolingParameter_PoolMethod_STOCHASTIC:
 315     NOT_IMPLEMENTED;
 316     break;
 317   default:
 318     LOG(FATAL) << "Unknown pooling method.";
 319   }
 320 }
 321
 322
 323 #ifdef CPU_ONLY
 324 STUB_GPU(PoolingLayer);
 325 #endif
 326
 327 INSTANTIATE_CLASS(PoolingLayer);
 328
 329 }  // namespace caffe