inps.getUMatVector(inputs);
outs.getUMatVector(outputs);
- if (type == AVE && padMode == "SAME")
- return false;
-
// Build the OpenCL pooling operator only while poolOp is still empty;
// once it has been created this whole block is skipped.
if (poolOp.empty())
{
OCL4DNNPoolConfig config;
// Translate the layer's pooling type into the LIBDNN enum: MAX and AVE map
// to their direct counterparts, every other type falls through to STO.
config.pool_method = type == MAX ? LIBDNN_POOLING_METHOD_MAX :
(type == AVE ? LIBDNN_POOLING_METHOD_AVE :
LIBDNN_POOLING_METHOD_STO);
// Hand the layer's avePoolPaddedArea flag over to the operator's config.
+ config.avePoolPaddedArea = avePoolPaddedArea;
poolOp = Ptr<OCL4DNNPool<float> >(new OCL4DNNPool<float>(config));
}
dilation(1, 1),
channels(0),
pool_method(LIBDNN_POOLING_METHOD_MAX),
- global_pooling(false)
+ global_pooling(false),
+ avePoolPaddedArea(false)
{}
MatShape in_shape;
MatShape out_shape;
int channels;
ocl4dnnPoolingMethod_t pool_method; // = LIBDNN_POOLING_METHOD_MAX;
bool global_pooling; // = false;
+ bool avePoolPaddedArea;
};
template<typename Dtype>
int32_t width_;
int32_t pooled_height_;
int32_t pooled_width_;
+ bool avePoolPaddedArea;
};
struct OCL4DNNInnerProductConfig
// Copy the pooling parameters out of the supplied config into this object's
// own members.
channels_ = config.channels;
pool_method_ = config.pool_method;
// This flag later decides whether " -D AVE_POOL_PADDING_AREA" is appended to
// the build options of the average-pooling kernel.
+ avePoolPaddedArea = config.avePoolPaddedArea;
for (int i = 0; i < spatial_dims; ++i)
{
ocl::dnn::ocl4dnn_pooling_oclsrc,
format("-D KERNEL_AVE_POOL=1 -D KERNEL_W=%d -D KERNEL_H=%d"
" -D STRIDE_W=%d -D STRIDE_H=%d"
- " -D PAD_W=%d -D PAD_H=%d",
+ " -D PAD_W=%d -D PAD_H=%d%s",
kernel_w_, kernel_h_,
stride_w_, stride_h_,
- pad_w_, pad_h_
+ pad_w_, pad_h_,
+ avePoolPaddedArea ? " -D AVE_POOL_PADDING_AREA" : ""
));
if (oclk_ave_pool_forward.empty())
// Left edge of the window for output column pw; it can be negative when the
// window starts inside the padding.
int wstart = pw * STRIDE_W - PAD_W;
// Window ends, capped at the input extent plus the padding.
int hend = min(hstart + KERNEL_H, height + PAD_H);
int wend = min(wstart + KERNEL_W, width + PAD_W);
- const int pool_size = (hend - hstart) * (wend - wstart);
// pool_size is the window area; the only difference between the two branches
// below is whether it is taken before or after the window is clamped to the
// input. NOTE(review): presumably this is the divisor of the average computed
// further down -- the use is outside this excerpt, confirm.
+ int pool_size;
+#ifdef AVE_POOL_PADDING_AREA
// Area is measured first, so cells of the window that lie in the padding are
// included in pool_size.
+ pool_size = (hend - hstart) * (wend - wstart);
hstart = max(hstart, (int)0);
wstart = max(wstart, (int)0);
hend = min(hend, height);
wend = min(wend, width);
+#else
// Window is clamped to [0, height) x [0, width) first, so pool_size covers
// only cells that lie inside the input.
+ hstart = max(hstart, (int)0);
+ wstart = max(wstart, (int)0);
+ hend = min(hend, height);
+ wend = min(wend, width);
+ pool_size = (hend - hstart) * (wend - wstart);
+#endif
Dtype aveval = 0;
__global const Dtype* bottom_slice = bottom_data
+ (n * channels + c) * height * width;