From b597c87bedc303377494ae11f3bceadbd0e2052e Mon Sep 17 00:00:00 2001 From: Alexander Alekhin Date: Tue, 24 Jul 2018 15:49:59 +0300 Subject: [PATCH] dnn(ocl): avoid memory access violation --- modules/dnn/src/opencl/conv_layer_spatial.cl | 35 ++++++++++++++-------------- 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/modules/dnn/src/opencl/conv_layer_spatial.cl b/modules/dnn/src/opencl/conv_layer_spatial.cl index adeb385..c60b8fc 100644 --- a/modules/dnn/src/opencl/conv_layer_spatial.cl +++ b/modules/dnn/src/opencl/conv_layer_spatial.cl @@ -248,39 +248,38 @@ convolve_simd( int curr_y = or * STRIDE_Y; int curr_x = oc * STRIDE_X + lid; -#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0 - int saved_y = curr_y; -#endif + int in_addr = input_batch_offset + (curr_y - INPUT_PAD_H) * INPUT_WIDTH // y tile offset + curr_x - INPUT_PAD_W; // x tile offset + const int in_limit = (get_global_size(2) / ALIGNED_NUM_FILTERS) * TOTAL_INPUT_DEPTH_SIZE * INPUT_PITCH - 1; + Dtype in_buf[INVEC_SIZE]; for(int kd = 0; kd < INPUT_DEPTH; kd++) { +#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0 + const bool cx_out_of_range = !(curr_x >= INPUT_PAD_W && curr_x < INPUT_WIDTH + INPUT_PAD_W); int in_offset = in_addr; __attribute__((opencl_unroll_hint(INVEC_SIZE))) - for (int reg = 0; reg < INVEC_SIZE; reg++) + for (int reg = 0; reg < INVEC_SIZE; reg++, in_offset += INPUT_WIDTH) { - in_buf[reg] = inputs[in_offset]; -#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0 - if (!(curr_y >= INPUT_PAD_H && curr_y < INPUT_HEIGHT + INPUT_PAD_H && - curr_x >= INPUT_PAD_W && curr_x < INPUT_WIDTH + INPUT_PAD_W)) - { - in_buf[reg] = 0; - } -#endif - curr_y += 1; - in_offset += INPUT_WIDTH; + Dtype input = inputs[clamp(in_offset, 0, in_limit)]; + int cy = curr_y + reg; + in_buf[reg] = (cx_out_of_range || cy < INPUT_PAD_H || cy >= INPUT_HEIGHT + INPUT_PAD_H) ? 0 : input; } +#else + int in_offset = in_addr; + __attribute__((opencl_unroll_hint(INVEC_SIZE))) + for (int reg = 0; reg < INVEC_SIZE; reg++, in_offset += INPUT_WIDTH) + { + in_buf[reg] = inputs[min(in_offset, in_limit)]; + } +#endif in_addr += INPUT_PITCH; -#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 || INPUT_PAD_BOTTOM != 0 || INPUT_PAD_RIGHT != 0 - curr_y = saved_y; -#endif - Dtype weight_buf[WEIGHT_PREF]; int w_idx=0; -- 2.7.4