From c5fc8e03ff7f31cb9249419ffd2934d85e899006 Mon Sep 17 00:00:00 2001 From: Li Peng Date: Thu, 21 Dec 2017 22:01:44 +0800 Subject: [PATCH] cleanup unnecessary macros in convolution ocl kernel Signed-off-by: Li Peng --- modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp | 4 ---- modules/dnn/src/opencl/conv_layer_spatial.cl | 10 ++++------ 2 files changed, 4 insertions(+), 10 deletions(-) diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp index eca2d1d..6a60a9e 100644 --- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp +++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp @@ -257,11 +257,7 @@ void OCL4DNNConvSpatial::setupKernelDetails(int32_t kernelType, addDef("INPUT_DEPTH", channels_ / group_); addDef("TOTAL_INPUT_DEPTH_SIZE", channels_); addDef("TOTAL_OUTPUT_DEPTH", num_output_); - addDef("INPUT_START_X", 0); - addDef("INPUT_START_Y", 0); - addDef("INPUT_START_Z", 0); addDef("NUM_FILTERS", M_); - addDef("OUT_BUFF_OFFSET", 0); addDef("TILE_X", tile_x); addDef("TILE_Y", tile_y); addDef("TILE_Y_STRIDE", tile_y_stride); diff --git a/modules/dnn/src/opencl/conv_layer_spatial.cl b/modules/dnn/src/opencl/conv_layer_spatial.cl index 2457cf7..130d46e 100644 --- a/modules/dnn/src/opencl/conv_layer_spatial.cl +++ b/modules/dnn/src/opencl/conv_layer_spatial.cl @@ -189,10 +189,8 @@ __kernel void ConvolveBasic( // NDRange: (output_width+pad)/ OUT_BLOCK_WIDTH, (output_height+pad)/OUT_BLOCK_HEIGHT, NUM_FILTERS/OUT_BLOCK_DEPTH // NOTE: for beignet this reqd_work_group_size does not guarantee that SIMD16 mode will be used, the compiler could choose to use two SIMD8 threads, and if that happens the code will break. -#ifndef __BEIGNET__ __attribute__((reqd_work_group_size(1, 1, SIMD_SIZE))) __attribute__((intel_reqd_sub_group_size(SIMD_SIZE))) -#endif __kernel void convolve_simd( ELTWISE_DATA_ARG @@ -232,12 +230,12 @@ convolve_simd( int curr_local_y = ( lid / ( TILE_X / 4 ) ); int curr_local_x = ( lid % ( TILE_X / 4 ) ) * 4; - int curr_y = or * STRIDE_Y + INPUT_START_Y + curr_local_y; - int curr_x = oc * STRIDE_X + INPUT_START_X + curr_local_x; + int curr_y = or * STRIDE_Y + curr_local_y; + int curr_x = oc * STRIDE_X + curr_local_x; #if INPUT_PAD_W != 0 || INPUT_PAD_H != 0 int saved_y = curr_y; #endif - in_addr = input_batch_offset + INPUT_START_Z * input_height * input_width + in_addr = input_batch_offset + (curr_y - INPUT_PAD_H) * input_width // y tile offset + curr_x - INPUT_PAD_W; // x tile offset union { @@ -363,7 +361,7 @@ convolve_simd( fm = fm % ALIGNED_NUM_FILTERS; if ((ALIGNED_NUM_FILTERS == NUM_FILTERS || fm < NUM_FILTERS)) { - unsigned int out_addr = OUT_BUFF_OFFSET + ( num_in_batch * TOTAL_OUTPUT_DEPTH + fm ) * output_width * output_height; + unsigned int out_addr = ( num_in_batch * TOTAL_OUTPUT_DEPTH + fm ) * output_width * output_height; out_addr += or * output_width + oc; // we need this address calculation for biases because we support views and batching #if APPLY_BIAS -- 2.7.4