addDef("INPUT_DEPTH", channels_ / group_);
addDef("TOTAL_INPUT_DEPTH_SIZE", channels_);
addDef("TOTAL_OUTPUT_DEPTH", num_output_);
- addDef("INPUT_START_X", 0);
- addDef("INPUT_START_Y", 0);
- addDef("INPUT_START_Z", 0);
addDef("NUM_FILTERS", M_);
- addDef("OUT_BUFF_OFFSET", 0);
addDef("TILE_X", tile_x);
addDef("TILE_Y", tile_y);
addDef("TILE_Y_STRIDE", tile_y_stride);
// NDRange: ((output_width + pad) / OUT_BLOCK_WIDTH, (output_height + pad) / OUT_BLOCK_HEIGHT, NUM_FILTERS / OUT_BLOCK_DEPTH)
// NOTE: on Beignet, reqd_work_group_size does not guarantee that SIMD16 mode will be used; the
// compiler could choose two SIMD8 threads instead, and if that happens this code will break.
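// Worked example (hypothetical values, assuming each work-group covers one output block): for a
// 112x112 output with OUT_BLOCK_WIDTH = OUT_BLOCK_HEIGHT = 4, 64 filters and OUT_BLOCK_DEPTH = 1,
// the host would enqueue a 28 x 28 x 64 grid; the pad terms round partial blocks at the borders
// up to whole blocks.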
-#ifndef __BEIGNET__
__attribute__((reqd_work_group_size(1, 1, SIMD_SIZE)))
__attribute__((intel_reqd_sub_group_size(SIMD_SIZE)))
-#endif
__kernel void
convolve_simd(
ELTWISE_DATA_ARG
int curr_local_y = ( lid / ( TILE_X / 4 ) );
int curr_local_x = ( lid % ( TILE_X / 4 ) ) * 4;
- int curr_y = or * STRIDE_Y + INPUT_START_Y + curr_local_y;
- int curr_x = oc * STRIDE_X + INPUT_START_X + curr_local_x;
+ int curr_y = or * STRIDE_Y + curr_local_y;
+ int curr_x = oc * STRIDE_X + curr_local_x;
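// Each work-item covers 4 consecutive input pixels along x: lid splits into a row (curr_local_y)
// and a 4-pixel-aligned column (curr_local_x) inside the TILE_X-wide tile.
// Worked example (hypothetical values): TILE_X = 32, lid = 10 -> curr_local_y = 10 / 8 = 1,
// curr_local_x = (10 % 8) * 4 = 8, i.e. this work-item handles row 1, columns 8..11 of the tile.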
#if INPUT_PAD_W != 0 || INPUT_PAD_H != 0
int saved_y = curr_y;
#endif
- in_addr = input_batch_offset + INPUT_START_Z * input_height * input_width
+ in_addr = input_batch_offset
+ (curr_y - INPUT_PAD_H) * input_width // y tile offset
+ curr_x - INPUT_PAD_W; // x tile offset
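// Input feature maps are addressed row-major; subtracting the pads means addresses near the
// border can point into the halo before the tile origin (the saved_y bookkeeping above suggests
// the padded path re-checks coordinates before loading).
// Worked example (hypothetical values): input_width = 16, INPUT_PAD_H = INPUT_PAD_W = 1,
// curr_y = curr_x = 0 -> in_addr = input_batch_offset - 16 - 1, one row and one column before
// the first valid element.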
union {
fm = fm % ALIGNED_NUM_FILTERS;
if (ALIGNED_NUM_FILTERS == NUM_FILTERS || fm < NUM_FILTERS) {
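// ALIGNED_NUM_FILTERS is presumably NUM_FILTERS rounded up to a SIMD-friendly multiple; the
// modulo above wraps the padded filter indices back into range, and this guard skips the store
// for the phantom filters whenever the two counts differ.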
- unsigned int out_addr = OUT_BUFF_OFFSET + ( num_in_batch * TOTAL_OUTPUT_DEPTH + fm ) * output_width * output_height;
+ unsigned int out_addr = ( num_in_batch * TOTAL_OUTPUT_DEPTH + fm ) * output_width * output_height;
out_addr += or * output_width + oc;
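// Output planes are row-major, one output_width x output_height plane per feature map.
// Worked example (hypothetical values): output_width = output_height = 56, TOTAL_OUTPUT_DEPTH = 64,
// num_in_batch = 1, fm = 3, or = 2, oc = 5 ->
// out_addr = (1 * 64 + 3) * 56 * 56 + 2 * 56 + 5 = 210229 (in elements, not bytes).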
// we need this address calculation for biases because we support views and batching
#if APPLY_BIAS