dnn(ocl4dnn): drop weight_buf
author Alexander Alekhin <alexander.a.alekhin@gmail.com>
Sun, 30 Sep 2018 20:22:39 +0000 (20:22 +0000)
committer Alexander Alekhin <alexander.a.alekhin@gmail.com>
Sun, 30 Sep 2018 20:35:41 +0000 (20:35 +0000)
- avoid memory access violation during "prefetch" stage

modules/dnn/src/opencl/conv_layer_spatial.cl

index c60b8fc..37aceee 100644 (file)
@@ -280,15 +280,6 @@ convolve_simd(
 
     in_addr += INPUT_PITCH;
 
-    Dtype weight_buf[WEIGHT_PREF];
-    int w_idx=0;
-
-    for (int i = 0; i < WEIGHT_PREF; i++)
-    {
-        weight_buf[i] = weights[weight_addr];
-        weight_addr += SIMD_SIZE;
-    }
-
 #define BLOCK_IN(n, c) intel_sub_group_shuffle(in_buf[n], (c))
 
     int kr = 0;  // kr = Kernel Row
@@ -297,20 +288,18 @@ convolve_simd(
         int kc = 0;  // kc = Kernel Column
         LOOP(KERNEL_WIDTH, kc,
         {
+            Dtype weight_value = weights[weight_addr];
+            weight_addr += SIMD_SIZE;
             for (int br=0; br < OUT_BLOCK_HEIGHT; br++)
             {
                 for(int bc=0; bc < OUT_BLOCK_WIDTH; bc++)
                 {
                     Dtype input = BLOCK_IN((br * STRIDE_Y + kr * DILATION_Y), bc * STRIDE_X + kc * DILATION_X);
-                    out[br * OUT_BLOCK_WIDTH + bc] = mad(weight_buf[w_idx % WEIGHT_PREF], input, out[br * OUT_BLOCK_WIDTH + bc]);
+                    out[br * OUT_BLOCK_WIDTH + bc] = mad(weight_value, input, out[br * OUT_BLOCK_WIDTH + bc]);
                 }
             }
-            weight_buf[w_idx % WEIGHT_PREF] = weights[weight_addr];
-            weight_addr += SIMD_SIZE;
-            ++w_idx;
         });
     });
-    weight_addr -= WEIGHT_PREF * SIMD_SIZE;
   }
 
   fm = fm % ALIGNED_NUM_FILTERS;