Optimisation in Permute kernel (#4202)
authorShubham Gupta/SNAP /SRI-Bangalore/Engineer/삼성전자 <shub98.gupta@samsung.com>
Thu, 17 Jan 2019 09:32:48 +0000 (15:02 +0530)
committer이춘석/On-Device Lab(SR)/Staff Engineer/삼성전자 <chunseok.lee@samsung.com>
Thu, 17 Jan 2019 09:32:48 +0000 (18:32 +0900)
Many of the assignment operations present in the kernel
can easily be avoided, and removing them can
lead to some optimisation.
This patch removes the unnecessary assignment operations.

Signed-off-by: shubham <shub98.gupta@samsung.com>
libs/ARMComputeEx/src/core/CL/cl_kernels/permute_ex.cl

index 7cc8b03..c628c88 100644 (file)
@@ -56,17 +56,16 @@ __kernel void permute_generic(
     Tensor4D in  = CONVERT_TO_TENSOR4D_STRUCT(input, DEPTH_IN);
     Tensor4D out = CONVERT_TO_TENSOR4D_STRUCT_NO_STEP(output, 0);
 
-   int out_index[4];
-   int in_index[4];
-   in_index[0] = get_global_id(0);//W
-   in_index[1] = get_global_id(1);//H
-   in_index[2] = get_global_id(2) % DEPTH_IN;//C
-   in_index[3] = get_global_id(2) / DEPTH_IN;//B
-   out_index[0] = in_index[P1];
-   out_index[1] = in_index[P2];
-   out_index[2] = in_index[P3];
-   out_index[3] = in_index[P4];
+    // WHCN format
+    int in_index[]={
+     get_global_id(0),
+     get_global_id(1),
+     get_global_id(2) % DEPTH_IN,
+     get_global_id(2) / DEPTH_IN,
+    };
 
-    *((__global DATA_TYPE *)tensor4D_offset(&out, out_index[0],out_index[1],out_index[2],out_index[3])) = *((__global DATA_TYPE *)in.ptr);
+    // New locations based on Permuted index calc as out_index[index] = in_index[new_index]
+    *((__global DATA_TYPE *)
+      tensor4D_offset(&out, in_index[P1], in_index[P2], in_index[P3], in_index[P4])) = *((__global DATA_TYPE *)in.ptr);
 }
 #endif // defined(DATA_TYPE) && defined(DEPTH_IN) && defined(P1) && defined(P2) && defined(P3) && defined(P4)