Removing function overhead of pow in Squarediff kernel (#3130)
author Shubham Gupta/System SW /SRI-Bangalore/Engineer/삼성전자 <shub98.gupta@samsung.com>
Tue, 16 Oct 2018 01:23:26 +0000 (06:53 +0530)
committer 오형석/동작제어Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com>
Tue, 16 Oct 2018 01:23:26 +0000 (10:23 +0900)
This patch removes the function-call overhead of pow, since pow(x, 2) can be written as x * x.

Signed-off-by: shubham <shub98.gupta@samsung.com>
libs/ARMComputeEx/src/core/CL/cl_kernels/squared_difference.cl

index 7c12626..de8602d 100644 (file)
@@ -58,7 +58,12 @@ __kernel void squared_difference(
     Tensor3D input2  = CONVERT_TO_TENSOR3D_STRUCT(input2);
     Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
 
+    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+    diff = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input1.ptr)- VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr);
+
+    VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
+    sq_diff = diff * diff;
+
     VSTORE(VEC_SIZE)
-    (pow(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input1.ptr) - VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input2.ptr), 2),
-     0, (__global DATA_TYPE *)output.ptr);
+    (sq_diff, 0, (__global DATA_TYPE *)output.ptr);
 }