Making comparison_op_quantized.cl generic with VEC_SIZE (#4042)

author Prasanna R/SNAP /SRI-Bangalore/Engineer/삼성전자 <prasanna.r@samsung.com>

Tue, 18 Dec 2018 04:47:17 +0000 (10:17 +0530)

committer 오형석/On-Device Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com>

Tue, 18 Dec 2018 04:47:17 +0000 (13:47 +0900)
author Prasanna R/SNAP /SRI-Bangalore/Engineer/삼성전자 <prasanna.r@samsung.com>
Tue, 18 Dec 2018 04:47:17 +0000 (10:17 +0530)
committer 오형석/On-Device Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com>
Tue, 18 Dec 2018 04:47:17 +0000 (13:47 +0900)
diff --git a/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp b/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp

index 6326ee1..40efbcc 100644 (file)
--- a/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
+++ b/libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp
@@ -75,7 +75,7 @@ const std::map<std::string, std::string> CLKernelLibraryEx::_kernel_program_map
      {"combine_gradients_L1", "canny.cl"},
      {"combine_gradients_L2", "canny.cl"},
      {"comparison_op", "comparison_op.cl"},
-    {"comparison_op_quantized", "comparison_op_quantized.cl"},
+    {"comparison_op_qasymm8", "comparison_op_quantized.cl"},
      {"concatenate_depth", "concatenate.cl"},
      {"concatenate_width", "concatenate.cl"},
      {"convolution_rectangle", "convolution_rectangle.cl"},
diff --git a/libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op_quantized.cl b/libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op_quantized.cl

index 133fcee..41c90b7 100644 (file)
--- a/libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op_quantized.cl
+++ b/libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op_quantized.cl
@@ -17,6 +17,12 @@
  #include "helpers.h"
  #define SUB(x, y) (x) - (y)
  
+#if defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(VEC_SIZE)
+
+#define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE)
+#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
+#define VEC_OUT VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)
+
  /** Returns the truth value of comparison .
   * @attention Offset and Scale of both input should be given as a preprocessor argument using -DOFFSET_IN1=int, -DOFFSET_IN2=int, -DSCALE_IN1=float and -DSCALE_IN2=float. e.g. -DOFFSET_IN1=1, -DOFFSET_IN2=0, -DSCALE_IN1=0.5, -DSCALE_IN2=0.5
   * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
@@ -48,7 +54,7 @@
   * @param[in]  output_step_z                        output_stride_z * number of elements along Z processed per workitem(in bytes)
   * @param[in]  output_offset_first_element_in_bytes The offset of the first element in the destination tensor
   */
-__kernel void comparison_op_quantized(
+__kernel void comparison_op_qasymm8(
      TENSOR3D_DECLARATION(in1),
      TENSOR3D_DECLARATION(in2),
      TENSOR3D_DECLARATION(out))
@@ -58,17 +64,18 @@ __kernel void comparison_op_quantized(
      Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
      Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
  
-    int16 in_a = CONVERT(vload16(0, (__global uchar *)in1.ptr), int16);
-    int16 in_b = CONVERT(vload16(0, (__global uchar *)in2.ptr), int16);
+    VEC_INT in_a = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)in1.ptr), VEC_INT);
+    VEC_INT in_b = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)in2.ptr), VEC_INT);
  
-    in_a = SUB(in_a, (int16)((int)OFFSET_IN1));
-    in_b = SUB(in_b, (int16)((int)OFFSET_IN2));
+    in_a = SUB(in_a, (VEC_INT)((int)OFFSET_IN1));
+    in_b = SUB(in_b, (VEC_INT)((int)OFFSET_IN2));
  
-    const float16 in1f32  = convert_float16(in_a) * (float16)((float)SCALE_IN1);
-    const float16 in2f32  = convert_float16(in_b) * (float16)((float)SCALE_IN2);
+    const VEC_FLOAT in1f32  = CONVERT(in_a, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN1);
+    const VEC_FLOAT in2f32  = CONVERT(in_b, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN2);
  #if OPCODE == 0 //EQUAL QUANTIZED
-    vstore16(CONVERT(in1f32 == in2f32, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global uchar *)out.ptr);
+    VSTORE(VEC_SIZE)(CONVERT(in1f32 == in2f32, VEC_OUT), 0, (__global DATA_TYPE_OUT *)out.ptr);
  #elif OPCODE == 1 //NOT EQUAL QUANTIZED
-    vstore16(CONVERT(in1f32 != in2f32, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global uchar *)out.ptr);
+    VSTORE(VEC_SIZE)(CONVERT(in1f32 != in2f32, VEC_OUT), 0, (__global DATA_TYPE_OUT *)out.ptr);
  #endif
  }
+#endif // defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(VEC_SIZE)
diff --git a/libs/ARMComputeEx/src/core/CL/kernels/CLComparisonOpKernel.cpp b/libs/ARMComputeEx/src/core/CL/kernels/CLComparisonOpKernel.cpp

index 9174c87..136ed3a 100644 (file)
--- a/libs/ARMComputeEx/src/core/CL/kernels/CLComparisonOpKernel.cpp
+++ b/libs/ARMComputeEx/src/core/CL/kernels/CLComparisonOpKernel.cpp
@@ -103,7 +103,7 @@ void CLComparisonOpKernel::configure(const ICLTensor *input1, const ICLTensor *i
                         support::cpp11::to_string(input1->info()->quantization_info().scale));
      build_opts.emplace("-DSCALE_IN2=" +
                         support::cpp11::to_string(input2->info()->quantization_info().scale));
-    kernel_name += "_quantized";
+    kernel_name += "_qasymm8";
    }
  
    _kernel =
author	Prasanna R/SNAP /SRI-Bangalore/Engineer/삼성전자 <prasanna.r@samsung.com>
	Tue, 18 Dec 2018 04:47:17 +0000 (10:17 +0530)
committer	오형석/On-Device Lab(SR)/Staff Engineer/삼성전자 <hseok82.oh@samsung.com>
	Tue, 18 Dec 2018 04:47:17 +0000 (13:47 +0900)
libs/ARMComputeEx/src/core/CL/CLKernelLibrary.cpp		patch | blob | history
libs/ARMComputeEx/src/core/CL/cl_kernels/comparison_op_quantized.cl		patch | blob | history
libs/ARMComputeEx/src/core/CL/kernels/CLComparisonOpKernel.cpp		patch | blob | history