{"combine_gradients_L1", "canny.cl"},
{"combine_gradients_L2", "canny.cl"},
{"comparison_op", "comparison_op.cl"},
- {"comparison_op_quantized", "comparison_op_quantized.cl"},
+ {"comparison_op_qasymm8", "comparison_op_quantized.cl"},
{"concatenate_depth", "concatenate.cl"},
{"concatenate_width", "concatenate.cl"},
{"convolution_rectangle", "convolution_rectangle.cl"},
#include "helpers.h"
#define SUB(x, y) (x) - (y)
+#if defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(VEC_SIZE)
+
+#define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE)
+#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
+#define VEC_OUT VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE)
+
/** Returns the truth value of comparison .
* @attention Offset and Scale of both input should be given as a preprocessor argument using -DOFFSET_IN1=int, -DOFFSET_IN2=int, -DSCALE_IN1=float and -DSCALE_IN2=float. e.g. -DOFFSET_IN1=1, -DOFFSET_IN2=0, -DSCALE_IN1=0.5, -DSCALE_IN2=0.5
* @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
*/
-__kernel void comparison_op_quantized(
+__kernel void comparison_op_qasymm8(
TENSOR3D_DECLARATION(in1),
TENSOR3D_DECLARATION(in2),
TENSOR3D_DECLARATION(out))
Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
- int16 in_a = CONVERT(vload16(0, (__global uchar *)in1.ptr), int16);
- int16 in_b = CONVERT(vload16(0, (__global uchar *)in2.ptr), int16);
+ VEC_INT in_a = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)in1.ptr), VEC_INT);
+ VEC_INT in_b = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)in2.ptr), VEC_INT);
- in_a = SUB(in_a, (int16)((int)OFFSET_IN1));
- in_b = SUB(in_b, (int16)((int)OFFSET_IN2));
+ in_a = SUB(in_a, (VEC_INT)((int)OFFSET_IN1));
+ in_b = SUB(in_b, (VEC_INT)((int)OFFSET_IN2));
- const float16 in1f32 = convert_float16(in_a) * (float16)((float)SCALE_IN1);
- const float16 in2f32 = convert_float16(in_b) * (float16)((float)SCALE_IN2);
+ const VEC_FLOAT in1f32 = CONVERT(in_a, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN1);
+ const VEC_FLOAT in2f32 = CONVERT(in_b, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN2);
#if OPCODE == 0 //EQUAL QUANTIZED
- vstore16(CONVERT(in1f32 == in2f32, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global uchar *)out.ptr);
+ VSTORE(VEC_SIZE)(CONVERT(in1f32 == in2f32, VEC_OUT), 0, (__global DATA_TYPE_OUT *)out.ptr);
#elif OPCODE == 1 //NOT EQUAL QUANTIZED
- vstore16(CONVERT(in1f32 != in2f32, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)), 0, (__global uchar *)out.ptr);
+ VSTORE(VEC_SIZE)(CONVERT(in1f32 != in2f32, VEC_OUT), 0, (__global DATA_TYPE_OUT *)out.ptr);
#endif
}
+#endif // defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(VEC_SIZE)