{"normalization_layer_cross_map", "normalization_layer.cl"},
{"normalization_layer_in_map", "normalization_layer.cl"},
{"notequal", "notequal.cl"},
+ {"notequal_quantized", "notequal_quantized.cl"},
{"NV12_to_IYUV_bt709", "color_convert.cl"},
{"NV12_to_RGB888_bt709", "color_convert.cl"},
{"NV12_to_RGBA8888_bt709", "color_convert.cl"},
#include "./cl_kernels/notequal.clembed"
},
{
+ "notequal_quantized.cl",
+#include "./cl_kernels/notequal_quantized.clembed"
+ },
+ {
"pad.cl",
#include "./cl_kernels/pad.clembed"
},
--- /dev/null
+/*
+ * Copyright (c) 2018 Samsung Electronics Co., Ltd. All Rights Reserved
+ * Copyright (c) 2016-2018 ARM Limited.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include "helpers.h"
+// Fully parenthesized so the macro expands safely inside larger expressions
+// (e.g. SUB(a, b) * c); the unparenthesized form (x) - (y) would bind as
+// (a) - ((b) * c) due to operator precedence.
+#define SUB(x, y) ((x) - (y))
+
+/** Checks if values in both tensors are not equal.
+ *
+ * Inputs are dequantized per-lane as (q - OFFSET) * SCALE and compared with the
+ * vector != operator; the output is 1 where the dequantized values differ and
+ * 0 where they are equal.
+ *
+ * @attention Offset and Scale of both input should be given as a preprocessor argument using -DOFFSET_IN1=int, -DOFFSET_IN2=int, -DSCALE_IN1=float and -DSCALE_IN2=float. e.g. -DOFFSET_IN1=1, -DOFFSET_IN2=0, -DSCALE_IN1=0.5, -DSCALE_IN2=0.5
+ * @attention Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
+ *            NOTE(review): the body currently processes a fixed 16 elements per
+ *            work-item (vload16/vstore16) regardless of VEC_SIZE -- confirm the
+ *            host side always configures 16 elements per iteration.
+ *
+ * @param[in] input1_ptr Pointer to the source tensor. Supported data types: QASYMM8
+ * @param[in] input1_stride_x Stride of the source tensor in X dimension (in bytes)
+ * @param[in] input1_step_x input1_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] input1_stride_y Stride of the source tensor in Y dimension (in bytes)
+ * @param[in] input1_step_y input1_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] input1_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] input1_step_z input1_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] input1_offset_first_element_in_bytes The offset of the first element in the source tensor
+ *
+ * @param[in] input2_ptr Pointer to the source tensor. Supported data types: QASYMM8
+ * @param[in] input2_stride_x Stride of the source tensor in X dimension (in bytes)
+ * @param[in] input2_step_x input2_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] input2_stride_y Stride of the source tensor in Y dimension (in bytes)
+ * @param[in] input2_step_y input2_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] input2_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] input2_step_z input2_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] input2_offset_first_element_in_bytes The offset of the first element in the source tensor
+ *
+ * @param[out] output_ptr Pointer to the destination tensor. Supported data types: QASYMM8
+ * @param[in] output_stride_x Stride of the destination tensor in X dimension (in bytes)
+ * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] output_stride_y Stride of the destination tensor in Y dimension (in bytes)
+ * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] output_stride_z Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination tensor
+ */
+__kernel void notequal_quantized(
+    TENSOR3D_DECLARATION(in1),
+    TENSOR3D_DECLARATION(in2),
+    TENSOR3D_DECLARATION(out))
+{
+  // Get pixels pointer
+  Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1);
+  Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
+  Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
+
+  // Load 16 quantized uchar values from each input and widen to int so the
+  // offset subtraction below cannot wrap.
+  int16 in_a = CONVERT(vload16(0, (__global uchar *)in1.ptr), int16);
+  int16 in_b = CONVERT(vload16(0, (__global uchar *)in2.ptr), int16);
+
+  // Dequantize each lane: (q - offset) * scale
+  in_a = SUB(in_a, (int16)((int)OFFSET_IN1));
+  in_b = SUB(in_b, (int16)((int)OFFSET_IN2));
+
+  const float16 in1f32 = convert_float16(in_a) * (float16)((float)SCALE_IN1);
+  const float16 in2f32 = convert_float16(in_b) * (float16)((float)SCALE_IN2);
+
+  // The vector != operator yields -1 (all bits set) for true lanes and 0 for
+  // false lanes, so the per-lane select maps straight to 1/0 uchars. This
+  // avoids the previous exact-valued but redundant round-trip through
+  // convert_float16 / convert_int16_rte / convert_uchar16_sat.
+  const uchar16 res = convert_uchar16((in1f32 != in2f32) ? (int16)1 : (int16)0);
+
+  // Store result
+  vstore16(res, 0, (__global uchar *)out.ptr);
+}
const TensorShape &out_shape =
TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16,
- DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16,
- DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
+ input1, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32, DataType::QASYMM8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
+ input2, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32, DataType::QASYMM8);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0,
"Inputs are not broadcast compatible");
build_opts.emplace(
("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_processed_per_iteration)));
+ if (is_data_type_quantized_asymmetric(input1->info()->data_type()))
+ {
+ build_opts.emplace("-DOFFSET_IN1=" +
+ support::cpp11::to_string(input1->info()->quantization_info().offset));
+ build_opts.emplace("-DOFFSET_IN2=" +
+ support::cpp11::to_string(input2->info()->quantization_info().offset));
+ build_opts.emplace("-DSCALE_IN1=" +
+ support::cpp11::to_string(input1->info()->quantization_info().scale));
+ build_opts.emplace("-DSCALE_IN2=" +
+ support::cpp11::to_string(input2->info()->quantization_info().scale));
+ kernel_name += "_quantized";
+ }
+
_kernel =
static_cast<cl::Kernel>(CLKernelLibraryEx::get().create_kernel(kernel_name, build_opts));