2 * Copyright (c) 2016, 2017 ARM Limited.
4 * SPDX-License-Identifier: MIT
6 * Permission is hereby granted, free of charge, to any person obtaining a copy
7 * of this software and associated documentation files (the "Software"), to
8 * deal in the Software without restriction, including without limitation the
9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
10 * sell copies of the Software, and to permit persons to whom the Software is
11 * furnished to do so, subject to the following conditions:
13 * The above copyright notice and this permission notice shall be included in all
14 * copies or substantial portions of the Software.
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24 #include "arm_compute/core/NEON/kernels/NEDepthConvertKernel.h"
26 #include "arm_compute/core/Error.h"
27 #include "arm_compute/core/Helpers.h"
28 #include "arm_compute/core/ITensor.h"
29 #include "arm_compute/core/TensorInfo.h"
30 #include "arm_compute/core/Validate.h"
34 using namespace arm_compute;
39 } // namespace arm_compute
41 NEDepthConvertKernel::NEDepthConvertKernel()
42 : _policy(), _shift(0)
46 void NEDepthConvertKernel::configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
48 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32);
49 ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32);
50 ARM_COMPUTE_ERROR_ON(shift >= 8);
51 ARM_COMPUTE_ERROR_ON(input == output);
52 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == output->info()->data_type(), "Input and output data_types must be different");
54 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::S16 && output->info()->data_type() != DataType::U16
55 && output->info()->data_type() != DataType::U32
56 && output->info()->data_type() != DataType::S32),
57 "Only data_types supported [in] U8 -> [out] U16, S16, U32, S32");
59 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32
60 && output->info()->data_type() != DataType::S32),
61 "Only data_types supported [in] U16 -> [out] U8, U32, S32");
63 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32
64 && output->info()->data_type() != DataType::S32),
65 "Only data_types supported [in] S16 -> [out] U8, U32, S32");
67 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U32 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U16
68 && output->info()->data_type() != DataType::S16),
69 "Only data_types supported [in] S16 -> [out] U8, U16, S16");
71 ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S32 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U16
72 && output->info()->data_type() != DataType::S16),
73 "Only data_types supported [in] S16 -> [out] U8, U16, S16");
78 constexpr unsigned int num_elems_processed_per_iteration(16);
79 INESimpleKernel::configure(input, output, num_elems_processed_per_iteration);
82 void NEDepthConvertKernel::run(const Window &window)
84 ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
85 ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INESimpleKernel::window(), window);
86 ARM_COMPUTE_ERROR_ON(nullptr == _input);
87 ARM_COMPUTE_ERROR_ON(nullptr == _output);
88 ARM_COMPUTE_ERROR_ON(_input == _output);
90 Iterator input(_input, window);
91 Iterator output(_output, window);
93 switch(_input->info()->data_type())
97 const int16x8_t b = vdupq_n_s16(_shift);
99 switch(_output->info()->data_type())
103 /* Up-conversion U8 -> S16 */
104 execute_window_loop(window, [&](const Coordinates & id)
106 const uint8x16_t texels_u8 = vld1q_u8(input.ptr());
108 const int16x8x2_t texels =
111 vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), b),
112 vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b)
116 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), texels.val[0]);
117 vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, texels.val[1]);
124 /* Up-conversion S16 -> S32 */
125 execute_window_loop(window, [&](const Coordinates & id)
127 const uint8x16_t texels_u8 = vld1q_u8(input.ptr());
129 const int16x8x2_t texels =
132 vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), b),
133 vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b)
137 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()), vmovl_s16(vget_low_s16(texels.val[0])));
138 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 4, vmovl_s16(vget_high_s16(texels.val[0])));
139 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 8, vmovl_s16(vget_low_s16(texels.val[1])));
140 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 12, vmovl_s16(vget_high_s16(texels.val[1])));
147 /* Up-conversion U8 -> U16 */
148 execute_window_loop(window, [&](const Coordinates & id)
150 const uint8x16_t texels_u8 = vld1q_u8(input.ptr());
152 const uint16x8x2_t texels =
155 vshlq_u16(vmovl_u8(vget_low_u8(texels_u8)), b),
156 vshlq_u16(vmovl_u8(vget_high_u8(texels_u8)), b)
160 vst1q_u16(reinterpret_cast<uint16_t *>(output.ptr()), texels.val[0]);
161 vst1q_u16(reinterpret_cast<uint16_t *>(output.ptr()) + 8, texels.val[1]);
167 ARM_COMPUTE_ERROR("Output data type not supported");
173 switch(_output->info()->data_type())
177 const int16x8_t b = vdupq_n_s16(-static_cast<int16_t>(_shift));
179 /* Down-conversion S16 -> U8 */
180 if(ConvertPolicy::SATURATE == _policy)
182 execute_window_loop(window, [&](const Coordinates & id)
184 const int16x8x2_t texels =
187 vqshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr())), b),
188 vqshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr()) + 8), b)
192 vst1q_u8(output.ptr(), vcombine_u8(vqmovun_s16(texels.val[0]), vqmovun_s16(texels.val[1])));
198 execute_window_loop(window, [&](const Coordinates & id)
200 const int16x8x2_t texels =
203 vshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr())), b),
204 vshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr()) + 8), b)
208 vst1q_u8(output.ptr(), vcombine_u8(vmovn_u16(vreinterpretq_u16_s16(texels.val[0])),
209 vmovn_u16(vreinterpretq_u16_s16(texels.val[1]))));
217 const int16x8_t b = vdupq_n_s16(_shift);
219 /* Up-conversion S16 -> S32 */
220 execute_window_loop(window, [&](const Coordinates & id)
222 const int16x8x2_t texels =
225 vshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr())), b),
226 vshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr()) + 8), b)
230 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()), vmovl_s16(vget_low_s16(texels.val[0])));
231 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 4, vmovl_s16(vget_high_s16(texels.val[0])));
232 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 8, vmovl_s16(vget_low_s16(texels.val[1])));
233 vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 12, vmovl_s16(vget_high_s16(texels.val[1])));
239 ARM_COMPUTE_ERROR("Output data type not supported");
245 switch(_output->info()->data_type())
249 const int16x8_t b = vdupq_n_s16(-static_cast<int16_t>(_shift));
251 /* Down-conversion U16 -> U8 */
252 if(ConvertPolicy::SATURATE == _policy)
254 execute_window_loop(window, [&](const Coordinates & id)
256 const uint16x8x2_t texels =
259 vqshlq_u16(vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr())), b),
260 vqshlq_u16(vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr()) + 8), b)
264 vst1q_u8(output.ptr(), vcombine_u8(vqmovn_u16(texels.val[0]), vqmovn_u16(texels.val[1])));
270 execute_window_loop(window, [&](const Coordinates & id)
272 const uint16x8x2_t texels =
275 vshlq_u16(vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr())), b),
276 vshlq_u16(vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr()) + 8), b)
280 vst1q_u8(output.ptr(), vcombine_u8(vqmovn_u16(texels.val[0]), vqmovn_u16(texels.val[1])));
288 const int32x4_t b = vdupq_n_s32(_shift);
290 /* Up-conversion U16 -> U32 */
291 execute_window_loop(window, [&](const Coordinates & id)
293 const uint16x8x2_t texels =
296 vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr())),
297 vld1q_u16(reinterpret_cast<uint16_t *>(input.ptr()) + 8)
301 vst1q_u32(reinterpret_cast<uint32_t *>(output.ptr()), vshlq_u32(vmovl_u16(vget_low_u16(texels.val[0])), b));
302 vst1q_u32(reinterpret_cast<uint32_t *>(output.ptr()) + 4, vshlq_u32(vmovl_u16(vget_high_u16(texels.val[0])), b));
303 vst1q_u32(reinterpret_cast<uint32_t *>(output.ptr()) + 8, vshlq_u32(vmovl_u16(vget_low_u16(texels.val[1])), b));
304 vst1q_u32(reinterpret_cast<uint32_t *>(output.ptr()) + 12, vshlq_u32(vmovl_u16(vget_high_u16(texels.val[1])), b));
310 ARM_COMPUTE_ERROR("Output data type not supported");
315 ARM_COMPUTE_ERROR("Not supported");