3 * Copyright (C) 2020 Samsung Electronics
4 * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
9 * @brief Data format converter for NPU Engine (NE) users.
10 * @author Dongju Chae <dongju.chae@samsung.com>
11 * @bug No known bugs except for NYI items
/*
 * switch_npu_type(S, n): select the QuantizerImpl instantiation for the
 * datatype n, using S as the first template argument.
 *   DATA_TYPE_QASYMM8 -> quantizer = new QuantizerImpl<S, uint8_t>
 *   DATA_TYPE_QSYMM16 -> quantizer = new QuantizerImpl<S, int16_t>
 * DATA_TYPE_SRNPU is listed immediately before DATA_TYPE_QASYMM8 and
 * presumably shares its branch (the lines in between are not visible in
 * this excerpt; TODO confirm). The trailing logerr call reports an
 * unsupported datatype, presumably from the default branch.
 *
 * The macro assigns to a variable named quantizer, which must already be
 * declared at the expansion site. The object is created with new.
 * NOTE(review): no matching delete is visible in this excerpt; confirm the
 * caller releases it.
 */
20 #define switch_npu_type(S, n) \
22 case DATA_TYPE_SRNPU: \
24 case DATA_TYPE_QASYMM8: \
25 quantizer = new QuantizerImpl<S, uint8_t>; \
27 case DATA_TYPE_QSYMM16: \
28 quantizer = new QuantizerImpl<S, int16_t>; \
31 logerr (TAG, "Unsupported datatype %d\n", n); \
/*
 * switch_std_npu_type(s, n): map the datatype s to a C++ element type and
 * forward it, together with n, to switch_npu_type.
 *   DATA_TYPE_INT8    -> int8_t      DATA_TYPE_UINT8   -> uint8_t
 *   DATA_TYPE_INT16   -> int16_t     DATA_TYPE_UINT16  -> uint16_t
 *   DATA_TYPE_INT32   -> int32_t     DATA_TYPE_UINT32  -> uint32_t
 *   DATA_TYPE_INT64   -> int64_t     DATA_TYPE_UINT64  -> uint64_t
 *   DATA_TYPE_FLOAT32 -> float       DATA_TYPE_FLOAT64 -> double
 * The trailing logerr call reports s as an unsupported datatype, presumably
 * from the default branch (the label itself is not visible in this excerpt).
 */
35 #define switch_std_npu_type(s, n) \
37 case DATA_TYPE_INT8: \
38 switch_npu_type (int8_t, n); \
40 case DATA_TYPE_UINT8: \
41 switch_npu_type (uint8_t, n); \
43 case DATA_TYPE_INT16: \
44 switch_npu_type (int16_t, n); \
46 case DATA_TYPE_UINT16: \
47 switch_npu_type (uint16_t, n); \
49 case DATA_TYPE_INT32: \
50 switch_npu_type (int32_t, n); \
52 case DATA_TYPE_UINT32: \
53 switch_npu_type (uint32_t, n); \
55 case DATA_TYPE_INT64: \
56 switch_npu_type (int64_t, n); \
58 case DATA_TYPE_UINT64: \
59 switch_npu_type (uint64_t, n); \
61 case DATA_TYPE_FLOAT32: \
62 switch_npu_type (float, n); \
64 case DATA_TYPE_FLOAT64: \
65 switch_npu_type (double, n); \
68 logerr (TAG, "Unsupported datatype %d\n", s); \
/*
 * DECLARE_QUANTIZER(st, dt): declare a local Quantizer pointer named
 * quantizer, initialised to nullptr, and populate it only when the element
 * sizes of st and dt differ (get_data_size (st) != get_data_size (dt)).
 *
 * The implementation is chosen through switch_std_npu_type, called either
 * as (st, dt) or as (dt, st). The condition choosing between the two orders
 * is on a line not visible here; presumably it is to_npu_ (TODO confirm).
 * Afterwards the quantizer receives the direction (to_npu_) and the
 * quantization parameters (zero_, scale_), so those three names must be in
 * scope at the expansion site.
 *
 * When the element sizes match, quantizer stays nullptr.
 * NOTE(review): quantizer is dereferenced right after the selection. Verify
 * that the unsupported-datatype paths leave the enclosing function before
 * reaching set_direction, otherwise this dereferences nullptr.
 */
72 #define DECLARE_QUANTIZER(st, dt) \
73 Quantizer *quantizer = nullptr; \
74 if (get_data_size (st) != get_data_size (dt)) { \
76 switch_std_npu_type (st, dt) else switch_std_npu_type (dt, st) \
77 quantizer->set_direction (to_npu_); \
78 quantizer->set_quant (zero_, scale_); \
/*
 * Quantizer: base interface through which quantizing copies are invoked
 * via a Quantizer pointer. It holds the copy direction (to_npu_) and the
 * quantization parameters (zero_, scale_).
 */
81 /** @brief The base class for quantization */
/* Defaults: direction is to-NPU, zero point 0, scale 0.0. */
84 Quantizer () : to_npu_ (true), zero_ (0), scale_ (0.0) {}
/* Virtual destructor, so deleting through a Quantizer pointer is well defined. */
85 virtual ~Quantizer () {}
/* Select the copy direction; true means towards the NPU. */
87 void set_direction (bool to_npu) { to_npu_ = to_npu; }
/* Supply zero point and scale. The body is not visible in this excerpt;
 * presumably it stores them into zero_ and scale_ (TODO confirm). */
89 void set_quant (uint32_t zero, float scale) {
/* The base implementation is an empty body: it copies nothing. */
94 virtual void memcpy (void *dst, void *src, size_t size) {}
/*
 * QuantizerImpl<S, N>: typed quantizer. The member definitions in this
 * file read S elements and write N elements when quantizing, and the
 * reverse when dequantizing.
 */
102 /** @brief The derived class for quantization with various data types */
103 template <typename S, typename N>
104 class QuantizerImpl : public Quantizer {
/* Same signature as the virtual Quantizer::memcpy; size is in bytes. */
109 void memcpy (void *dst, void *src, size_t size);
/* Element-wise helpers; num is an element count, not a byte count. */
112 void quantized_memcpy (void *dst, void *src, uint32_t num);
113 void dequantized_memcpy (void *dst, void *src, uint32_t num);
/*
 * Byte-sized entry point that forwards to one of the element-wise helpers.
 *
 * size is divided by the width of the element type read from src in each
 * direction: sizeof (S) for quantized_memcpy and sizeof (N) for
 * dequantized_memcpy. It is therefore the byte length of the src buffer.
 * The condition choosing between the two calls is on a line not visible
 * here; presumably it is to_npu_ (TODO confirm).
 *
 * NOTE(review): the quotient is a size_t passed to a uint32_t parameter,
 * so element counts above UINT32_MAX would be truncated.
 */
116 /** @brief memory copy wrapper for quantization */
117 template <typename S, typename N>
119 QuantizerImpl<S, N>::memcpy (void *dst, void *src, size_t size) {
121 quantized_memcpy (dst, src, size / sizeof (S));
123 dequantized_memcpy (dst, src, size / sizeof (N));
/*
 * Copy num elements from src (read as S) to dst (written as N), clamping
 * each value to [min, max] before the narrowing cast.
 *
 * @param dst destination buffer, indexed as N[num]
 * @param src source buffer, indexed as S[num]
 * @param num number of elements (not bytes)
 *
 * All arithmetic is done in double. min and max are assigned inside the
 * switch on sizeof (N); those assignments, and the statements that apply
 * scale and zero to val, are on lines not visible in this excerpt.
 *
 * The call is rejected with an error log when src or dst is null, num is
 * 0, or scale is 0.0. A default-constructed Quantizer has scale 0.0, so a
 * non-zero scale must be supplied first.
 */
126 /** @brief quantized memory copy */
127 template <typename S, typename N>
129 QuantizerImpl<S, N>::quantized_memcpy (void *dst, void *src, uint32_t num) {
130 double scale = (double) scale_;
131 double zero = (double) zero_;
132 double min, max, val;
134 if (!src || !dst || num == 0 || scale == 0.0) {
135 logerr (TAG, "Invalid parameter detected\n");
139 switch (sizeof (N)) {
/* sizeof yields size_t, which is printed with %zu. */
149 logerr (TAG, "Unsupported quantization size: %zu\n", sizeof (N));
155 val = ((S *) src)[idx];
/* Saturate to the representable range before narrowing to N. */
158 val = (val > max) ? max : val;
159 val = (val < min) ? min : val;
160 ((N *) dst)[idx++] = (N) val;
/*
 * Copy num elements from src (read as N) to dst (written as S).
 *
 * @param dst destination buffer, indexed as S[num]
 * @param src source buffer, indexed as N[num]
 * @param num number of elements (not bytes)
 *
 * scale and zero are widened to double. The statements that apply them to
 * val are on lines not visible in this excerpt. One branch of the switch
 * on sizeof (N) forces zero to 0; which case label owns it is not visible
 * here (presumably the 2-byte one; TODO confirm).
 *
 * The call is rejected with an error log when src or dst is null or num is
 * 0. Unlike quantized_memcpy, a scale of 0.0 is not rejected here.
 */
164 /** @brief dequantized memory copy */
165 template <typename S, typename N>
167 QuantizerImpl<S, N>::dequantized_memcpy (void *dst, void *src, uint32_t num) {
168 double scale = (double) scale_;
169 double zero = (double) zero_;
172 if (!src || !dst || num == 0) {
173 logerr (TAG, "Invalid parameter detected\n");
177 switch (sizeof (N)) {
181 zero = 0; /* ignored */
/* sizeof yields size_t, which is printed with %zu. */
184 logerr (TAG, "Unsupported quantization size: %zu\n", sizeof (N));
190 val = ((N *) src)[idx];
193 ((S *) dst)[idx++] = (S) val;
/*
 * Examines src_layout_/dst_layout_ and src_type_/dst_type_ through a
 * sequence of checks. The result returned by each check is on a line not
 * visible in this excerpt, so only the conditions are described below.
 */
198 * @brief check data format conversion capability
199 * @return true or false
200 * @note Support NHWC/NCHW as the standard format
203 DataConverter::checkCapability () {
204 /* if not resolved yet */
/* Either layout, or either datatype, is still the MODEL placeholder. */
205 if (src_layout_ == DATA_LAYOUT_MODEL || dst_layout_ == DATA_LAYOUT_MODEL)
208 if (src_type_ == DATA_TYPE_MODEL || dst_type_ == DATA_TYPE_MODEL)
/* Source and destination layouts are identical. */
212 if (src_layout_ == dst_layout_)
/* At least one side uses the RAW layout. */
216 if (src_layout_ == DATA_LAYOUT_RAW || dst_layout_ == DATA_LAYOUT_RAW)
219 /* standard -> trinity */
/* Source is NHWC or NCHW and destination is TRIV or TRIV2. */
220 if ((src_layout_ == DATA_LAYOUT_NHWC || src_layout_ == DATA_LAYOUT_NCHW) &&
221 (dst_layout_ == DATA_LAYOUT_TRIV || dst_layout_ == DATA_LAYOUT_TRIV2))
224 /* trinity -> standard */
/* Source is TRIV or TRIV2 and destination is NHWC or NCHW. */
225 if ((src_layout_ == DATA_LAYOUT_TRIV || src_layout_ == DATA_LAYOUT_TRIV2) &&
226 (dst_layout_ == DATA_LAYOUT_NHWC || dst_layout_ == DATA_LAYOUT_NCHW))
/*
 * Decides whether the data must be re-laid-out. The RAW layout is checked
 * on the NPU side of the transfer: the destination when to_npu_ is set,
 * the source otherwise. The value returned by those two checks is on lines
 * not visible in this excerpt (presumably false, per the existing comment
 * that RAW is ignored; TODO confirm). In every other case conversion is
 * needed exactly when the two layouts differ.
 */
233 * @brief check layout conversion is required
234 * @return true if required. otherwise false
237 DataConverter::needLayoutConversion () {
238 /* don't care about DATA_LAYOUT_RAW */
239 if (to_npu_ && dst_layout_ == DATA_LAYOUT_RAW)
242 if (!to_npu_ && src_layout_ == DATA_LAYOUT_RAW)
245 return src_layout_ != dst_layout_;
/*
 * Copies src_ into dst_, re-arranging the element order when
 * needLayoutConversion () says so, and routing each copy through either
 * quantizer->memcpy or plain memcpy.
 *
 * This excerpt does not show the declarations of src_offset, dst_offset
 * and slice_size, the condition that chooses between the two copy calls,
 * or the return statements. Presumably the quantizer path is taken when
 * DECLARE_QUANTIZER produced a non-null quantizer (TODO confirm).
 */
249 * @brief convert data format, copying the converted source data to destination.
250 * @return the number of bytes copied
253 DataConverter::perform () {
254 /* check parameters first */
/* NOTE(review): the two messages below have no trailing \n, unlike the
 * other logerr calls in this file. Confirm whether logerr appends one. */
255 if (!src_ || !dst_ || size_ == 0 || !dims_) {
256 logerr (TAG, "Invalid parameter detected");
260 if (!checkCapability ()) {
261 logerr (TAG, "Unable to pass capability check");
/* Declares the local quantizer; it stays nullptr when the source and
 * destination element sizes are equal. */
265 DECLARE_QUANTIZER (src_type_, dst_type_);
267 if (needLayoutConversion ()) {
268 /* trinity device assumes the NHWC-based layout */
/* dims_ is read as { batch, height, width, depth }. */
269 uint32_t batch = dims_[0];
270 uint32_t height = dims_[1];
271 uint32_t width = dims_[2];
272 uint32_t depth = dims_[3];
/* Element widths in bytes on each side of the copy. */
274 uint32_t src_data_size = get_data_size (src_type_);
275 uint32_t dst_data_size = get_data_size (dst_type_);
/* Element width on the NPU side: the destination width in one branch and
 * the source width in the other. The branch condition is not visible
 * here; presumably it is to_npu_ (TODO confirm). */
276 uint32_t npu_data_size;
278 npu_data_size = dst_data_size;
280 npu_data_size = src_data_size;
/* Start from DATA_GRANULARITY and divide it by granularity_div. The
 * divisor doubles for 2-byte NPU elements, and doubles once more under a
 * condition that is not visible in this excerpt. */
282 uint32_t granularity = DATA_GRANULARITY;
283 uint32_t granularity_div = 1;
286 granularity_div *= 2;
287 if (npu_data_size == 2)
288 granularity_div *= 2;
290 granularity /= granularity_div;
/* MPA_L is the number of consecutive depth elements in one NPU-side slice. */
292 uint32_t MPA_L = granularity;
/* ---- NHWC -> TRIV2: copy up to MPA_L depth elements per step ---- */
297 if (src_layout_ == DATA_LAYOUT_NHWC && dst_layout_ == DATA_LAYOUT_TRIV2) {
298 /* special handling: input image */
/* The actions taken by the two special cases below are on lines that are
 * not visible in this excerpt. */
299 if (depth == 1 || depth == 3)
301 /* special handling: depth == granularity */
302 if (depth == granularity)
305 for (uint32_t n = 0; n < batch; n++) {
306 for (uint32_t h = 0; h < height; h++) {
307 for (uint32_t w = 0; w < width; w++) {
308 for (uint32_t d = 0; d < depth; d += MPA_L) {
/* src_offset is the linear NHWC element index of (n, h, w, d). dst_offset
 * addresses slice d / MPA_L, with MPA_L elements per (h, w) position.
 * NOTE(review): the (n + d / MPA_L) term gives (n = 1, d = 0) and
 * (n = 0, d = MPA_L) the same offset. Confirm that batch > 1 combined
 * with depth > MPA_L cannot occur, or that this overlap is intended. */
309 src_offset = d + depth * (w + width * (h + n * height));
310 dst_offset = MPA_L * (w + width * (h + (n + d / MPA_L) * height));
/* Convert element indices to byte offsets. */
312 src_offset *= src_data_size;
313 dst_offset *= dst_data_size;
/* The last slice may be shorter than MPA_L. The size is in source bytes. */
315 slice_size = (depth - d >= MPA_L) ? MPA_L : depth - d;
316 slice_size *= src_data_size;
/* Quantizing copy versus plain byte copy of the same slice. */
319 quantizer->memcpy (static_cast<char *> (dst_) + dst_offset,
320 static_cast<char *> (src_) + src_offset,
323 memcpy (static_cast<char *> (dst_) + dst_offset,
324 static_cast<char *> (src_) + src_offset, slice_size);
/* ---- NCHW -> TRIV2: copy one element per step ---- */
329 } else if (src_layout_ == DATA_LAYOUT_NCHW &&
330 dst_layout_ == DATA_LAYOUT_TRIV2) {
331 /* special handling: NHWC == NCHW */
/* One source element per copy. */
337 slice_size = src_data_size;
338 for (uint32_t n = 0; n < batch; n++) {
339 for (uint32_t d = 0; d < depth; d++) {
340 for (uint32_t h = 0; h < height; h++) {
341 for (uint32_t w = 0; w < width; w++) {
/* src_offset is the linear NCHW element index. dst_offset places the
 * element at position d % MPA_L inside slice d / MPA_L. */
342 src_offset = w + width * (h + height * (d + n * depth));
343 dst_offset = (d % MPA_L) +
344 MPA_L * (w + width * (h + (n + d / MPA_L) * height));
346 src_offset *= src_data_size;
347 dst_offset *= dst_data_size;
350 quantizer->memcpy (static_cast<char *> (dst_) + dst_offset,
351 static_cast<char *> (src_) + src_offset,
354 memcpy (static_cast<char *> (dst_) + dst_offset,
355 static_cast<char *> (src_) + src_offset, slice_size);
/* ---- TRIV2 -> NHWC: same indexing as NHWC -> TRIV2 with src and dst swapped ---- */
360 } else if (src_layout_ == DATA_LAYOUT_TRIV2 &&
361 dst_layout_ == DATA_LAYOUT_NHWC) {
362 /* special handling: depth == granularity */
363 if (depth == granularity)
366 for (uint32_t n = 0; n < batch; n++) {
367 for (uint32_t h = 0; h < height; h++) {
368 for (uint32_t w = 0; w < width; w++) {
369 for (uint32_t d = 0; d < depth; d += MPA_L) {
370 dst_offset = d + depth * (w + width * (h + n * height));
371 src_offset = MPA_L * (w + width * (h + (n + d / MPA_L) * height));
373 src_offset *= src_data_size;
374 dst_offset *= dst_data_size;
/* The slice length is again expressed in source bytes. */
376 slice_size = (depth - d >= MPA_L) ? MPA_L : depth - d;
377 slice_size *= src_data_size;
380 quantizer->memcpy (static_cast<char *> (dst_) + dst_offset,
381 static_cast<char *> (src_) + src_offset,
384 memcpy (static_cast<char *> (dst_) + dst_offset,
385 static_cast<char *> (src_) + src_offset, slice_size);
/* ---- TRIV2 -> NCHW: same indexing as NCHW -> TRIV2 with src and dst swapped ---- */
390 } else if (src_layout_ == DATA_LAYOUT_TRIV2 &&
391 dst_layout_ == DATA_LAYOUT_NCHW) {
/* One source element per copy. */
393 slice_size = src_data_size;
394 for (uint32_t n = 0; n < batch; n++) {
395 for (uint32_t d = 0; d < depth; d++) {
396 for (uint32_t h = 0; h < height; h++) {
397 for (uint32_t w = 0; w < width; w++) {
398 dst_offset = w + width * (h + height * (d + n * depth));
399 src_offset = (d % MPA_L) +
400 MPA_L * (w + width * (h + (n + d / MPA_L) * height));
402 src_offset *= src_data_size;
403 dst_offset *= dst_data_size;
406 quantizer->memcpy (static_cast<char *> (dst_) + dst_offset,
407 static_cast<char *> (src_) + src_offset,
410 memcpy (static_cast<char *> (dst_) + dst_offset,
411 static_cast<char *> (src_) + src_offset, slice_size);
/* ---- no layout conversion: copy the whole buffer of size_ bytes ---- */
422 quantizer->memcpy (dst_, src_, size_);
424 memcpy (dst_, src_, size_);