This patch adds support for data layout conversion between NCHW and TRIV2 (in both directions), alongside the existing NHWC conversion.
Signed-off-by: Dongju Chae <dongju.chae@samsung.com>
/**
* @brief check data format conversion capability
* @return true or false
- * @note Support only NHWC as the standard format
+ * @note Support NHWC/NCHW as the standard format
*/
bool
DataConverter::checkCapability () {
return true;
/* standard -> trinity */
- if ((src_layout_ == DATA_LAYOUT_NHWC) &&
+ if ((src_layout_ == DATA_LAYOUT_NHWC || src_layout_ == DATA_LAYOUT_NCHW) &&
(dst_layout_ == DATA_LAYOUT_TRIV || dst_layout_ == DATA_LAYOUT_TRIV2))
return true;
/* trinity -> standard */
if ((src_layout_ == DATA_LAYOUT_TRIV || src_layout_ == DATA_LAYOUT_TRIV2) &&
- (dst_layout_ == DATA_LAYOUT_NHWC))
+ (dst_layout_ == DATA_LAYOUT_NHWC || dst_layout_ == DATA_LAYOUT_NCHW))
return true;
return false;
granularity /= granularity_div;
- bool input_image = (to_npu_ && (depth == 1 || depth == 3));
- bool layout_conversion =
- (!input_image && (depth != granularity) && (src_layout_ != dst_layout_));
-
DECLARE_QUANTIZER (src_type_, dst_type_);
- if (layout_conversion) {
+ if (src_layout_ != dst_layout_) {
uint32_t MPA_L = granularity;
- uint32_t std_offset;
- uint32_t npu_offset;
uint32_t src_offset;
uint32_t dst_offset;
uint32_t slice_size;
- /* NHWC-based */
- for (uint32_t n = 0; n < batch; n++) {
- for (uint32_t h = 0; h < height; h++) {
- for (uint32_t w = 0; w < width; w++) {
- for (uint32_t d = 0; d < depth; d += MPA_L) {
- std_offset = d + depth * (w + width * (h + n * height));
- npu_offset = MPA_L * (w + width * (h + (n + d / MPA_L) * height));
- slice_size = (depth - d >= MPA_L) ? MPA_L : depth - d;
-
- if (to_npu_) {
- src_offset = std_offset * src_data_size;
- dst_offset = npu_offset * dst_data_size;
- } else {
- src_offset = npu_offset * src_data_size;
- dst_offset = std_offset * dst_data_size;
+ if (src_layout_ == DATA_LAYOUT_NHWC && dst_layout_ == DATA_LAYOUT_TRIV2) {
+ /* special handling: input image */
+ if (depth == 1 || depth == 3)
+ goto try_quantize;
+ /* special handling: depth == granularity */
+ if (depth == granularity)
+ goto try_quantize;
+ /* NHWC --> TRIV2 */
+ for (uint32_t n = 0; n < batch; n++) {
+ for (uint32_t h = 0; h < height; h++) {
+ for (uint32_t w = 0; w < width; w++) {
+ for (uint32_t d = 0; d < depth; d += MPA_L) {
+ src_offset = d + depth * (w + width * (h + n * height));
+ dst_offset = MPA_L * (w + width * (h + (n + d / MPA_L) * height));
+
+ src_offset *= src_data_size;
+ dst_offset *= dst_data_size;
+
+ slice_size = (depth - d >= MPA_L) ? MPA_L : depth - d;
+ slice_size *= src_data_size;
+
+ if (quantizer)
+ quantizer->memcpy (static_cast<char *> (dst_) + dst_offset,
+ static_cast<char *> (src_) + src_offset,
+ slice_size);
+ else
+ memcpy (static_cast<char *> (dst_) + dst_offset,
+ static_cast<char *> (src_) + src_offset, slice_size);
+ }
+ }
+ }
+ }
+ } else if (src_layout_ == DATA_LAYOUT_NCHW &&
+ dst_layout_ == DATA_LAYOUT_TRIV2) {
+ /* special handling: NHWC == NCHW */
+ if (depth == 1)
+ goto try_quantize;
+ if (depth == 3)
+ MPA_L = 3;
+ /* NCHW --> TRIV2 */
+ slice_size = src_data_size;
+ for (uint32_t n = 0; n < batch; n++) {
+ for (uint32_t d = 0; d < depth; d++) {
+ for (uint32_t h = 0; h < height; h++) {
+ for (uint32_t w = 0; w < width; w++) {
+ src_offset = w + width * (h + height * (d + n * depth));
+ dst_offset = (d % MPA_L) +
+ MPA_L * (w + width * (h + (n + d / MPA_L) * height));
+
+ src_offset *= src_data_size;
+ dst_offset *= dst_data_size;
+
+ if (quantizer)
+ quantizer->memcpy (static_cast<char *> (dst_) + dst_offset,
+ static_cast<char *> (src_) + src_offset,
+ slice_size);
+ else
+ memcpy (static_cast<char *> (dst_) + dst_offset,
+ static_cast<char *> (src_) + src_offset, slice_size);
+ }
+ }
+ }
+ }
+ } else if (src_layout_ == DATA_LAYOUT_TRIV2 &&
+ dst_layout_ == DATA_LAYOUT_NHWC) {
+ /* special handling: depth == granularity */
+ if (depth == granularity)
+ goto try_quantize;
+ /* TRIV2 --> NHWC */
+ for (uint32_t n = 0; n < batch; n++) {
+ for (uint32_t h = 0; h < height; h++) {
+ for (uint32_t w = 0; w < width; w++) {
+ for (uint32_t d = 0; d < depth; d += MPA_L) {
+ dst_offset = d + depth * (w + width * (h + n * height));
+ src_offset = MPA_L * (w + width * (h + (n + d / MPA_L) * height));
+
+ src_offset *= src_data_size;
+ dst_offset *= dst_data_size;
+
+ slice_size = (depth - d >= MPA_L) ? MPA_L : depth - d;
+ slice_size *= src_data_size;
+
+ if (quantizer)
+ quantizer->memcpy (static_cast<char *> (dst_) + dst_offset,
+ static_cast<char *> (src_) + src_offset,
+ slice_size);
+ else
+ memcpy (static_cast<char *> (dst_) + dst_offset,
+ static_cast<char *> (src_) + src_offset, slice_size);
+ }
+ }
+ }
+ }
+ } else if (src_layout_ == DATA_LAYOUT_TRIV2 &&
+ dst_layout_ == DATA_LAYOUT_NCHW) {
+ /* TRIV2 --> NCHW */
+ slice_size = src_data_size;
+ for (uint32_t n = 0; n < batch; n++) {
+ for (uint32_t d = 0; d < depth; d++) {
+ for (uint32_t h = 0; h < height; h++) {
+ for (uint32_t w = 0; w < width; w++) {
+ dst_offset = w + width * (h + height * (d + n * depth));
+ src_offset = (d % MPA_L) +
+ MPA_L * (w + width * (h + (n + d / MPA_L) * height));
+
+ src_offset *= src_data_size;
+ dst_offset *= dst_data_size;
+
+ if (quantizer)
+ quantizer->memcpy (static_cast<char *> (dst_) + dst_offset,
+ static_cast<char *> (src_) + src_offset,
+ slice_size);
+ else
+ memcpy (static_cast<char *> (dst_) + dst_offset,
+ static_cast<char *> (src_) + src_offset, slice_size);
}
-
- slice_size *= src_data_size;
-
- if (quantizer)
- quantizer->memcpy (static_cast<char *> (dst_) + dst_offset,
- static_cast<char *> (src_) + src_offset,
- slice_size);
- else
- memcpy (static_cast<char *> (dst_) + dst_offset,
- static_cast<char *> (src_) + src_offset, slice_size);
}
}
}
}
- } else if (quantizer) {
+ goto done;
+ }
+
+try_quantize:
+ if (quantizer) {
quantizer->memcpy (dst_, src_, size_);
} else {
memcpy (dst_, src_, size_);
}
+done:
if (quantizer)
delete quantizer;
void setDataDims (const uint32_t *dims) { dims_ = dims; }
void setDataLayout (data_layout src, data_layout dst) {
- src_layout_ = src;
- dst_layout_ = dst;
+ if (src == DATA_LAYOUT_NONE) /* DATA_LAYOUT_NONE as source falls back to DATA_LAYOUT_TRIV2 */
+ src_layout_ = DATA_LAYOUT_TRIV2;
+ else
+ src_layout_ = src; /* any other source layout is stored unchanged */
+ if (dst == DATA_LAYOUT_NONE) /* DATA_LAYOUT_NONE as destination falls back to DATA_LAYOUT_TRIV2 */
+ dst_layout_ = DATA_LAYOUT_TRIV2;
+ else
+ dst_layout_ = dst; /* any other destination layout is stored unchanged */
}
void setDataType (data_type src, data_type dst) {
if (info == nullptr)
return 0;
- converter.setDataLayout (info->layout, DATA_LAYOUT_TRIV2);
+ converter.setDataLayout (info->layout, meta->getInputSegmentLayout (idx));
converter.setDataType (info->type, meta->getInputQuantType (idx));
converter.setDataDims (meta->getInputDims (idx));
converter.setQuantZero (meta->getInputQuantZero (idx));
if (info == nullptr)
return 0;
- converter.setDataLayout (DATA_LAYOUT_TRIV2, info->layout);
+ converter.setDataLayout (meta->getOutputSegmentLayout (idx), info->layout);
converter.setDataType (meta->getOutputQuantType (idx), info->type);
converter.setDataDims (meta->getOutputDims (idx));
converter.setQuantZero (meta->getOutputQuantZero (idx));
virtual uint32_t getOutputSegmentIndex (uint32_t idx) const { return 0; }
virtual uint32_t getInputSegmentOffset (uint32_t idx) const { return 0; }
virtual uint32_t getOutputSegmentOffset (uint32_t idx) const { return 0; }
+ virtual data_layout getInputSegmentLayout (uint32_t idx) const {
+ return DATA_LAYOUT_NONE; /* default: DATA_LAYOUT_NONE for every idx (idx is unused here) */
+ }
+ virtual data_layout getOutputSegmentLayout (uint32_t idx) const {
+ return DATA_LAYOUT_NONE; /* default: DATA_LAYOUT_NONE for every idx (idx is unused here) */
+ }
virtual uint32_t getInputEmodY (uint32_t idx) const { return 0; }
virtual uint32_t getInputEmodZ (uint32_t idx) const { return 0; }
return meta_->output_seg_off[idx];
}
+ data_layout getInputSegmentLayout (uint32_t idx) const override {
+ return meta_->input_seg_layout[idx]; /* read from the metadata; idx is not range-checked here */
+ }
+ data_layout getOutputSegmentLayout (uint32_t idx) const override {
+ return meta_->output_seg_layout[idx]; /* read from the metadata; idx is not range-checked here */
+ }
+
private:
Metadata_v3 (npubin_meta *meta);
};
/* some layouts are not supported */
converter->setDataType (DATA_TYPE_QASYMM8, DATA_TYPE_QASYMM8);
- converter->setDataLayout (DATA_LAYOUT_NCHW, DATA_LAYOUT_TRIV2);
+ converter->setDataLayout (DATA_LAYOUT_TRIV, DATA_LAYOUT_TRIV2);
EXPECT_FALSE (converter->checkCapability ());
}
}
/**
+ * @brief Data conversion from NHWC to TRIV2
+ *
+ * For each tested channel count an NHWC source buffer is converted and every
+ * element is compared against its expected TRIV2 position. The expected slice
+ * width is 32 channels, or C itself when C is 1 or 3.
+ */
+TEST (ne_core_data_test, layout_conversion_from_nhwc) {
+  std::unique_ptr<DataConverter> converter (new DataConverter (true));
+
+  uint32_t N = 1, H = 10, W = 10;
+  uint32_t channels[] = {1, 3, 16, 32, 64};
+
+  for (auto C : channels) {
+    uint32_t data_dims[] = {N, H, W, C};
+    uint32_t MPA_L = 32;
+    if (C == 1 || C == 3)
+      MPA_L = C;
+
+    /* buffers are owned by smart pointers, so no manual delete[] is needed */
+    uint32_t src_size = N * H * W * C;
+    std::unique_ptr<uint8_t[]> src_data (new uint8_t[src_size]);
+
+    /* destination depth is rounded up to a multiple of MPA_L */
+    uint32_t dst_size = N * H * W * (((C + MPA_L - 1) / MPA_L) * MPA_L);
+    std::unique_ptr<uint8_t[]> dst_data (new uint8_t[dst_size]);
+
+    converter->setTops (2);
+    converter->setData (src_data.get (), dst_data.get (), src_size);
+    converter->setDataDims (data_dims);
+    converter->setDataLayout (DATA_LAYOUT_NHWC, DATA_LAYOUT_TRIV2);
+    converter->setDataType (DATA_TYPE_QASYMM8, DATA_TYPE_QASYMM8);
+
+    EXPECT_TRUE (converter->checkCapability ());
+
+    /* fill any data; the pattern deliberately wraps at 256 */
+    for (uint32_t i = 0; i < src_size; i++)
+      src_data[i] = static_cast<uint8_t> (i);
+    for (uint32_t i = 0; i < dst_size; i++)
+      dst_data[i] = 0;
+
+    EXPECT_EQ (converter->perform (), src_size);
+
+    /* unsigned counters: the loop bounds are uint32_t */
+    for (uint32_t n = 0; n < N; n++) {
+      for (uint32_t h = 0; h < H; h++) {
+        for (uint32_t w = 0; w < W; w++) {
+          for (uint32_t c = 0; c < C; c++) {
+            const uint32_t std_offset = c + C * (w + W * (h + n * H));
+            const uint32_t npu_offset =
+                c % MPA_L + MPA_L * (w + W * (h + (n + c / MPA_L) * H));
+            EXPECT_EQ (src_data[std_offset], dst_data[npu_offset]);
+          }
+        }
+      }
+    }
+  }
+}
+
+/**
+ * @brief Data conversion from TRIV2 to NHWC
+ *
+ * For each tested channel count a TRIV2 source buffer (depth rounded up to a
+ * multiple of 32) is converted and every NHWC element is compared against the
+ * TRIV2 position it is expected to come from.
+ */
+TEST (ne_core_data_test, layout_conversion_to_nhwc) {
+  std::unique_ptr<DataConverter> converter (new DataConverter (false));
+
+  uint32_t N = 1, H = 10, W = 100;
+  uint32_t channels[] = {1, 3, 16, 32, 64};
+
+  for (auto C : channels) {
+    uint32_t data_dims[] = {N, H, W, C};
+    uint32_t MPA_L = 32;
+
+    /* source depth is rounded up to a multiple of MPA_L */
+    uint32_t src_size = N * H * W * (((C + MPA_L - 1) / MPA_L) * MPA_L);
+    std::unique_ptr<uint8_t[]> src_data (new uint8_t[src_size]);
+
+    /* buffers are owned by smart pointers, so no manual delete[] is needed */
+    uint32_t dst_size = N * H * W * C;
+    std::unique_ptr<uint8_t[]> dst_data (new uint8_t[dst_size]);
+
+    converter->setTops (2);
+    converter->setData (src_data.get (), dst_data.get (), src_size);
+    converter->setDataDims (data_dims);
+    converter->setDataLayout (DATA_LAYOUT_TRIV2, DATA_LAYOUT_NHWC);
+    converter->setDataType (DATA_TYPE_QASYMM8, DATA_TYPE_QASYMM8);
+
+    EXPECT_TRUE (converter->checkCapability ());
+
+    /* fill any data; the pattern deliberately wraps at 256 */
+    for (uint32_t i = 0; i < src_size; i++)
+      src_data[i] = static_cast<uint8_t> (i);
+    for (uint32_t i = 0; i < dst_size; i++)
+      dst_data[i] = 0;
+
+    EXPECT_EQ (converter->perform (), src_size);
+
+    /* unsigned counters: the loop bounds are uint32_t */
+    for (uint32_t n = 0; n < N; n++) {
+      for (uint32_t h = 0; h < H; h++) {
+        for (uint32_t w = 0; w < W; w++) {
+          for (uint32_t c = 0; c < C; c++) {
+            const uint32_t std_offset = c + C * (w + W * (h + n * H));
+            const uint32_t npu_offset =
+                (c % MPA_L) + MPA_L * (w + W * (h + (n + c / MPA_L) * H));
+            EXPECT_EQ (src_data[npu_offset], dst_data[std_offset]);
+          }
+        }
+      }
+    }
+  }
+}
+
+/**
+ * @brief Data conversion from NCHW to TRIV2
+ *
+ * For each tested channel count an NCHW source buffer is converted and every
+ * element is compared against its expected TRIV2 position. The expected slice
+ * width is 32 channels, or C itself when C is 1 or 3.
+ */
+TEST (ne_core_data_test, layout_conversion_from_nchw) {
+  std::unique_ptr<DataConverter> converter (new DataConverter (true));
+
+  uint32_t N = 1, H = 10, W = 10;
+  uint32_t channels[] = {1, 3, 16, 32, 64};
+
+  for (auto C : channels) {
+    uint32_t data_dims[] = {N, H, W, C};
+    uint32_t MPA_L = 32;
+    if (C == 1 || C == 3)
+      MPA_L = C;
+
+    /* buffers are owned by smart pointers, so no manual delete[] is needed */
+    uint32_t src_size = N * H * W * C;
+    std::unique_ptr<uint8_t[]> src_data (new uint8_t[src_size]);
+
+    /* destination depth is rounded up to a multiple of MPA_L */
+    uint32_t dst_size = N * H * W * (((C + MPA_L - 1) / MPA_L) * MPA_L);
+    std::unique_ptr<uint8_t[]> dst_data (new uint8_t[dst_size]);
+
+    converter->setTops (2);
+    converter->setData (src_data.get (), dst_data.get (), src_size);
+    converter->setDataDims (data_dims);
+    converter->setDataLayout (DATA_LAYOUT_NCHW, DATA_LAYOUT_TRIV2);
+    converter->setDataType (DATA_TYPE_QASYMM8, DATA_TYPE_QASYMM8);
+
+    EXPECT_TRUE (converter->checkCapability ());
+
+    /* fill any data; the pattern deliberately wraps at 256 */
+    for (uint32_t i = 0; i < src_size; i++)
+      src_data[i] = static_cast<uint8_t> (i);
+    for (uint32_t i = 0; i < dst_size; i++)
+      dst_data[i] = 0;
+
+    EXPECT_EQ (converter->perform (), src_size);
+
+    /* unsigned counters: the loop bounds are uint32_t */
+    for (uint32_t n = 0; n < N; n++) {
+      for (uint32_t c = 0; c < C; c++) {
+        for (uint32_t h = 0; h < H; h++) {
+          for (uint32_t w = 0; w < W; w++) {
+            const uint32_t std_offset = w + W * (h + H * (c + n * C));
+            const uint32_t npu_offset =
+                (c % MPA_L) + MPA_L * (w + W * (h + (n + c / MPA_L) * H));
+            EXPECT_EQ (src_data[std_offset], dst_data[npu_offset]);
+          }
+        }
+      }
+    }
+  }
+}
+
+/**
+ * @brief Data conversion from TRIV2 to NCHW
+ *
+ * For each tested channel count a TRIV2 source buffer (depth rounded up to a
+ * multiple of 32) is converted and every NCHW element is compared against the
+ * TRIV2 position it is expected to come from.
+ */
+TEST (ne_core_data_test, layout_conversion_to_nchw) {
+  std::unique_ptr<DataConverter> converter (new DataConverter (false));
+
+  uint32_t N = 1, H = 10, W = 10;
+  uint32_t channels[] = {1, 3, 16, 32, 64};
+
+  for (auto C : channels) {
+    uint32_t data_dims[] = {N, H, W, C};
+    uint32_t MPA_L = 32;
+
+    /* source depth is rounded up to a multiple of MPA_L */
+    uint32_t src_size = N * H * W * (((C + MPA_L - 1) / MPA_L) * MPA_L);
+    std::unique_ptr<uint8_t[]> src_data (new uint8_t[src_size]);
+
+    /* buffers are owned by smart pointers, so no manual delete[] is needed */
+    uint32_t dst_size = N * H * W * C;
+    std::unique_ptr<uint8_t[]> dst_data (new uint8_t[dst_size]);
+
+    converter->setTops (2);
+    converter->setData (src_data.get (), dst_data.get (), src_size);
+    converter->setDataDims (data_dims);
+    converter->setDataLayout (DATA_LAYOUT_TRIV2, DATA_LAYOUT_NCHW);
+    converter->setDataType (DATA_TYPE_QASYMM8, DATA_TYPE_QASYMM8);
+
+    EXPECT_TRUE (converter->checkCapability ());
+
+    /* fill any data; the pattern deliberately wraps at 256 */
+    for (uint32_t i = 0; i < src_size; i++)
+      src_data[i] = static_cast<uint8_t> (i);
+    for (uint32_t i = 0; i < dst_size; i++)
+      dst_data[i] = 0;
+
+    EXPECT_EQ (converter->perform (), src_size);
+
+    /* unsigned counters: the loop bounds are uint32_t */
+    for (uint32_t n = 0; n < N; n++) {
+      for (uint32_t c = 0; c < C; c++) {
+        for (uint32_t h = 0; h < H; h++) {
+          for (uint32_t w = 0; w < W; w++) {
+            const uint32_t std_offset = w + W * (h + H * (c + n * C));
+            const uint32_t npu_offset =
+                (c % MPA_L) + MPA_L * (w + W * (h + (n + c / MPA_L) * H));
+            EXPECT_EQ (src_data[npu_offset], dst_data[std_offset]);
+          }
+        }
+      }
+    }
+  }
+}
+
+/**
* @brief main function for unit test
*/
int