uint32_t width = dims_[2];
uint32_t depth = dims_[3];
+ uint32_t src_data_size = get_data_size (src_type_);
+ uint32_t dst_data_size = get_data_size (dst_type_);
+ uint32_t npu_data_size;
+ if (to_npu_)
+ npu_data_size = dst_data_size;
+ else
+ npu_data_size = src_data_size;
+
+ uint32_t granularity = DATA_GRANULARITY;
+ uint32_t granularity_div = 1;
+
+ if (npu_tops_ == 2)
+ granularity_div *= 2;
+ if (npu_data_size == 2)
+ granularity_div *= 2;
+
+ granularity /= granularity_div;
+
bool input_image =
(to_npu_ && (depth == 1 || depth == 3));
bool layout_conversion =
- (!input_image && (depth != DATA_GRANULARITY) && (src_layout_ != dst_layout_));
+ (!input_image && (depth != granularity) && (src_layout_ != dst_layout_));
DECLARE_QUANTIZER(src_type_, dst_type_);
if (layout_conversion) {
- uint32_t MPA_L = DATA_GRANULARITY;
+ uint32_t MPA_L = granularity;
uint32_t std_offset;
uint32_t npu_offset;
uint32_t src_offset;
uint32_t dst_offset;
uint32_t slice_size;
- uint32_t src_data_size = get_data_size (src_type_);
- uint32_t dst_data_size = get_data_size (dst_type_);
/* NHWC-based */
for (uint32_t n = 0; n < batch; n++) {
{
public:
DataConverter (bool to_npu) :
- dims_ (nullptr), src_ (nullptr), dst_ (nullptr), size_ (0),
+ dims_ (nullptr), npu_tops_ (8), src_ (nullptr), dst_ (nullptr), size_ (0), /* npu_tops_ follows dims_ to match the member declaration order; it starts at 8, so any npu_tops_ == 2 check stays false until setTops () changes it */
src_layout_ (DATA_LAYOUT_MODEL), dst_layout_ (DATA_LAYOUT_MODEL),
src_type_ (DATA_TYPE_MODEL), dst_type_ (DATA_TYPE_MODEL),
zero_ (0), scale_ (127.0), to_npu_ (to_npu) {}
scale_ = scale;
}
+ void setTops (uint32_t tops) { /** @brief record the NPU TOPS value; @param tops value stored as-is, no validation */
+ npu_tops_ = tops; /* code that tests npu_tops_ == 2 doubles its granularity divisor, i.e. halves the granularity */
+ }
+
private:
const uint32_t *dims_;
+ uint32_t npu_tops_;
void *src_;
void *dst_;
for (uint32_t rank_idx = 0; rank_idx < MAX_RANK; rank_idx++)
tensor_size *= dims[rank_idx];
+ uint32_t granularity = DATA_GRANULARITY;
+ uint32_t shift = DATA_GRANULARITY_SHIFT;
+ uint32_t div = 1;
+
+ if (elem_size == 2) {
+ div *= 2;
+ shift--;
+ }
+ granularity /= div;
+
/** special handling for TRIV */
if (layout == DATA_LAYOUT_SRNPU && dims[3] != 3 &&
- dims[3] % DATA_GRANULARITY != 0) {
+ dims[3] % granularity != 0) {
uint32_t depth;
- depth = (dims[3] + DATA_GRANULARITY - 1) >> DATA_GRANULARITY_SHIFT;
- depth = depth * DATA_GRANULARITY;
+ depth = (dims[3] + granularity - 1) >> shift;
+ depth = depth * granularity;
tensor_size /= dims[3];
tensor_size *= depth;
for (uint32_t rank_idx = 0; rank_idx < MAX_RANK; rank_idx++)
tensor_size *= dims[rank_idx];
+ uint32_t granularity = DATA_GRANULARITY;
+ uint32_t shift = DATA_GRANULARITY_SHIFT;
+ uint32_t div = 1;
+
+ if (elem_size == 2) {
+ div *= 2;
+ shift--;
+ }
+ granularity /= div;
+
/** special handling for TRIV */
if (layout == DATA_LAYOUT_SRNPU && dims[3] != 3 &&
- dims[3] % DATA_GRANULARITY != 0) {
+ dims[3] % granularity != 0) {
uint32_t depth;
- depth = (dims[3] + DATA_GRANULARITY - 1) >> DATA_GRANULARITY_SHIFT;
- depth = depth * DATA_GRANULARITY;
+ depth = (dims[3] + granularity - 1) >> shift;
+ depth = depth * granularity;
tensor_size /= dims[3];
tensor_size *= depth;
assert (idx < getInputNum ());
const uint32_t *dims = getInputDims (idx);
- uint32_t tensor_size = getInputElemSize (idx);
+ uint32_t elem_size = getInputElemSize (idx);
+ uint32_t tensor_size = elem_size;
uint32_t depth = dims[3];
bool image;
for (uint32_t rank_idx = 0; rank_idx < MAX_RANK; rank_idx++)
tensor_size *= dims[rank_idx];
+ uint32_t granularity = DATA_GRANULARITY;
+ uint32_t shift = DATA_GRANULARITY_SHIFT;
+ uint32_t div = 1;
+
+ if (getTops () == 2) {
+ div *= 2;
+ shift--;
+ }
+ if (elem_size == 2) {
+ div *= 2;
+ shift--;
+ }
+ granularity /= div;
image = (depth == 1 || depth == 3);
/** special handling for TRIV2 */
- if (layout == DATA_LAYOUT_TRIV2 && depth % DATA_GRANULARITY != 0 && !image) {
+ if (layout == DATA_LAYOUT_TRIV2 && depth % granularity != 0 && !image) {
uint32_t new_depth;
- new_depth = (depth + DATA_GRANULARITY - 1) >> DATA_GRANULARITY_SHIFT;
- new_depth = new_depth * DATA_GRANULARITY;
+ new_depth = (depth + granularity - 1) >> shift;
+ new_depth = new_depth * granularity;
tensor_size /= depth;
tensor_size *= new_depth;
assert (idx < getOutputNum ());
const uint32_t *dims = getOutputDims (idx);
- uint32_t tensor_size = getOutputElemSize (idx);
+ uint32_t elem_size = getOutputElemSize (idx);
+ uint32_t tensor_size = elem_size;
uint32_t depth = dims[3];
for (uint32_t rank_idx = 0; rank_idx < MAX_RANK; rank_idx++)
tensor_size *= dims[rank_idx];
+ uint32_t granularity = DATA_GRANULARITY;
+ uint32_t shift = DATA_GRANULARITY_SHIFT;
+ uint32_t div = 1;
+
+ if (getTops () == 2) {
+ div *= 2;
+ shift--;
+ }
+ if (elem_size == 2) {
+ div *= 2;
+ shift--;
+ }
+ granularity /= div;
+
/** special handling for TRIV2 */
- if (layout == DATA_LAYOUT_TRIV2 && depth % DATA_GRANULARITY != 0) {
+ if (layout == DATA_LAYOUT_TRIV2 && depth % granularity != 0) {
uint32_t new_depth;
- new_depth = (depth + DATA_GRANULARITY - 1) >> DATA_GRANULARITY_SHIFT;
- new_depth = new_depth * DATA_GRANULARITY;
+ new_depth = (depth + granularity - 1) >> shift;
+ new_depth = new_depth * granularity;
tensor_size /= depth;
tensor_size *= new_depth;