/* tensor data constraints */
#define MAX_TENSORS (16) /* array bound for the per-tensor metadata fields */
#define MAX_RANK (4) /* array bound for tensor dimensions (N, H, W, C) */
+#define MAX_SEGMENTS (256) /* 8-bit segment indexing */
#define DATA_GRANULARITY (64) /* MPA_L */
/* npubinfmt magiccode macros */
uint32_t output_quant_z[MAX_TENSORS]; /**< output quantization parameter (zero-point) */
float output_quant_s[MAX_TENSORS]; /**< output quantization parameter (scale) */
};
+ struct {
+ /**
+ * npubinfmt v3; this format supports TRIV2 models.
+ * segment_num must be greater than or equal to both input_seg_num and output_seg_num.
+ * Also, each tensor must fit within its corresponding segment.
+ * FYI, TRIV2 supports two quantization methods: QASYMM8 and QSYMM16.
+ */
+
+ /** segments to hold input/weight/output data */
+ uint32_t segment_num; /**< number of total segments */
+ uint32_t segment_size[MAX_SEGMENTS]; /**< size of each segment (presumably in bytes; TODO confirm) */
+
+ /** model weight segment */
+ uint32_t weight_seg_idx; /**< segment index for model weight */
+
+ /** input tensor segment */
+ uint32_t input_seg_num; /**< number of input tensors (<= MAX_TENSORS) */
+ uint32_t input_seg_idx[MAX_TENSORS]; /**< segment index for each input tensor */
+ uint32_t input_seg_dims[MAX_TENSORS][MAX_RANK]; /**< input tensor dimensions (N, H, W, C) */
+
+ uint32_t input_seg_emod_y[MAX_TENSORS]; /**< input tensor addressing info (emod_y) */
+ uint32_t input_seg_emod_z[MAX_TENSORS]; /**< input tensor addressing info (emod_z) */
+
+ data_type input_seg_quant_type[MAX_TENSORS]; /**< input quantization data type */
+ int32_t input_seg_quant_z[MAX_TENSORS]; /**< input quantization parameter (zero-point) */
+ float input_seg_quant_s[MAX_TENSORS]; /**< input quantization parameter (scale) */
+
+ /** output tensor segment */
+ uint32_t output_seg_num; /**< number of output tensors (<= MAX_TENSORS) */
+ uint32_t output_seg_idx[MAX_TENSORS]; /**< segment index for each output tensor */
+ uint32_t output_seg_dims[MAX_TENSORS][MAX_RANK]; /**< output tensor dimensions (N, H, W, C) */
+
+ uint32_t output_seg_emod_y[MAX_TENSORS]; /**< output tensor addressing info (emod_y) */
+ uint32_t output_seg_emod_z[MAX_TENSORS]; /**< output tensor addressing info (emod_z) */
+
+ data_type output_seg_quant_type[MAX_TENSORS]; /**< output quantization data type */
+ int32_t output_seg_quant_z[MAX_TENSORS]; /**< output quantization parameter (zero-point) */
+ float output_seg_quant_s[MAX_TENSORS]; /**< output quantization parameter (scale) */
+ };
/** If npubinfmt needs to change, append the new layout here as another version */
};
};
- char reserved_npu_engine[2048]; /**< Ensure NPU-Engine part is 2048 bytes */
+ char reserved_npu_engine[3072]; /**< Ensure NPU-Engine part is 3072 bytes */
};
-
- char reserved_compiler[2048]; /**< Reserved for NPU Compiler */
+ char reserved_compiler[1024]; /**< Reserved for NPU Compiler */
char reserved_extra[]; /**< Reserved for future; zero-length array */
} __attribute__((packed, aligned)) npubin_meta;