[npubinfmt/triv2] NPU binary format for TRIV2 models

author Dongju Chae <dongju.chae@samsung.com>

Wed, 11 Mar 2020 11:17:53 +0000 (20:17 +0900)

committer 송욱/On-Device Lab(SR)/Staff Engineer/삼성전자 <wook16.song@samsung.com>

Wed, 8 Apr 2020 01:54:23 +0000 (10:54 +0900)
author Dongju Chae <dongju.chae@samsung.com>
Wed, 11 Mar 2020 11:17:53 +0000 (20:17 +0900)
committer 송욱/On-Device Lab(SR)/Staff Engineer/삼성전자 <wook16.song@samsung.com>
Wed, 8 Apr 2020 01:54:23 +0000 (10:54 +0900)
diff --git a/include/common/npubinfmt.h b/include/common/npubinfmt.h

index 38e59ef..8644412 100644 (file)
--- a/include/common/npubinfmt.h
+++ b/include/common/npubinfmt.h
@@ -29,6 +29,7 @@
  /* tensor data constrains */
  #define MAX_TENSORS (16)
  #define MAX_RANK    (4)
+#define MAX_SEGMENTS (256) /* 8-bit segment indexing */
  #define DATA_GRANULARITY (64) /* MPA_L */
  
  /* npubinfmt magiccode macros */
@@ -148,13 +149,51 @@ typedef struct {
            uint32_t output_quant_z[MAX_TENSORS];        /**< output quantization parameter (zero-point) */
            float    output_quant_s[MAX_TENSORS];        /**< output quantization parameter (scale) */
          };
+        struct {
+          /**
+           * npubinfmt v3; this format supports TRIV2 models.
+           * segment_num must be greater than or equal to both input_seg_num and output_seg_num.
+           * Also, each tensor must fit within its corresponding segment.
+           * FYI, TRIV2 supports two quantization methods: QASYMM8 and QSYMM16.
+           */
+
+          /** segments to hold input/weight/output data */
+          uint32_t segment_num;                             /**< number of total segments */
+          uint32_t segment_size[MAX_SEGMENTS];              /**< size of each segment */
+
+          /** model weight segment */
+          uint32_t weight_seg_idx;                          /**< segment index for model weight */
+
+          /** input tensor segment */
+          uint32_t input_seg_num;                           /**< Number of input tensors (<= MAX_TENSORS) */
+          uint32_t input_seg_idx[MAX_TENSORS];              /**< segment index for input tensors */
+          uint32_t input_seg_dims[MAX_TENSORS][MAX_RANK];   /**< input tensor dimensions (N, H, W, C) */
+
+          uint32_t input_seg_emod_y[MAX_TENSORS];           /**< input tensor addressing info (emod_y) */
+          uint32_t input_seg_emod_z[MAX_TENSORS];           /**< input tensor addressing info (emod_z) */
+
+          data_type input_seg_quant_type[MAX_TENSORS];      /**< input quantization data type */
+          int32_t input_seg_quant_z[MAX_TENSORS];           /**< input quantization parameter (zero-point) */
+          float input_seg_quant_s[MAX_TENSORS];             /**< input quantization parameter (scale) */
+
+          /** output tensor segment */
+          uint32_t output_seg_num;                          /**< Number of output tensors (<= MAX_TENSORS) */
+          uint32_t output_seg_idx[MAX_TENSORS];             /**< segment index for output tensors */
+          uint32_t output_seg_dims[MAX_TENSORS][MAX_RANK];  /**< output tensor dimensions (N, H, W, C) */
+
+          uint32_t output_seg_emod_y[MAX_TENSORS];          /**< output tensor addressing info (emod_y) */
+          uint32_t output_seg_emod_z[MAX_TENSORS];          /**< output tensor addressing info (emod_z) */
+
+          data_type output_seg_quant_type[MAX_TENSORS];     /**< output quantization data type */
+          int32_t output_seg_quant_z[MAX_TENSORS];          /**< output quantization parameter (zero-point) */
+          float output_seg_quant_s[MAX_TENSORS];            /**< output quantization parameter (scale) */
+        };
          /** If npubinfmt should be changed, append here as another version */
        };
      };
-    char reserved_npu_engine[2048]; /**< Ensure NPU-Engine part is 2048 bytes */
+    char reserved_npu_engine[3072]; /**< Ensure NPU-Engine part is 3072 bytes */
    };
-
-  char reserved_compiler[2048]; /**< Reserved for NPU Compiler */
+  char reserved_compiler[1024]; /**< Reserved for NPU Compiler */
    char reserved_extra[];      /**< Reserved for future; zero-length array */
  } __attribute__((packed, aligned)) npubin_meta;
  
diff --git a/include/common/typedef.h b/include/common/typedef.h

index 94a6332..d750c99 100644 (file)
--- a/include/common/typedef.h
+++ b/include/common/typedef.h
@@ -65,10 +65,12 @@ typedef enum {
   */
  typedef enum {
    /* 8 bits */
-  DATA_TYPE_SRNPU = 0,  /**< quantized for integer-arithmetic-only inference */
+  DATA_TYPE_SRNPU = 0,  /**< integer-arithmetic-only quantization (TRIV1) */
+  DATA_TYPE_QASYMM8,   /**< 8-bit asymmetric quantization (TRIV2) */
    DATA_TYPE_INT8,
    DATA_TYPE_UINT8,
    /* 16 bits */
+  DATA_TYPE_QSYMM16,   /**< 16-bit symmetric quantization (TRIV2) */
    DATA_TYPE_INT16,
    DATA_TYPE_UINT16,
    /* 32 bits */
author	Dongju Chae <dongju.chae@samsung.com>
	Wed, 11 Mar 2020 11:17:53 +0000 (20:17 +0900)
committer	송욱/On-Device Lab(SR)/Staff Engineer/삼성전자 <wook16.song@samsung.com>
	Wed, 8 Apr 2020 01:54:23 +0000 (10:54 +0900)
include/common/npubinfmt.h		patch \| blob \| history
include/common/typedef.h		patch \| blob \| history