[2TOPS/Fix] Use dynamic data granularity

author Dongju Chae <dongju.chae@samsung.com>

Wed, 30 Dec 2020 01:24:51 +0000 (10:24 +0900)

committer 송욱/On-Device Lab(SR)/Staff Engineer/삼성전자 <wook16.song@samsung.com>

Mon, 4 Jan 2021 02:47:09 +0000 (11:47 +0900)
author Dongju Chae <dongju.chae@samsung.com>
Wed, 30 Dec 2020 01:24:51 +0000 (10:24 +0900)
committer 송욱/On-Device Lab(SR)/Staff Engineer/삼성전자 <wook16.song@samsung.com>
Mon, 4 Jan 2021 02:47:09 +0000 (11:47 +0900)
diff --git a/include/common/npubinfmt.h b/include/common/npubinfmt.h

index 3df3981..ad08ca4 100644 (file)
--- a/include/common/npubinfmt.h
+++ b/include/common/npubinfmt.h
@@ -46,7 +46,7 @@ static inline uint64_t NPUBIN_META_TOTAL_SIZE (uint64_t magiccode) {
  #define MAX_TENSORS (16)
  #define MAX_RANK    (4)
  #define MAX_SEGMENTS (256) /* 8-bit segment indexing */
-#define DATA_GRANULARITY (64) /* MPA_L */
+#define DATA_GRANULARITY (64) /* Default MPA_L in TRIV1 and 8-TOPS TRIV2 */
  #define DATA_GRANULARITY_SHIFT (6)
  
  /* npubinfmt magiccode macros */
diff --git a/src/core/ne-data.cc b/src/core/ne-data.cc

index 30b73bd..98d016a 100644 (file)
--- a/src/core/ne-data.cc
+++ b/src/core/ne-data.cc
@@ -247,22 +247,38 @@ DataConverter::perform ()
    uint32_t width = dims_[2];
    uint32_t depth = dims_[3];
  
+  uint32_t src_data_size = get_data_size (src_type_);
+  uint32_t dst_data_size = get_data_size (dst_type_);
+  uint32_t npu_data_size;
+  if (to_npu_)
+    npu_data_size = dst_data_size;
+  else
+    npu_data_size = src_data_size;
+
+  uint32_t granularity = DATA_GRANULARITY;
+  uint32_t granularity_div = 1;
+
+  if (npu_tops_ == 2)
+    granularity_div *= 2;
+  if (npu_data_size == 2)
+    granularity_div *= 2;
+
+  granularity /= granularity_div;
+
    bool input_image =
      (to_npu_ && (depth == 1 || depth == 3));
    bool layout_conversion =
-    (!input_image && (depth != DATA_GRANULARITY) && (src_layout_ != dst_layout_));
+    (!input_image && (depth != granularity) && (src_layout_ != dst_layout_));
  
    DECLARE_QUANTIZER(src_type_, dst_type_);
  
    if (layout_conversion) {
-    uint32_t MPA_L = DATA_GRANULARITY;
+    uint32_t MPA_L = granularity;
      uint32_t std_offset;
      uint32_t npu_offset;
      uint32_t src_offset;
      uint32_t dst_offset;
      uint32_t slice_size;
-    uint32_t src_data_size = get_data_size (src_type_);
-    uint32_t dst_data_size = get_data_size (dst_type_);
  
      /* NHWC-based */
      for (uint32_t n = 0; n < batch; n++) {
diff --git a/src/core/ne-data.h b/src/core/ne-data.h

index e7a34f1..7febe77 100644 (file)
--- a/src/core/ne-data.h
+++ b/src/core/ne-data.h
@@ -22,7 +22,7 @@ class DataConverter
  {
    public:
      DataConverter (bool to_npu) :
-      dims_ (nullptr), src_ (nullptr), dst_ (nullptr), size_ (0),
+      dims_ (nullptr), npu_tops_ (8), src_ (nullptr), dst_ (nullptr), size_ (0),
        src_layout_ (DATA_LAYOUT_MODEL), dst_layout_ (DATA_LAYOUT_MODEL),
        src_type_ (DATA_TYPE_MODEL), dst_type_ (DATA_TYPE_MODEL),
        zero_ (0), scale_ (127.0), to_npu_ (to_npu) {}
@@ -58,8 +58,13 @@ class DataConverter
        scale_ = scale;
      }
  
+    void setTops (uint32_t tops) {
+      npu_tops_ = tops;
+    }
+
    private:
      const uint32_t *dims_;
+    uint32_t npu_tops_;
  
      void *src_;
      void *dst_;
diff --git a/src/core/ne-handler.cc b/src/core/ne-handler.cc

index 195dcce..1662905 100644 (file)
--- a/src/core/ne-handler.cc
+++ b/src/core/ne-handler.cc
@@ -1486,11 +1486,11 @@ size_t
  TrinityVision2::manipulateData (const Model *model, uint32_t idx, bool is_input,
      void *dst, void *src, size_t size)
  {
-  const Metadata *meta = model->getMetadata();
+  const Metadata *meta = model->getMetadata ();
    DataConverter converter (is_input);
  
    converter.setData (src, dst, size);
-
+  converter.setTops (meta->getTops ());
    if (is_input) {
      const tensor_data_info* info = model->getInputDataInfo (idx);
      if (info == nullptr)
diff --git a/src/core/ne-model.cc b/src/core/ne-model.cc

index 0244136..dd7e08d 100644 (file)
--- a/src/core/ne-model.cc
+++ b/src/core/ne-model.cc
@@ -143,13 +143,23 @@ Metadata_v2::getInputTensorSize (uint32_t idx, data_layout layout) const
    for (uint32_t rank_idx = 0; rank_idx < MAX_RANK; rank_idx++)
      tensor_size *= dims[rank_idx];
  
+  uint32_t granularity = DATA_GRANULARITY;
+  uint32_t shift = DATA_GRANULARITY_SHIFT;
+  uint32_t div = 1;
+
+  if (elem_size == 2) {
+    div *= 2;
+    shift--;
+  }
+  granularity /= div;
+
    /** special handling for TRIV */
    if (layout == DATA_LAYOUT_SRNPU && dims[3] != 3 &&
-      dims[3] % DATA_GRANULARITY != 0) {
+      dims[3] % granularity != 0) {
      uint32_t depth;
  
-    depth = (dims[3] + DATA_GRANULARITY - 1) >> DATA_GRANULARITY_SHIFT;
-    depth = depth * DATA_GRANULARITY;
+    depth = (dims[3] + granularity - 1) >> shift;
+    depth = depth * granularity;
  
      tensor_size /= dims[3];
      tensor_size *= depth;
@@ -171,13 +181,23 @@ Metadata_v2::getOutputTensorSize (uint32_t idx, data_layout layout) const
    for (uint32_t rank_idx = 0; rank_idx < MAX_RANK; rank_idx++)
      tensor_size *= dims[rank_idx];
  
+  uint32_t granularity = DATA_GRANULARITY;
+  uint32_t shift = DATA_GRANULARITY_SHIFT;
+  uint32_t div = 1;
+
+  if (elem_size == 2) {
+    div *= 2;
+    shift--;
+  }
+  granularity /= div;
+
    /** special handling for TRIV */
    if (layout == DATA_LAYOUT_SRNPU && dims[3] != 3 &&
-      dims[3] % DATA_GRANULARITY != 0) {
+      dims[3] % granularity != 0) {
      uint32_t depth;
  
-    depth = (dims[3] + DATA_GRANULARITY - 1) >> DATA_GRANULARITY_SHIFT;
-    depth = depth * DATA_GRANULARITY;
+    depth = (dims[3] + granularity - 1) >> shift;
+    depth = depth * granularity;
  
      tensor_size /= dims[3];
      tensor_size *= depth;
@@ -240,21 +260,35 @@ Metadata_v3::getInputTensorSize (uint32_t idx, data_layout layout) const
    assert (idx < getInputNum ());
  
    const uint32_t *dims = getInputDims (idx);
-  uint32_t tensor_size = getInputElemSize (idx);
+  uint32_t elem_size = getInputElemSize (idx);
+  uint32_t tensor_size = elem_size;
    uint32_t depth = dims[3];
    bool image;
  
    for (uint32_t rank_idx = 0; rank_idx < MAX_RANK; rank_idx++)
      tensor_size *= dims[rank_idx];
  
+  uint32_t granularity = DATA_GRANULARITY;
+  uint32_t shift = DATA_GRANULARITY_SHIFT;
+  uint32_t div = 1;
+
+  if (getTops () == 2) {
+    div *= 2;
+    shift--;
+  }
+  if (elem_size == 2) {
+    div *= 2;
+    shift--;
+  }
+  granularity /= div;
    image = (depth == 1 || depth == 3);
  
    /** special handling for TRIV2 */
-  if (layout == DATA_LAYOUT_TRIV2 && depth % DATA_GRANULARITY != 0 && !image) {
+  if (layout == DATA_LAYOUT_TRIV2 && depth % granularity != 0 && !image) {
      uint32_t new_depth;
  
-    new_depth = (depth + DATA_GRANULARITY - 1) >> DATA_GRANULARITY_SHIFT;
-    new_depth = new_depth * DATA_GRANULARITY;
+    new_depth = (depth + granularity - 1) >> shift;
+    new_depth = new_depth * granularity;
  
      tensor_size /= depth;
      tensor_size *= new_depth;
@@ -273,18 +307,33 @@ Metadata_v3::getOutputTensorSize (uint32_t idx, data_layout layout) const
    assert (idx < getOutputNum ());
  
    const uint32_t *dims = getOutputDims (idx);
-  uint32_t tensor_size = getOutputElemSize (idx);
+  uint32_t elem_size = getOutputElemSize (idx);
+  uint32_t tensor_size = elem_size;
    uint32_t depth = dims[3];
  
    for (uint32_t rank_idx = 0; rank_idx < MAX_RANK; rank_idx++)
      tensor_size *= dims[rank_idx];
  
+  uint32_t granularity = DATA_GRANULARITY;
+  uint32_t shift = DATA_GRANULARITY_SHIFT;
+  uint32_t div = 1;
+
+  if (getTops () == 2) {
+    div *= 2;
+    shift--;
+  }
+  if (elem_size == 2) {
+    div *= 2;
+    shift--;
+  }
+  granularity /= div;
+
    /** special handling for TRIV2 */
-  if (layout == DATA_LAYOUT_TRIV2 && depth % DATA_GRANULARITY != 0) {
+  if (layout == DATA_LAYOUT_TRIV2 && depth % granularity != 0) {
      uint32_t new_depth;
  
-    new_depth = (depth + DATA_GRANULARITY - 1) >> DATA_GRANULARITY_SHIFT;
-    new_depth = new_depth * DATA_GRANULARITY;
+    new_depth = (depth + granularity - 1) >> shift;
+    new_depth = new_depth * granularity;
  
      tensor_size /= depth;
      tensor_size *= new_depth;
diff --git a/src/core/ne-model.h b/src/core/ne-model.h

index 4a2117d..a43a1ad 100644 (file)
--- a/src/core/ne-model.h
+++ b/src/core/ne-model.h
@@ -92,6 +92,7 @@ class Metadata {
      uint64_t getBufferSize () const { return meta_->buffer_size; }
      uint32_t getMetaSize () const { return NPUBIN_META_TOTAL_SIZE(meta_->magiccode); }
      uint32_t getMetaExtraSize () const { return NPUBIN_META_EXTRA_SIZE(meta_->magiccode); }
+    uint32_t getTops () const { return NPU_VERSION_TOPS (getNPUVersion ()); }
  
      int getVersion () const { return version_; }
      uint64_t getNPUVersion () const { return meta_->npu_version; }
author	Dongju Chae <dongju.chae@samsung.com>
	Wed, 30 Dec 2020 01:24:51 +0000 (10:24 +0900)
committer	송욱/On-Device Lab(SR)/Staff Engineer/삼성전자 <wook16.song@samsung.com>
	Mon, 4 Jan 2021 02:47:09 +0000 (11:47 +0900)
include/common/npubinfmt.h		patch | blob | history
src/core/ne-data.cc		patch | blob | history
src/core/ne-data.h		patch | blob | history
src/core/ne-handler.cc		patch | blob | history
src/core/ne-model.cc		patch | blob | history
src/core/ne-model.h		patch | blob | history