core: npu: Apply triv2.4 profile format

author Jiho Chu <jiho.chu@samsung.com>

Fri, 1 Jul 2022 08:00:48 +0000 (17:00 +0900)

committer 추지호/NPU Lab(SR)/삼성전자 <jiho.chu@samsung.com>

Fri, 22 Jul 2022 10:06:40 +0000 (19:06 +0900)
author Jiho Chu <jiho.chu@samsung.com>
Fri, 1 Jul 2022 08:00:48 +0000 (17:00 +0900)
committer 추지호/NPU Lab(SR)/삼성전자 <jiho.chu@samsung.com>
Fri, 22 Jul 2022 10:06:40 +0000 (19:06 +0900)
diff --git a/src/core/npu/NPUdrvAPI_emul.cc b/src/core/npu/NPUdrvAPI_emul.cc

index f158d7cb0dd4a208426cc043a87a2554cecb9dfc..e65a96d039d1e3b6efcde2270839fa80184689cb 100644 (file)
--- a/src/core/npu/NPUdrvAPI_emul.cc
+++ b/src/core/npu/NPUdrvAPI_emul.cc
@@ -30,7 +30,211 @@
  #define RESERVED_DSPM_SIZE (64 * 1024) /* 64 KiB */
  #define ENVNAME_DSPM_SIZE ("MRPSIM_SPM_SIZE")
  
-static uint64_t global_exec_seq = 0;
+namespace {
+
+uint64_t global_exec_seq = 0;
+
+inline void getProfileArchVersion (uint32_t version, int *major, int *minor) {
+  *major = static_cast<int> ((version >> TRIV2PROF_MAJOR_SHIFT) & 0xffffu); /* shifted-down field */
+  *minor = static_cast<int> (version & 0xffffu); /* low 16 bits */
+}
+
+/** @brief Parse a TRIV2.3 profile dump into npu_profile layer entries
+ * @param[in] ifs binary stream positioned at the T2PF_HEAD header
+ * @param[out] profile receives the layers and totals (untouched when no dump exists)
+ * @return 0 on success, -EINVAL on truncated data or an unknown block id */
+int
+parseProfile23 (std::ifstream *ifs, npu_profile *profile) {
+  T2PF_HEAD head;
+  if (!ifs->read ((char *) &head, sizeof (T2PF_HEAD))) {
+    logerr (TAG, "Failed to read the profile header");
+    return -EINVAL;
+  }
+
+  uint32_t total_dump = 0;
+  /* Neural Network Accelerator (NNA) */
+  total_dump += head.nna.num_of_dump;
+  total_dump += head.nna_dma_in.num_of_dump;
+  total_dump += head.nna_dma_out.num_of_dump;
+  /* Digital Signal Processor (DSP) */
+  total_dump += head.dsp.num_of_dump;
+  total_dump += head.dsp_dma_in.num_of_dump;
+  total_dump += head.dsp_dma_out.num_of_dump;
+  if (total_dump == 0)
+    return 0;
+
+  profile->layers = new npu_profile_layer[total_dump];
+  profile->num_layers = total_dump;
+  profile->total_system_cycles = head.total_cycles;
+  profile->dram_input_footprint = head.nna_dma_in.access_footprint_byte;
+  profile->dram_output_footprint = head.nna_dma_out.access_footprint_byte;
+  profile->dram_input_footprint += head.dsp_dma_in.access_footprint_byte;
+  profile->dram_output_footprint += head.dsp_dma_out.access_footprint_byte;
+
+  int ret = 0;
+  for (uint32_t i = 0; i < total_dump && ret == 0; i++) {
+    npu_profile_layer *layer = &profile->layers[i];
+    T2PF_DUMP common;
+    T2PF_DUMP_NNA nna;
+    T2PF_DUMP_DMA dma;
+    T2PF_DUMP_DSP dsp;
+    /* Peek at the common fields to learn the block id, then rewind */
+    const std::streampos pos = ifs->tellg ();
+    if (!ifs->read ((char *) &common, sizeof (T2PF_DUMP))) {
+      logerr (TAG, "Truncated profile data at dump %u", i);
+      ret = -EINVAL;
+      break;
+    }
+    ifs->seekg (pos);
+    memset (layer, '\x00', sizeof (npu_profile_layer));
+    layer->running_cycles = common.cycle_end - common.cycle_start;
+    layer->start_cycles = common.cycle_start;
+    layer->end_cycles = common.cycle_end;
+    layer->visa_prog_seq = i;
+    /* In the first run, program sequence == exec sequence */
+    layer->visa_exec_seq = global_exec_seq++;
+
+    switch (common.block_id) {
+      case TRIV2PROF_BLOCKID_NNA:
+        ifs->read ((char *) &nna, sizeof (T2PF_DUMP_NNA));
+        layer->visa_opcode = nna.opcode;
+        snprintf (layer->name, NPU_OPNAME_MAX, "%s", nna.op_name);
+        break;
+      case TRIV2PROF_BLOCKID_NNA_DMA_IN:
+        ifs->read ((char *) &dma, sizeof (T2PF_DUMP_DMA));
+        layer->visa_opcode = 0x02;
+        snprintf (layer->name, NPU_OPNAME_MAX, "%s", "ADMA_IN");
+        layer->dram_read_bytes = dma.src_addr_end - dma.src_addr_start;
+        layer->sram_write_bytes = dma.dest_addr_end - dma.dest_addr_start;
+        break;
+      case TRIV2PROF_BLOCKID_NNA_DMA_OUT:
+        ifs->read ((char *) &dma, sizeof (T2PF_DUMP_DMA));
+        layer->visa_opcode = 0x03;
+        snprintf (layer->name, NPU_OPNAME_MAX, "%s", "ADMA_OUT");
+        layer->dram_write_bytes = dma.dest_addr_end - dma.dest_addr_start;
+        layer->sram_read_bytes = dma.src_addr_end - dma.src_addr_start;
+        break;
+      case TRIV2PROF_BLOCKID_DSP:
+        ifs->read ((char *) &dsp, sizeof (T2PF_DUMP_DSP));
+        layer->visa_opcode = dsp.opcode;
+        snprintf (layer->name, NPU_OPNAME_MAX, "%s", dsp.op_name);
+        break;
+      case TRIV2PROF_BLOCKID_DSP_DMA_IN:
+        ifs->read ((char *) &dma, sizeof (T2PF_DUMP_DMA));
+        layer->visa_opcode = 0x40;
+        snprintf (layer->name, NPU_OPNAME_MAX, "%s", "PDMA_IN");
+        layer->dram_read_bytes = dma.src_addr_end - dma.src_addr_start;
+        layer->sram_write_bytes = dma.dest_addr_end - dma.dest_addr_start;
+        break;
+      case TRIV2PROF_BLOCKID_DSP_DMA_OUT:
+        ifs->read ((char *) &dma, sizeof (T2PF_DUMP_DMA));
+        layer->visa_opcode = 0x41;
+        snprintf (layer->name, NPU_OPNAME_MAX, "%s", "PDMA_OUT");
+        layer->dram_write_bytes = dma.dest_addr_end - dma.dest_addr_start;
+        layer->sram_read_bytes = dma.src_addr_end - dma.src_addr_start;
+        break;
+      default:
+        logerr (TAG, "Unknown block id detected: %u", common.block_id);
+        ret = -EINVAL;
+    }
+  }
+  if (ret != 0) { /* never hand back a half-filled array or a stale count */
+    delete[] profile->layers;
+    profile->layers = nullptr;
+    profile->num_layers = 0;
+  }
+  return ret;
+}
+
+/** @brief Parse a TRIV2.4 profile dump into npu_profile layer entries
+ * @param[in] ifs binary stream positioned at the T24PF_HEAD header
+ * @param[out] profile receives the layers and totals (untouched when no dump exists)
+ * @return 0 on success, -EINVAL on truncated data or an unknown block id */
+int
+parseProfile24 (std::ifstream *ifs, npu_profile *profile) {
+  T24PF_HEAD head;
+  if (!ifs->read ((char *) &head, sizeof (T24PF_HEAD))) {
+    logerr (TAG, "Failed to read the profile header");
+    return -EINVAL;
+  }
+
+  uint32_t total_dump = 0;
+  /* Neural Network Accelerator (NNA) */
+  total_dump += head.nna0.num_of_dump;
+  total_dump += head.nna1.num_of_dump;
+  total_dump += head.dma_in.num_of_dump;
+  total_dump += head.dma_out.num_of_dump;
+  /* Digital Signal Processor (DSP) */
+  total_dump += head.dsp.num_of_dump;
+  if (total_dump == 0)
+    return 0;
+  profile->layers = new npu_profile_layer[total_dump];
+  profile->num_layers = total_dump;
+  profile->total_system_cycles = head.total_cycles;
+  profile->dram_input_footprint = head.dma_in.access_footprint_byte;
+  profile->dram_output_footprint = head.dma_out.access_footprint_byte;
+  int ret = 0;
+  for (uint32_t i = 0; i < total_dump && ret == 0; i++) {
+    npu_profile_layer *layer = &profile->layers[i];
+    T2PF_DUMP common;
+    T2PF_DUMP_NNA nna;
+    T2PF_DUMP_DMA dma;
+    T2PF_DUMP_DSP dsp;
+    /* Peek at the common fields to learn the block id, then rewind */
+    const std::streampos pos = ifs->tellg ();
+    if (!ifs->read ((char *) &common, sizeof (T2PF_DUMP))) {
+      logerr (TAG, "Truncated profile data at dump %u", i);
+      ret = -EINVAL;
+      break;
+    }
+    ifs->seekg (pos);
+    memset (layer, '\x00', sizeof (npu_profile_layer));
+    layer->running_cycles = common.cycle_end - common.cycle_start;
+    layer->start_cycles = common.cycle_start;
+    layer->end_cycles = common.cycle_end;
+    layer->visa_prog_seq = i;
+    /* In the first run, program sequence == exec sequence */
+    layer->visa_exec_seq = global_exec_seq++;
+    switch (common.block_id) {
+      case TRIV2PROF_BLOCKID_NNA0:
+      case TRIV2PROF_BLOCKID_NNA1:
+        ifs->read ((char *) &nna, sizeof (T2PF_DUMP_NNA));
+        layer->visa_opcode = nna.opcode;
+        snprintf (layer->name, NPU_OPNAME_MAX, "%s", nna.op_name);
+        break;
+      case TRIV2PROF_BLOCKID_DMA_IN:
+        ifs->read ((char *) &dma, sizeof (T2PF_DUMP_DMA));
+        layer->visa_opcode = 0x02;
+        snprintf (layer->name, NPU_OPNAME_MAX, "%s", "DMA_IN");
+        layer->dram_read_bytes = dma.src_addr_end - dma.src_addr_start;
+        layer->sram_write_bytes = dma.dest_addr_end - dma.dest_addr_start;
+        break;
+      case TRIV2PROF_BLOCKID_DMA_OUT:
+        ifs->read ((char *) &dma, sizeof (T2PF_DUMP_DMA));
+        layer->visa_opcode = 0x03;
+        snprintf (layer->name, NPU_OPNAME_MAX, "%s", "DMA_OUT");
+        layer->dram_write_bytes = dma.dest_addr_end - dma.dest_addr_start;
+        layer->sram_read_bytes = dma.src_addr_end - dma.src_addr_start;
+        break;
+      case TRIV2PROF_BLOCKID_DSP:
+        ifs->read ((char *) &dsp, sizeof (T2PF_DUMP_DSP));
+        layer->visa_opcode = dsp.opcode;
+        snprintf (layer->name, NPU_OPNAME_MAX, "%s", dsp.op_name);
+        break;
+      default:
+        logerr (TAG, "Unknown block id detected: %u", common.block_id);
+        ret = -EINVAL;
+    }
+  }
+  if (ret != 0) { /* never hand back a half-filled array or a stale count */
+    delete[] profile->layers;
+    profile->layers = nullptr;
+    profile->num_layers = 0;
+  }
+  return ret;
+}
+
+};  // namespace
  
  class EmulReq {
   public:
@@ -116,16 +320,15 @@ class EmulElement {
    model_config_t *getModelConfig (uint64_t id) { return model_config_map_.find (id); }
  
    void unsetModelConfig (uint64_t id) { model_config_map_.remove (id); }
-
-  void setNpuTops (uint64_t tops) {
-    /** for backward-compatibility, 0-tops is regarded as 8-tops */
-    if (tops == 0)
-      tops_ = 8;
-    else
-      tops_ = tops;
+  void setNpuVersion(uint64_t npu_version) { /* keep the whole version; tops is derived from it on demand */
+    npu_version_ = npu_version;
    }
  
-  uint64_t getNpuTops () const { return tops_; }
+  uint64_t getNpuVersion() const { return npu_version_; } /* version last stored by setNpuVersion */
+  uint64_t getNpuTops () const { /* for backward-compatibility, 0-tops is regarded as 8-tops */
+    const uint64_t decoded_tops = NPU_VERSION_TOPS(npu_version_);
+    return decoded_tops == 0 ? 8 : decoded_tops;
+  }
  
   private:
    static std::atomic<int> global_id_;
@@ -135,7 +338,7 @@ class EmulElement {
    size_t size_; /**< the allocated size */
  
    ThreadSafeMap<uint64_t, model_config_t> model_config_map_;
-  uint64_t tops_; /**< npu tops */
+  uint64_t npu_version_; /**< npu version */
  };
  
  /**
@@ -150,7 +353,7 @@ EmulElement::EmulElement (size_t size) {
  
    size_ = size;
    dmabuf_ = global_id_.fetch_add (1);
-  tops_ = 8;
+  npu_version_ = 0;
  }
  
  /** @brief dmabuf-to-element map */
@@ -432,7 +635,7 @@ TrinityEmulAPI::registerModel (model_config_t *model_config, uint64_t npu_versio
    memcpy (config, model_config, sizeof (model_config_t));
  
    elem->setModelConfig (config);
-  elem->setNpuTops (NPU_VERSION_TOPS (npu_version));
+  elem->setNpuVersion (npu_version);
  
    return 0;
  }
@@ -546,10 +749,18 @@ TrinityEmulAPI::runInput (input_config_t *input_config) const {
        delete[] segt;
      } else {
        std::string cmd_path (prefix_share_);
-      if (elem_model->getNpuTops () == 2)
-        cmd_path += "/mRPsim/triv2_2tops.cmd";
-      else
-        cmd_path += "/mRPsim/triv2.cmd";
+      if (NPU_VERSION_MINOR(elem_model->getNpuVersion()) == 3) {
+        if (elem_model->getNpuTops() == 2)
+          cmd_path += "/mRPsim/triv-3.9.0_2tops.cmd";
+        else
+          cmd_path += "/mRPsim/triv-3.9.0.cmd";
+      } else if (NPU_VERSION_MINOR(elem_model->getNpuVersion()) == 4) {
+        cmd_path += "/mRPsim/triv-4.0.0.cmd";
+      } else {
+        logerr (TAG, "Invalid model version: minor(%d)(%d)\n", model->version, NPU_VERSION_MINOR(model->version));
+        delete[] segt;
+        return -EINVAL;
+      }
  
        std::string prof_path (prefix_profile_);
        prof_path += "/ne_profile_" + std::to_string (getpid ());
@@ -625,111 +836,33 @@ TrinityEmulAPI::getProfile (int req_id, npu_profile *profile) const {
    T2PF_HEAD head;
    ifs.read ((char *) &head, sizeof (T2PF_HEAD));
  
+  ifs.clear ();
+  ifs.seekg (0, std::ios::beg);
+
    if (head.fmt_vesion != TRIV2PROF_FMT_VER) {
-    logerr (TAG, "Profile data format mismatch (%x vs. %x)", head.fmt_vesion, TRIV2PROF_FMT_VER);
+    ifs.close ();
      return -EINVAL;
    }
  
-  uint32_t total_dump = 0;
-
-  /* Neual Network Accelerator (NNA) */
-  total_dump += head.nna.num_of_dump;
-  total_dump += head.nna_dma_in.num_of_dump;
-  total_dump += head.nna_dma_out.num_of_dump;
-
-  /* Digital Signal Processor (DSP) */
-  total_dump += head.dsp.num_of_dump;
-  total_dump += head.dsp_dma_in.num_of_dump;
-  total_dump += head.dsp_dma_out.num_of_dump;
-
-  if (total_dump > 0) {
-    profile->layers = new npu_profile_layer[total_dump];
-    profile->num_layers = total_dump;
-    profile->total_system_cycles = head.total_cycles;
-    profile->dram_input_footprint = head.nna_dma_in.access_footprint_byte;
-    profile->dram_output_footprint = head.nna_dma_out.access_footprint_byte;
-    profile->dram_input_footprint += head.dsp_dma_in.access_footprint_byte;
-    profile->dram_output_footprint += head.dsp_dma_out.access_footprint_byte;
-
-    for (uint32_t i = 0; i < total_dump; i++) {
-      npu_profile_layer *layer = &profile->layers[i];
-
-      T2PF_DUMP common;
-      T2PF_DUMP_NNA nna;
-      T2PF_DUMP_DMA nna_dma;
-      T2PF_DUMP_DSP dsp;
-      T2PF_DUMP_DMA dsp_dma;
-
-      std::streampos pos;
-
-      pos = ifs.tellg ();
-      ifs.read ((char *) &common, sizeof (T2PF_DUMP));
-      ifs.seekg (pos);
-
-      memset (layer, '\x00', sizeof (npu_profile_layer));
-
-      layer->running_cycles = common.cycle_end - common.cycle_start;
-      layer->start_cycles = common.cycle_start;
-      layer->end_cycles = common.cycle_end;
-      layer->visa_prog_seq = i;
-      /* In the first run, program sequence == exec sequence */
-      layer->visa_exec_seq = global_exec_seq++;
-
-      switch (common.block_id) {
-        case TRIV2PROF_BLOCKID_NNA:
-          ifs.read ((char *) &nna, sizeof (T2PF_DUMP_NNA));
-
-          layer->visa_opcode = nna.opcode;
-          snprintf (layer->name, NPU_OPNAME_MAX, "%s", nna.op_name);
-          break;
-        case TRIV2PROF_BLOCKID_NNA_DMA_IN:
-          ifs.read ((char *) &nna_dma, sizeof (T2PF_DUMP_DMA));
-
-          layer->visa_opcode = 0x02;
-          snprintf (layer->name, NPU_OPNAME_MAX, "%s", "ADMA_IN");
-          layer->dram_read_bytes = nna_dma.src_addr_end - nna_dma.src_addr_start;
-          layer->sram_write_bytes = nna_dma.dest_addr_end - nna_dma.dest_addr_start;
-          break;
-        case TRIV2PROF_BLOCKID_NNA_DMA_OUT:
-          ifs.read ((char *) &nna_dma, sizeof (T2PF_DUMP_DMA));
-
-          layer->visa_opcode = 0x03;
-          snprintf (layer->name, NPU_OPNAME_MAX, "%s", "ADMA_OUT");
-          layer->dram_write_bytes = nna_dma.dest_addr_end - nna_dma.dest_addr_start;
-          layer->sram_read_bytes = nna_dma.src_addr_end - nna_dma.src_addr_start;
-          break;
-        case TRIV2PROF_BLOCKID_DSP:
-          ifs.read ((char *) &dsp, sizeof (T2PF_DUMP_DSP));
-
-          layer->visa_opcode = dsp.opcode;
-          snprintf (layer->name, NPU_OPNAME_MAX, "%s", dsp.op_name);
-          break;
-        case TRIV2PROF_BLOCKID_DSP_DMA_IN:
-          ifs.read ((char *) &dsp_dma, sizeof (T2PF_DUMP_DMA));
-
-          layer->visa_opcode = 0x40;
-          snprintf (layer->name, NPU_OPNAME_MAX, "%s", "PDMA_IN");
-          layer->dram_read_bytes = dsp_dma.src_addr_end - dsp_dma.src_addr_start;
-          layer->sram_write_bytes = dsp_dma.dest_addr_end - dsp_dma.dest_addr_start;
-          break;
-        case TRIV2PROF_BLOCKID_DSP_DMA_OUT:
-          ifs.read ((char *) &dsp_dma, sizeof (T2PF_DUMP_DMA));
+  int major, minor;
+  getProfileArchVersion(head.arch_vesion, &major, &minor);
+  if (major != 2) {
+    logerr (TAG, "Invalid profile arch version (%d.%d)", major, minor);
+    ifs.close ();
+    return -EINVAL;
+  }
  
-          layer->visa_opcode = 0x41;
-          snprintf (layer->name, NPU_OPNAME_MAX, "%s", "PDMA_OUT");
-          layer->dram_write_bytes = dsp_dma.dest_addr_end - dsp_dma.dest_addr_start;
-          layer->sram_read_bytes = dsp_dma.src_addr_end - dsp_dma.src_addr_start;
-          break;
-        default:
-          logerr (TAG, "Unknown block id detected: %u", common.block_id);
-          delete[] profile->layers;
-          profile->layers = nullptr;
-          ifs.close ();
-          return -EINVAL;
-      }
-    }
+  int ret;
+  if (minor == 3) {
+    ret = parseProfile23 (&ifs, profile);
+  } else if (minor == 4) {
+    ret = parseProfile24 (&ifs, profile);
+  } else {
+    logerr (TAG, "Invalid profile arch version (%d.%d)", major, minor);
+    ret = -EINVAL;
    }
  
    ifs.close ();
-  return 0;
+
+  return ret;
  }
diff --git a/utils/trinity_cuse/trinity-cuse-triv2.cc b/utils/trinity_cuse/trinity-cuse-triv2.cc

index 5179f1780bae6928894574e14c2139d01bbcc2e2..467ee62d33827097da60bcdf1b7b2f4b97935422 100644 (file)
--- a/utils/trinity_cuse/trinity-cuse-triv2.cc
+++ b/utils/trinity_cuse/trinity-cuse-triv2.cc
@@ -207,31 +207,18 @@ class EmulProfile {
    void *getData () const { return profile_.layers; }
    size_t getDataSize () const { return profile_.num_layers * sizeof (npu_profile_layer); }
  
-  bool parse () {
-    std::ifstream ifs (prof_path_, std::ios::binary);
-    if (!ifs.good ()) {
-      std::cerr << "Failed to find the profile data " << prof_path_ << "\n";
-      return false;
-    }
-
-    profile_.prof_path = strdup (prof_path_.c_str ());
-    if (!profile_.prof_path) {
-      std::cerr << "Unable to duplicate the profile path " << prof_path_ << "\n";
-      return false;
-    }
+  void getProfileArchVersion (uint32_t version, int *major, int *minor) {
+    *major = static_cast<int> ((version >> TRIV2PROF_MAJOR_SHIFT) & 0xffffu); /* shifted-down field */
+    *minor = static_cast<int> (version & 0xffffu); /* low 16 bits */
+  }
  
+  bool parseProfile23 (std::ifstream &ifs) {
      T2PF_HEAD head;
      ifs.read ((char *) &head, sizeof (T2PF_HEAD));
  
-    if (head.fmt_vesion != TRIV2PROF_FMT_VER) {
-      std::cerr << "Profile data format mismatch: "
-                << "(" << head.fmt_vesion << " vs. " << TRIV2PROF_FMT_VER << ")\n";
-      return false;
-    }
-
      uint32_t total_dump = 0;
  
-    /* Neual Network Accelerator (NNA) */
+    /* Neural Network Accelerator (NNA) */
      total_dump += head.nna.num_of_dump;
      total_dump += head.nna_dma_in.num_of_dump;
      total_dump += head.nna_dma_out.num_of_dump;
@@ -270,16 +257,20 @@ class EmulProfile {
          layer->running_cycles = common.cycle_end - common.cycle_start;
          layer->start_cycles = common.cycle_start;
          layer->end_cycles = common.cycle_end;
+        layer->visa_prog_seq = i;
+        layer->visa_exec_seq = exec_seq_++;
  
          switch (common.block_id) {
            case TRIV2PROF_BLOCKID_NNA:
              ifs.read ((char *) &nna, sizeof (T2PF_DUMP_NNA));
  
+            layer->visa_opcode = nna.opcode;
              snprintf (layer->name, NPU_OPNAME_MAX, "%s", nna.op_name);
              break;
            case TRIV2PROF_BLOCKID_NNA_DMA_IN:
              ifs.read ((char *) &nna_dma, sizeof (T2PF_DUMP_DMA));
  
+            layer->visa_opcode = 0x02;
              snprintf (layer->name, NPU_OPNAME_MAX, "%s", "ADMA_IN");
              layer->dram_read_bytes = nna_dma.src_addr_end - nna_dma.src_addr_start;
              layer->sram_write_bytes = nna_dma.dest_addr_end - nna_dma.dest_addr_start;
@@ -287,6 +278,7 @@ class EmulProfile {
            case TRIV2PROF_BLOCKID_NNA_DMA_OUT:
              ifs.read ((char *) &nna_dma, sizeof (T2PF_DUMP_DMA));
  
+            layer->visa_opcode = 0x03;
              snprintf (layer->name, NPU_OPNAME_MAX, "%s", "ADMA_OUT");
              layer->dram_write_bytes = nna_dma.dest_addr_end - nna_dma.dest_addr_start;
              layer->sram_read_bytes = nna_dma.src_addr_end - nna_dma.src_addr_start;
@@ -294,11 +286,13 @@ class EmulProfile {
            case TRIV2PROF_BLOCKID_DSP:
              ifs.read ((char *) &dsp, sizeof (T2PF_DUMP_DSP));
  
+            layer->visa_opcode = dsp.opcode;
              snprintf (layer->name, NPU_OPNAME_MAX, "%s", dsp.op_name);
              break;
            case TRIV2PROF_BLOCKID_DSP_DMA_IN:
              ifs.read ((char *) &dsp_dma, sizeof (T2PF_DUMP_DMA));
  
+            layer->visa_opcode = 0x40;
              snprintf (layer->name, NPU_OPNAME_MAX, "%s", "PDMA_IN");
              layer->dram_read_bytes = dsp_dma.src_addr_end - dsp_dma.src_addr_start;
              layer->sram_write_bytes = dsp_dma.dest_addr_end - dsp_dma.dest_addr_start;
@@ -306,6 +300,7 @@ class EmulProfile {
            case TRIV2PROF_BLOCKID_DSP_DMA_OUT:
              ifs.read ((char *) &dsp_dma, sizeof (T2PF_DUMP_DMA));
  
+            layer->visa_opcode = 0x41;
              snprintf (layer->name, NPU_OPNAME_MAX, "%s", "PDMA_OUT");
              layer->dram_write_bytes = dsp_dma.dest_addr_end - dsp_dma.dest_addr_start;
              layer->sram_read_bytes = dsp_dma.src_addr_end - dsp_dma.src_addr_start;
@@ -314,18 +309,150 @@ class EmulProfile {
              std::cerr << "Unknown block id detected: " << common.block_id << std::endl;
              delete[] profile_.layers;
              profile_.layers = nullptr;
-            ifs.close ();
              return false;
          }
        }
      }
  
-    ifs.close ();
      return true;
    }
  
+  /** @brief Parse a TRIV2.4 profile dump into profile_
+   * @param[in] ifs binary stream positioned at the T24PF_HEAD header
+   * @return true on success (including a profile with no dump), false on
+   *         truncated data or an unknown block id */
+  bool parseProfile24 (std::ifstream &ifs) {
+    T24PF_HEAD head;
+    if (!ifs.read ((char *) &head, sizeof (T24PF_HEAD))) {
+      std::cerr << "Failed to read the profile header" << std::endl;
+      return false;
+    }
+
+    uint32_t total_dump = 0;
+    /* Neural Network Accelerator (NNA) */
+    total_dump += head.nna0.num_of_dump;
+    total_dump += head.nna1.num_of_dump;
+    total_dump += head.dma_in.num_of_dump;
+    total_dump += head.dma_out.num_of_dump;
+    /* Digital Signal Processor (DSP) */
+    total_dump += head.dsp.num_of_dump;
+    if (total_dump == 0)
+      return true;
+    profile_.layers = new npu_profile_layer[total_dump];
+    profile_.num_layers = total_dump;
+    profile_.total_system_cycles = head.total_cycles;
+    profile_.dram_input_footprint = head.dma_in.access_footprint_byte;
+    profile_.dram_output_footprint = head.dma_out.access_footprint_byte;
+    bool ok = true;
+    for (uint32_t i = 0; i < total_dump && ok; i++) {
+      npu_profile_layer *layer = &profile_.layers[i];
+      T2PF_DUMP common;
+      T2PF_DUMP_NNA nna;
+      T2PF_DUMP_DMA dma;
+      T2PF_DUMP_DSP dsp;
+      /* Peek at the common fields to learn the block id, then rewind */
+      const std::streampos pos = ifs.tellg ();
+      if (!ifs.read ((char *) &common, sizeof (T2PF_DUMP))) {
+        std::cerr << "Truncated profile data at dump " << i << std::endl;
+        ok = false;
+        break;
+      }
+      ifs.seekg (pos);
+      memset (layer, '\x00', sizeof (npu_profile_layer));
+      layer->running_cycles = common.cycle_end - common.cycle_start;
+      layer->start_cycles = common.cycle_start;
+      layer->end_cycles = common.cycle_end;
+      layer->visa_prog_seq = i;
+      layer->visa_exec_seq = exec_seq_++;
+      switch (common.block_id) {
+        case TRIV2PROF_BLOCKID_NNA0:
+        case TRIV2PROF_BLOCKID_NNA1:
+          ifs.read ((char *) &nna, sizeof (T2PF_DUMP_NNA));
+          layer->visa_opcode = nna.opcode;
+          snprintf (layer->name, NPU_OPNAME_MAX, "%s", nna.op_name);
+          break;
+        case TRIV2PROF_BLOCKID_DMA_IN:
+          ifs.read ((char *) &dma, sizeof (T2PF_DUMP_DMA));
+          layer->visa_opcode = 0x02;
+          snprintf (layer->name, NPU_OPNAME_MAX, "%s", "ADMA_IN");
+          layer->dram_read_bytes = dma.src_addr_end - dma.src_addr_start;
+          layer->sram_write_bytes = dma.dest_addr_end - dma.dest_addr_start;
+          break;
+        case TRIV2PROF_BLOCKID_DMA_OUT:
+          ifs.read ((char *) &dma, sizeof (T2PF_DUMP_DMA));
+          layer->visa_opcode = 0x03;
+          snprintf (layer->name, NPU_OPNAME_MAX, "%s", "ADMA_OUT");
+          layer->dram_write_bytes = dma.dest_addr_end - dma.dest_addr_start;
+          layer->sram_read_bytes = dma.src_addr_end - dma.src_addr_start;
+          break;
+        case TRIV2PROF_BLOCKID_DSP:
+          ifs.read ((char *) &dsp, sizeof (T2PF_DUMP_DSP));
+          layer->visa_opcode = dsp.opcode;
+          snprintf (layer->name, NPU_OPNAME_MAX, "%s", dsp.op_name);
+          break;
+        default:
+          std::cerr << "Unknown block id detected: " << common.block_id << std::endl;
+          ok = false;
+      }
+    }
+    if (!ok) { /* keep getData () and getDataSize () consistent after a failure */
+      delete[] profile_.layers;
+      profile_.layers = nullptr;
+      profile_.num_layers = 0;
+    }
+    return ok;
+  }
+
+  /**
+   * @brief Load the profile file at prof_path_ and fill profile_
+   * @return true on success; false when the file cannot be opened, its format
+   *         or arch version is unsupported, or its dump data is malformed
+   */
+  bool parse () {
+    std::ifstream ifs (prof_path_, std::ios::binary);
+    if (!ifs.good ()) {
+      std::cerr << "Failed to find the profile data " << prof_path_ << "\n";
+      return false;
+    }
+
+    profile_.prof_path = strdup (prof_path_.c_str ());
+    if (!profile_.prof_path) {
+      std::cerr << "Unable to duplicate the profile path " << prof_path_ << "\n";
+      return false;
+    }
+
+    /* Peek at the header for its version fields; zero-init covers a short read */
+    T2PF_HEAD head{};
+    ifs.read ((char *) &head, sizeof (T2PF_HEAD));
+    /* Drop any EOF/fail state left by the peek, then rewind for the real parse */
+    ifs.clear ();
+    ifs.seekg (0, std::ios::beg);
+
+    if (head.fmt_vesion != TRIV2PROF_FMT_VER) {
+      std::cerr << "Profile data format mismatch: "
+                << "(" << head.fmt_vesion << " vs. " << TRIV2PROF_FMT_VER << ")\n";
+      return false; /* this function returns bool: -EINVAL would read as success */
+    }
+
+    int major, minor;
+    getProfileArchVersion (head.arch_vesion, &major, &minor);
+    if (major != 2 || (minor != 3 && minor != 4)) {
+      std::cerr <<"Invalid profile arch version (" << major << "." << minor << ")" << std::endl;
+      return false;
+    }
+
+    /* ifs closes itself on scope exit, so no explicit close () is needed */
+    bool ret;
+    if (minor == 3)
+      ret = parseProfile23 (ifs);
+    else
+      ret = parseProfile24 (ifs);
+    return ret;
+  }
+
   private:
    int req_id_;
+  uint64_t exec_seq_;
    std::string prof_path_;
    npu_profile profile_;
  };
@@ -492,7 +619,7 @@ triv2_release (trinity_cuse_context *ctx) {
   */
  static int
  triv2_get_version (trinity_cuse_context *ctx, uint32_t *version) {
-  *version = trinity_gen_ver (TRINITY_DEV_VISION2_CUSE, 2, 0, 0);
+  *version = trinity_gen_ver (TRINITY_DEV_VISION2_CUSE, 2, 4, 0);
    return 0;
  }
  
@@ -657,9 +784,6 @@ triv2_run_input (trinity_cuse_context *ctx, const trinity_cuse_input *in, trinit
    if (stat == nullptr)
      return -ENOENT;
  
-  if (model->getVersion () != 3)
-    return -EINVAL;
-
    EmulDmabuf *dbuf_model = global_dmabuf_map.find (model->getDbufFD ());
    if (dbuf_model == nullptr)
      return -ENOENT;
@@ -705,10 +829,7 @@ triv2_run_input (trinity_cuse_context *ctx, const trinity_cuse_input *in, trinit
    triv2_get_tops (ctx, &tops);
  
    std::string cmd_path (ctx->prefix_share);
-  if (tops == 2)
-    cmd_path += "/mRPsim/triv2_2tops.cmd";
-  else
-    cmd_path += "/mRPsim/triv2.cmd";
+  cmd_path += "/mRPsim/triv-4.0.0.cmd";
  
    int req_id = in->req_id;
    std::string prof_path (ctx->prefix_profile);
author	Jiho Chu <jiho.chu@samsung.com>
	Fri, 1 Jul 2022 08:00:48 +0000 (17:00 +0900)
committer	추지호/NPU Lab(SR)/삼성전자 <jiho.chu@samsung.com>
	Fri, 22 Jul 2022 10:06:40 +0000 (19:06 +0900)
src/core/npu/NPUdrvAPI_emul.cc		patch \| blob \| history
utils/trinity_cuse/trinity-cuse-triv2.cc		patch \| blob \| history