[profile] add mergeProfile

author Yelin Jeong <yelini.jeong@samsung.com>

Wed, 30 Nov 2022 12:21:57 +0000 (21:21 +0900)

committer 추지호/NPU Lab(SR)/삼성전자 <jiho.chu@samsung.com>

Mon, 5 Dec 2022 07:31:01 +0000 (16:31 +0900)
author Yelin Jeong <yelini.jeong@samsung.com>
Wed, 30 Nov 2022 12:21:57 +0000 (21:21 +0900)
committer 추지호/NPU Lab(SR)/삼성전자 <jiho.chu@samsung.com>
Mon, 5 Dec 2022 07:31:01 +0000 (16:31 +0900)
diff --git a/src/core/ne-profiler.cc b/src/core/ne-profiler.cc

index 68c0ed0339d2ff8d3c9b6ed5312d37027d3213d7..c4951e30182d4a6cfa82ace1985b742077924314 100644 (file)
--- a/src/core/ne-profiler.cc
+++ b/src/core/ne-profiler.cc
@@ -33,25 +33,43 @@ ModelProfiler::removeRequest (int req_id) {
  }
  
  int
-ModelProfiler::getProfile (int req_id, const npu_profile_opt &opt, npu_profile *profile) {
-  ProfileData *data = profile_map_.find (req_id);
-  if (data == nullptr)
-    return -ENOENT;
-
-  const Model *model = data->getModel ();
-  if (model == nullptr)
-    return -EINVAL;
-
-  int status = api_->getProfile (req_id, profile);
-  if (status != 0)
-    return status;
-
-  if (opt.level == PROFILE_LEVEL_EXT_META || opt.level == PROFILE_LEVEL_LAYER) {
-    HWmem *extended = model->getExtendedMetadata ();
-    if (extended != nullptr)
-      manipulateProfile (extended, profile);
-    else if (opt.level == PROFILE_LEVEL_LAYER)
-      status = -EINVAL;
+ModelProfiler::getProfile (int req_id, const npu_profile_opt &opt, npu_profile *profile,
+                           const std::string path) {
+  int status;
+  ProfileData *data;
+  const Model *model;
+
+  if (!path.empty ()) {
+    status = api_->getProfile (req_id, profile, path);
+
+    if (status != 0)
+      return status;
+
+    if (opt.level == PROFILE_LEVEL_VISA)
+      mergeProfile (profile);
+  } else {
+    data = profile_map_.find (req_id);
+    if (data == nullptr)
+      return -ENOENT;
+
+    model = data->getModel ();
+    if (model == nullptr)
+      return -EINVAL;
+    status = api_->getProfile (req_id, profile);
+
+    if (status != 0)
+      return status;
+
+    if (opt.level == PROFILE_LEVEL_EXT_META || opt.level == PROFILE_LEVEL_LAYER) {
+      HWmem *extended = model->getExtendedMetadata ();
+      if (extended != nullptr)
+        manipulateProfile (extended, profile);
+      else if (opt.level == PROFILE_LEVEL_LAYER)
+        status = -EINVAL;
+    }
+    else if (opt.level == PROFILE_LEVEL_VISA) {
+      mergeProfile (profile);
+    }
    }
  
    return status;
@@ -179,3 +197,35 @@ ModelProfiler::manipulateProfile (HWmem *extended, npu_profile *profile) {
      delete[] new_layers;
    }
  }
+
+/** @brief Merge layers sharing a visa_prog_seq into one entry; may shrink profile->layers. */
+void
+ModelProfiler::mergeProfile (npu_profile *profile) {
+  /* first layer seen for each visa_prog_seq; it absorbs the cycle span of later duplicates */
+  std::map<int64_t, npu_profile_layer *> merged;
+  const int num_before = profile->num_layers;
+
+  for (int idx = 0; idx < num_before; idx++) {
+    npu_profile_layer *cur = &profile->layers[idx];
+    auto res = merged.insert (std::make_pair (static_cast<int64_t> (cur->visa_prog_seq), cur));
+    if (res.second)
+      continue; /* newly registered sequence, nothing to merge yet */
+    npu_profile_layer *dst = res.first->second;
+    dst->start_cycles = std::min (dst->start_cycles, cur->start_cycles);
+    dst->end_cycles = std::max (dst->end_cycles, cur->end_cycles);
+    dst->running_cycles = dst->end_cycles - dst->start_cycles;
+  }
+
+  const int num_after = merged.size ();
+  if (num_after >= num_before)
+    return; /* no duplicates were found: keep the original array untouched */
+
+  /* copy survivors (in std::map key order) before the old array they point into is freed */
+  npu_profile_layer *compact = new npu_profile_layer[num_after];
+  int out = 0;
+  for (const auto &entry : merged)
+    compact[out++] = *entry.second;
+  delete[] profile->layers;
+  profile->layers = compact;
+  profile->num_layers = num_after;
+}
diff --git a/src/core/ne-profiler.h b/src/core/ne-profiler.h

index 158e556dd2d88244e856b9c62373298ffedcf220..705eeb933ba695fdad5bffdeea34c7b674273216 100644 (file)
--- a/src/core/ne-profiler.h
+++ b/src/core/ne-profiler.h
@@ -36,8 +36,10 @@ class ModelProfiler {
    int appendRequest (int req_id, const Model *model);
    int removeRequest (int req_id);
  
-  int getProfile (int req_id, const npu_profile_opt &opt, npu_profile *profile);
+  int getProfile (int req_id, const npu_profile_opt &opt, npu_profile *profile,
+                  const std::string path = "");
    void manipulateProfile (HWmem *extended, npu_profile *profile);
+  void mergeProfile (npu_profile *profile);
  
   private:
    const DriverAPI *api_;
diff --git a/src/core/npu/NPUdrvAPI.h b/src/core/npu/NPUdrvAPI.h

index 7d87129c00ad7c14277eb9207b683c565dc0e319..bbf3b3a6ea7d4bd921e7b1a32ad986c26a752df3 100644 (file)
--- a/src/core/npu/NPUdrvAPI.h
+++ b/src/core/npu/NPUdrvAPI.h
@@ -99,7 +99,9 @@ class DriverAPI {
  #endif
  
    /** @brief get profile data for vISA instructions */
-  virtual int getProfile (int req_id, npu_profile *profile) const { return -EPERM; }
+  virtual int getProfile (int req_id, npu_profile *profile, const std::string path = "") const {
+    return -EPERM;
+  }
    virtual int getStatApps (npu_stat_apps *stat) const { return -EPERM; }
    virtual int getStatReqs (int appid, npu_stat_reqs *stat) const { return -EPERM; }
  
@@ -154,7 +156,7 @@ class TrinityVision2API : public DriverAPI {
    int fpga_memcpy (int dmabuf, uint32_t offset, void *addr, size_t size) const;
  #endif
  
-  int getProfile (int req_id, npu_profile *profile) const;
+  int getProfile (int req_id, npu_profile *profile, const std::string path = "") const;
  
    int getStatApps (npu_stat_apps *stat) const;
    int getStatReqs (int appid, npu_stat_reqs *stat) const;
@@ -206,7 +208,7 @@ class TrinityEmulAPI : public DriverAPI {
    int registerModel (model_config_t *model, uint64_t npu_version) const;
    int deregisterModel (unsigned long long id) const;
  
-  int getProfile (int req_id, npu_profile *profile) const;
+  int getProfile (int req_id, npu_profile *profile, const std::string path = "") const;
    void manipulateProfile (EmulReq *req, npu_profile *profile) const;
  
   private:
diff --git a/src/core/npu/NPUdrvAPI_emul.cc b/src/core/npu/NPUdrvAPI_emul.cc

index 1393cab0d3f23c0e229f5f329365933cfbcb043d..0d67b543a0b0269f25f8598041299b2e5b232610 100644 (file)
--- a/src/core/npu/NPUdrvAPI_emul.cc
+++ b/src/core/npu/NPUdrvAPI_emul.cc
@@ -159,6 +159,7 @@ parseProfile24 (std::ifstream *ifs, npu_profile *profile) {
    total_dump += head.nna1.num_of_dump;
    total_dump += head.dma_in.num_of_dump;
    total_dump += head.dma_out.num_of_dump;
+  total_dump += head.dsp.num_of_dump;
  
    /* Digital Signal Processor (DSP) */
    total_dump += head.dsp.num_of_dump;
@@ -173,15 +174,15 @@ parseProfile24 (std::ifstream *ifs, npu_profile *profile) {
      for (uint32_t i = 0; i < total_dump; i++) {
        npu_profile_layer *layer = &profile->layers[i];
  
-      T2PF_DUMP common;
-      T2PF_DUMP_NNA nna;
+      T24PF_DUMP common;
+      T24PF_DUMP_NNA nna;
        T2PF_DUMP_DMA dma;
        T2PF_DUMP_DSP dsp;
  
        std::streampos pos;
  
        pos = ifs->tellg ();
-      ifs->read ((char *) &common, sizeof (T2PF_DUMP));
+      ifs->read ((char *) &common, sizeof (T24PF_DUMP));
        ifs->seekg (pos);
  
        memset (layer, '\x00', sizeof (npu_profile_layer));
@@ -189,20 +190,20 @@ parseProfile24 (std::ifstream *ifs, npu_profile *profile) {
        layer->running_cycles = common.cycle_end - common.cycle_start;
        layer->start_cycles = common.cycle_start;
        layer->end_cycles = common.cycle_end;
-      layer->visa_prog_seq = i;
+      layer->visa_prog_seq = common.npu_pc;
        /* In the first run, program sequence == exec sequence */
        layer->visa_exec_seq = global_exec_seq++;
  
        switch (common.block_id) {
          case TRIV2PROF_BLOCKID_NNA0:
          case TRIV2PROF_BLOCKID_NNA1:
-          ifs->read ((char *) &nna, sizeof (T2PF_DUMP_NNA));
+          ifs->read ((char *) &nna, sizeof (T24PF_DUMP_NNA));
  
            layer->visa_opcode = nna.opcode;
            snprintf (layer->name, NPU_OPNAME_MAX, "%s", nna.op_name);
            break;
          case TRIV2PROF_BLOCKID_DMA_IN:
-          ifs->read ((char *) &dma, sizeof (T2PF_DUMP_DMA));
+          ifs->read ((char *) &dma, sizeof (T24PF_DUMP_DMA));
  
            layer->visa_opcode = 0x02;
            snprintf (layer->name, NPU_OPNAME_MAX, "%s", "DMA_IN");
@@ -210,7 +211,7 @@ parseProfile24 (std::ifstream *ifs, npu_profile *profile) {
            layer->sram_write_bytes = dma.dest_addr_end - dma.dest_addr_start;
            break;
          case TRIV2PROF_BLOCKID_DMA_OUT:
-          ifs->read ((char *) &dma, sizeof (T2PF_DUMP_DMA));
+          ifs->read ((char *) &dma, sizeof (T24PF_DUMP_DMA));
  
            layer->visa_opcode = 0x03;
            snprintf (layer->name, NPU_OPNAME_MAX, "%s", "DMA_OUT");
@@ -218,7 +219,7 @@ parseProfile24 (std::ifstream *ifs, npu_profile *profile) {
            layer->sram_read_bytes = dma.src_addr_end - dma.src_addr_start;
            break;
          case TRIV2PROF_BLOCKID_DSP:
-          ifs->read ((char *) &dsp, sizeof (T2PF_DUMP_DSP));
+          ifs->read ((char *) &dsp, sizeof (T24PF_DUMP_DSP));
  
            layer->visa_opcode = dsp.opcode;
            snprintf (layer->name, NPU_OPNAME_MAX, "%s", dsp.op_name);
@@ -809,18 +810,22 @@ TrinityEmulAPI::stop_target (int req_id) const {
  }
  
  int
-TrinityEmulAPI::getProfile (int req_id, npu_profile *profile) const {
+TrinityEmulAPI::getProfile (int req_id, npu_profile *profile, std::string path) const {
    if (!initialized ())
      return -EPERM;
  
-  if (req_id <= 0 || profile == nullptr) {
+  if (req_id < 0 || profile == nullptr) {
      logerr (TAG, "Invalid arguments detected");
      return -EINVAL;
    }
  
    std::string prof_path (prefix_profile_);
-  prof_path += "/ne_profile_" + std::to_string (getpid ());
-  prof_path += "/req_id_" + std::to_string (req_id) + ".rec";
+  if (!path.empty ())
+    prof_path = path;
+  else {
+    prof_path += "/ne_profile_" + std::to_string (getpid ());
+    prof_path += "/req_id_" + std::to_string (req_id) + ".rec";
+  }
  
    std::ifstream ifs (prof_path, std::ios::binary);
    if (!ifs.good ()) {
diff --git a/src/core/npu/NPUdrvAPI_triv2.cc b/src/core/npu/NPUdrvAPI_triv2.cc

index 354f4da53e93e79fb7358e1cdfa0a3bd01c12aac..3e89805f751ceb81fab59c1593d895490295eab6 100644 (file)
--- a/src/core/npu/NPUdrvAPI_triv2.cc
+++ b/src/core/npu/NPUdrvAPI_triv2.cc
@@ -625,7 +625,7 @@ TrinityVision2API::fpga_memcpy (int dmabuf, uint32_t offset, void *addr, size_t
  #endif
  
  int
-TrinityVision2API::getProfile (int req_id, npu_profile *profile) const {
+TrinityVision2API::getProfile (int req_id, npu_profile *profile, std::string path) const {
    struct trinity_ioctl_profile_meta meta;
    struct trinity_ioctl_profile_buff buff;
    int ret = 0;
author	Yelin Jeong <yelini.jeong@samsung.com>
	Wed, 30 Nov 2022 12:21:57 +0000 (21:21 +0900)
committer	추지호/NPU Lab(SR)/삼성전자 <jiho.chu@samsung.com>
	Mon, 5 Dec 2022 07:31:01 +0000 (16:31 +0900)
src/core/ne-profiler.cc		patch \| blob \| history
src/core/ne-profiler.h		patch \| blob \| history
src/core/npu/NPUdrvAPI.h		patch \| blob \| history
src/core/npu/NPUdrvAPI_emul.cc		patch \| blob \| history
src/core/npu/NPUdrvAPI_triv2.cc		patch \| blob \| history