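This patch adds ModelProfiler::mergeProfile to ne-profiler, which merges profile layers that share the same visa_prog_seq, and adds an optional path argument to getProfile.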
Signed-off-by: Yelin Jeong <yelini.jeong@samsung.com>
}
int
-ModelProfiler::getProfile (int req_id, const npu_profile_opt &opt, npu_profile *profile) {
- ProfileData *data = profile_map_.find (req_id);
- if (data == nullptr)
- return -ENOENT;
-
- const Model *model = data->getModel ();
- if (model == nullptr)
- return -EINVAL;
-
- int status = api_->getProfile (req_id, profile);
- if (status != 0)
- return status;
-
- if (opt.level == PROFILE_LEVEL_EXT_META || opt.level == PROFILE_LEVEL_LAYER) {
- HWmem *extended = model->getExtendedMetadata ();
- if (extended != nullptr)
- manipulateProfile (extended, profile);
- else if (opt.level == PROFILE_LEVEL_LAYER)
- status = -EINVAL;
+ModelProfiler::getProfile (int req_id, const npu_profile_opt &opt, npu_profile *profile,
+ const std::string path) {
+ int status;
+ ProfileData *data;
+ const Model *model;
+
+ if (!path.empty ()) {
+ status = api_->getProfile (req_id, profile, path);
+
+ if (status != 0)
+ return status;
+
+ if (opt.level == PROFILE_LEVEL_VISA)
+ mergeProfile (profile);
+ } else {
+ data = profile_map_.find (req_id);
+ if (data == nullptr)
+ return -ENOENT;
+
+ model = data->getModel ();
+ if (model == nullptr)
+ return -EINVAL;
+ status = api_->getProfile (req_id, profile);
+
+ if (status != 0)
+ return status;
+
+ if (opt.level == PROFILE_LEVEL_EXT_META || opt.level == PROFILE_LEVEL_LAYER) {
+ HWmem *extended = model->getExtendedMetadata ();
+ if (extended != nullptr)
+ manipulateProfile (extended, profile);
+ else if (opt.level == PROFILE_LEVEL_LAYER)
+ status = -EINVAL;
+ }
+ else if (opt.level == PROFILE_LEVEL_VISA) {
+ mergeProfile (profile);
+ }
}
return status;
delete[] new_layers;
}
}
+/**
+ * @brief Merge profile layers that share the same visa_prog_seq.
+ * @param[in,out] profile profile whose layers array may be replaced.
+ *
+ * The first layer seen for each visa_prog_seq is kept; later layers with the
+ * same value only widen its start_cycles/end_cycles range, and its
+ * running_cycles is recomputed from that range. If any layers were merged,
+ * profile->layers is released with delete[] and replaced by a new array that
+ * holds the kept layers in ascending visa_prog_seq order, and
+ * profile->num_layers is updated. Otherwise the profile is left untouched.
+ *
+ * NOTE(review): assumes profile is non-null and that profile->layers was
+ * allocated with new[] - confirm against the callers.
+ */
+void
+ModelProfiler::mergeProfile (npu_profile *profile) {
+ std::map<int64_t, npu_profile_layer *> profile_layers; /* visa_prog_seq -> first layer seen with it */
+ int layers_size, total_layer = profile->num_layers;
+
+ for (int i = 0; i < total_layer; i++) {
+ int64_t npu_pc = profile->layers[i].visa_prog_seq;
+ auto it = profile_layers.find (npu_pc);
+ if (it == profile_layers.end ()) { /* first layer with this visa_prog_seq */
+ profile_layers.insert (std::make_pair (npu_pc, &profile->layers[i]));
+ } else { /* duplicate: fold its cycle range into the kept layer (in place) */
+ npu_profile_layer *to_layer = it->second;
+ to_layer->start_cycles = std::min (to_layer->start_cycles, profile->layers[i].start_cycles);
+ to_layer->end_cycles = std::max (to_layer->end_cycles, profile->layers[i].end_cycles);
+ to_layer->running_cycles = to_layer->end_cycles - to_layer->start_cycles;
+ }
+ }
+
+ layers_size = profile_layers.size();
+
+ if (layers_size < total_layer) { /* at least one layer was merged */
+ npu_profile_layer *new_layer = new npu_profile_layer[layers_size];
+ int new_layer_index = 0;
+ for (auto it = profile_layers.begin () ; it != profile_layers.end () ; it++) {
+ new_layer[new_layer_index++] = *it->second; /* copy by value; the map points into the old array */
+ }
+ delete[] profile->layers; /* old array is released only after all copies are made */
+ profile->layers = new_layer;
+ profile->num_layers = layers_size;
+ }
+}
int appendRequest (int req_id, const Model *model);
int removeRequest (int req_id);
- int getProfile (int req_id, const npu_profile_opt &opt, npu_profile *profile);
+ int getProfile (int req_id, const npu_profile_opt &opt, npu_profile *profile,
+ const std::string path = "");
void manipulateProfile (HWmem *extended, npu_profile *profile);
+ void mergeProfile (npu_profile *profile);
private:
const DriverAPI *api_;
#endif
/** @brief get profile data for vISA instructions */
- virtual int getProfile (int req_id, npu_profile *profile) const { return -EPERM; }
+ virtual int getProfile (int req_id, npu_profile *profile, const std::string path = "") const {
+ return -EPERM;
+ }
virtual int getStatApps (npu_stat_apps *stat) const { return -EPERM; }
virtual int getStatReqs (int appid, npu_stat_reqs *stat) const { return -EPERM; }
int fpga_memcpy (int dmabuf, uint32_t offset, void *addr, size_t size) const;
#endif
- int getProfile (int req_id, npu_profile *profile) const;
+ int getProfile (int req_id, npu_profile *profile, const std::string path = "") const;
int getStatApps (npu_stat_apps *stat) const;
int getStatReqs (int appid, npu_stat_reqs *stat) const;
int registerModel (model_config_t *model, uint64_t npu_version) const;
int deregisterModel (unsigned long long id) const;
- int getProfile (int req_id, npu_profile *profile) const;
+ int getProfile (int req_id, npu_profile *profile, const std::string path = "") const;
void manipulateProfile (EmulReq *req, npu_profile *profile) const;
private:
total_dump += head.nna1.num_of_dump;
total_dump += head.dma_in.num_of_dump;
total_dump += head.dma_out.num_of_dump;
+ total_dump += head.dsp.num_of_dump;
/* Digital Signal Processor (DSP) */
total_dump += head.dsp.num_of_dump;
for (uint32_t i = 0; i < total_dump; i++) {
npu_profile_layer *layer = &profile->layers[i];
- T2PF_DUMP common;
- T2PF_DUMP_NNA nna;
+ T24PF_DUMP common;
+ T24PF_DUMP_NNA nna;
T2PF_DUMP_DMA dma;
T2PF_DUMP_DSP dsp;
std::streampos pos;
pos = ifs->tellg ();
- ifs->read ((char *) &common, sizeof (T2PF_DUMP));
+ ifs->read ((char *) &common, sizeof (T24PF_DUMP));
ifs->seekg (pos);
memset (layer, '\x00', sizeof (npu_profile_layer));
layer->running_cycles = common.cycle_end - common.cycle_start;
layer->start_cycles = common.cycle_start;
layer->end_cycles = common.cycle_end;
- layer->visa_prog_seq = i;
+ layer->visa_prog_seq = common.npu_pc;
/* In the first run, program sequence == exec sequence */
layer->visa_exec_seq = global_exec_seq++;
switch (common.block_id) {
case TRIV2PROF_BLOCKID_NNA0:
case TRIV2PROF_BLOCKID_NNA1:
- ifs->read ((char *) &nna, sizeof (T2PF_DUMP_NNA));
+ ifs->read ((char *) &nna, sizeof (T24PF_DUMP_NNA));
layer->visa_opcode = nna.opcode;
snprintf (layer->name, NPU_OPNAME_MAX, "%s", nna.op_name);
break;
case TRIV2PROF_BLOCKID_DMA_IN:
- ifs->read ((char *) &dma, sizeof (T2PF_DUMP_DMA));
+ ifs->read ((char *) &dma, sizeof (T24PF_DUMP_DMA));
layer->visa_opcode = 0x02;
snprintf (layer->name, NPU_OPNAME_MAX, "%s", "DMA_IN");
layer->sram_write_bytes = dma.dest_addr_end - dma.dest_addr_start;
break;
case TRIV2PROF_BLOCKID_DMA_OUT:
- ifs->read ((char *) &dma, sizeof (T2PF_DUMP_DMA));
+ ifs->read ((char *) &dma, sizeof (T24PF_DUMP_DMA));
layer->visa_opcode = 0x03;
snprintf (layer->name, NPU_OPNAME_MAX, "%s", "DMA_OUT");
layer->sram_read_bytes = dma.src_addr_end - dma.src_addr_start;
break;
case TRIV2PROF_BLOCKID_DSP:
- ifs->read ((char *) &dsp, sizeof (T2PF_DUMP_DSP));
+ ifs->read ((char *) &dsp, sizeof (T24PF_DUMP_DSP));
layer->visa_opcode = dsp.opcode;
snprintf (layer->name, NPU_OPNAME_MAX, "%s", dsp.op_name);
}
int
-TrinityEmulAPI::getProfile (int req_id, npu_profile *profile) const {
+TrinityEmulAPI::getProfile (int req_id, npu_profile *profile, std::string path) const {
if (!initialized ())
return -EPERM;
- if (req_id <= 0 || profile == nullptr) {
+ if (req_id < 0 || profile == nullptr) {
logerr (TAG, "Invalid arguments detected");
return -EINVAL;
}
std::string prof_path (prefix_profile_);
- prof_path += "/ne_profile_" + std::to_string (getpid ());
- prof_path += "/req_id_" + std::to_string (req_id) + ".rec";
+ if (!path.empty ())
+ prof_path = path;
+ else {
+ prof_path += "/ne_profile_" + std::to_string (getpid ());
+ prof_path += "/req_id_" + std::to_string (req_id) + ".rec";
+ }
std::ifstream ifs (prof_path, std::ios::binary);
if (!ifs.good ()) {
#endif
int
-TrinityVision2API::getProfile (int req_id, npu_profile *profile) const {
+TrinityVision2API::getProfile (int req_id, npu_profile *profile, std::string path) const {
struct trinity_ioctl_profile_meta meta;
struct trinity_ioctl_profile_buff buff;
int ret = 0;