public:
EmulReq (int req_id) : req_id_ (req_id), cout_orig_ (nullptr) {}
- const char *get_profile_path () const { return prof_path_.c_str (); }
-
void run_emul (char *prog, char **segt, char *metadata, std::string cmd_path,
std::string prof_path) {
- prof_path_ = prof_path + ".rec";
-
setMute (true);
run_triv2_emul (prog, segt, metadata, cmd_path.c_str (), prof_path.c_str ());
setMute (false);
}
}
- void run (std::function<void()> func) { func (); }
-
- bool get_profile (npu_profile *profile) {
- std::ifstream ifs (prof_path_, std::ios::binary);
- if (!ifs.good ()) {
- std::cerr << "Failed to find the profile data " << prof_path_ << "\n";
- return false;
- }
-
- profile->prof_path = strdup (prof_path_.c_str ());
- if (!profile->prof_path) {
- std::cerr << "Unable to duplicate the profile path " << prof_path_ << "\n";
- return false;
- }
-
- T2PF_HEAD head;
- ifs.read ((char *) &head, sizeof (T2PF_HEAD));
-
- if (head.fmt_vesion != TRIV2PROF_FMT_VER) {
- std::cerr << "Profile data format mismatch: "
- << "(" << head.fmt_vesion << " vs. " << TRIV2PROF_FMT_VER << ")\n";
- return false;
- }
-
- uint32_t total_dump = 0;
-
- /* Neual Network Accelerator (NNA) */
- total_dump += head.nna.num_of_dump;
- total_dump += head.nna_dma_in.num_of_dump;
- total_dump += head.nna_dma_out.num_of_dump;
-
- /* Digital Signal Processor (DSP) */
- total_dump += head.dsp.num_of_dump;
- total_dump += head.dsp_dma_in.num_of_dump;
- total_dump += head.dsp_dma_out.num_of_dump;
-
- if (total_dump > 0) {
- profile->layers = new npu_profile_layer[total_dump];
- profile->num_layers = total_dump;
- profile->total_system_cycles = head.total_cycles;
- profile->dram_input_footprint = head.nna_dma_in.access_footprint_byte;
- profile->dram_output_footprint = head.nna_dma_out.access_footprint_byte;
- profile->dram_input_footprint += head.dsp_dma_in.access_footprint_byte;
- profile->dram_output_footprint += head.dsp_dma_out.access_footprint_byte;
-
- for (uint32_t i = 0; i < total_dump; i++) {
- npu_profile_layer *layer = &profile->layers[i];
-
- T2PF_DUMP common;
- T2PF_DUMP_NNA nna;
- T2PF_DUMP_DMA nna_dma;
- T2PF_DUMP_DSP dsp;
- T2PF_DUMP_DMA dsp_dma;
-
- std::streampos pos;
-
- pos = ifs.tellg ();
- ifs.read ((char *) &common, sizeof (T2PF_DUMP));
- ifs.seekg (pos);
-
- memset (layer, '\x00', sizeof (npu_profile_layer));
-
- layer->running_cycles = common.cycle_end - common.cycle_start;
- layer->start_cycles = common.cycle_start;
- layer->end_cycles = common.cycle_end;
- layer->visa_prog_seq = i;
- /* In the first run, program sequence == exec sequence */
- layer->visa_exec_seq = global_exec_seq++;
-
- switch (common.block_id) {
- case TRIV2PROF_BLOCKID_NNA:
- ifs.read ((char *) &nna, sizeof (T2PF_DUMP_NNA));
-
- layer->visa_opcode = nna.opcode;
- snprintf (layer->name, NPU_OPNAME_MAX, "%s", nna.op_name);
- break;
- case TRIV2PROF_BLOCKID_NNA_DMA_IN:
- ifs.read ((char *) &nna_dma, sizeof (T2PF_DUMP_DMA));
-
- layer->visa_opcode = 0x02;
- snprintf (layer->name, NPU_OPNAME_MAX, "%s", "ADMA_IN");
- layer->dram_read_bytes = nna_dma.src_addr_end - nna_dma.src_addr_start;
- layer->sram_write_bytes = nna_dma.dest_addr_end - nna_dma.dest_addr_start;
- break;
- case TRIV2PROF_BLOCKID_NNA_DMA_OUT:
- ifs.read ((char *) &nna_dma, sizeof (T2PF_DUMP_DMA));
-
- layer->visa_opcode = 0x03;
- snprintf (layer->name, NPU_OPNAME_MAX, "%s", "ADMA_OUT");
- layer->dram_write_bytes = nna_dma.dest_addr_end - nna_dma.dest_addr_start;
- layer->sram_read_bytes = nna_dma.src_addr_end - nna_dma.src_addr_start;
- break;
- case TRIV2PROF_BLOCKID_DSP:
- ifs.read ((char *) &dsp, sizeof (T2PF_DUMP_DSP));
-
- layer->visa_opcode = dsp.opcode;
- snprintf (layer->name, NPU_OPNAME_MAX, "%s", dsp.op_name);
- break;
- case TRIV2PROF_BLOCKID_DSP_DMA_IN:
- ifs.read ((char *) &dsp_dma, sizeof (T2PF_DUMP_DMA));
-
- layer->visa_opcode = 0x40;
- snprintf (layer->name, NPU_OPNAME_MAX, "%s", "PDMA_IN");
- layer->dram_read_bytes = dsp_dma.src_addr_end - dsp_dma.src_addr_start;
- layer->sram_write_bytes = dsp_dma.dest_addr_end - dsp_dma.dest_addr_start;
- break;
- case TRIV2PROF_BLOCKID_DSP_DMA_OUT:
- ifs.read ((char *) &dsp_dma, sizeof (T2PF_DUMP_DMA));
-
- layer->visa_opcode = 0x41;
- snprintf (layer->name, NPU_OPNAME_MAX, "%s", "PDMA_OUT");
- layer->dram_write_bytes = dsp_dma.dest_addr_end - dsp_dma.dest_addr_start;
- layer->sram_read_bytes = dsp_dma.src_addr_end - dsp_dma.src_addr_start;
- break;
- default:
- std::cerr << "Unknown block id detected: " << common.block_id << std::endl;
- delete[] profile->layers;
- profile->layers = nullptr;
- ifs.close ();
- return false;
- }
- }
- }
-
- ifs.close ();
- return true;
- }
-
static std::atomic<int> global_id;
private:
int req_id_;
- std::string prof_path_;
std::streambuf *cout_orig_;
std::ofstream ofs_null_;
};
}
int req_id = input_config->req_id;
- EmulReq *req = req_map_.find (req_id);
- if (req != nullptr)
- req_map_.remove (req_id);
-
- req = new EmulReq (req_id);
- status = req_map_.insert (req_id, req);
- if (status != 0) {
- delete req;
- delete[] segment_table;
- return status;
- }
-
+ EmulReq *req = new EmulReq (req_id);
if (is_kernel) {
- /* skip execution of kernel requests */
+ status = req_map_.insert (req_id, req);
+ /* internal logic error */
+ assert (status == 0);
+ /* FIXME: does not support execution of kernel requests in emulation */
delete[] segment_table;
} else {
std::string cmd_path (prefix_share_);
std::string prof_path (prefix_profile_);
prof_path += "/ne_profile." + std::to_string (req_id);
- auto func = std::bind (&EmulReq::run_emul, req, prog, segment_table,
- static_cast<char *> (elem_metadata->getAddr ()), cmd_path, prof_path);
- req->run (func);
+ req->run_emul (prog, segment_table, static_cast<char *> (elem_metadata->getAddr ()), cmd_path,
+ prof_path);
+
+ delete req;
}
}
if (!initialized ())
return -EPERM;
- EmulReq *req = req_map_.find (req_id);
- if (req == nullptr)
+ if (req_id <= 0 || profile == nullptr) {
+ logerr (TAG, "Invalid arguments detected");
+ return -EINVAL;
+ }
+
+ std::string prof_path (prefix_profile_);
+ prof_path += "/ne_profile." + std::to_string (req_id) + ".rec";
+
+ std::ifstream ifs (prof_path, std::ios::binary);
+ if (!ifs.good ()) {
+ logerr (TAG, "Failed to find the profile data %s", prof_path.c_str ());
return -ENOENT;
+ }
- if (!req->get_profile (profile))
+ profile->prof_path = strdup (prof_path.c_str ());
+ if (!profile->prof_path) {
+ logerr (TAG, "Unable to duplicate the profile path %s", prof_path.c_str ());
+ return -ENOMEM;
+ }
+
+ T2PF_HEAD head;
+ ifs.read ((char *) &head, sizeof (T2PF_HEAD));
+
+ if (head.fmt_vesion != TRIV2PROF_FMT_VER) {
+ logerr (TAG, "Profile data format mismatch (%x vs. %x)", head.fmt_vesion, TRIV2PROF_FMT_VER);
return -EINVAL;
+ }
- req_map_.remove (req_id);
+ uint32_t total_dump = 0;
+
+ /* Neural Network Accelerator (NNA) */
+ total_dump += head.nna.num_of_dump;
+ total_dump += head.nna_dma_in.num_of_dump;
+ total_dump += head.nna_dma_out.num_of_dump;
+
+ /* Digital Signal Processor (DSP) */
+ total_dump += head.dsp.num_of_dump;
+ total_dump += head.dsp_dma_in.num_of_dump;
+ total_dump += head.dsp_dma_out.num_of_dump;
+
+ if (total_dump > 0) {
+ profile->layers = new npu_profile_layer[total_dump];
+ profile->num_layers = total_dump;
+ profile->total_system_cycles = head.total_cycles;
+ profile->dram_input_footprint = head.nna_dma_in.access_footprint_byte;
+ profile->dram_output_footprint = head.nna_dma_out.access_footprint_byte;
+ profile->dram_input_footprint += head.dsp_dma_in.access_footprint_byte;
+ profile->dram_output_footprint += head.dsp_dma_out.access_footprint_byte;
+
+ for (uint32_t i = 0; i < total_dump; i++) {
+ npu_profile_layer *layer = &profile->layers[i];
+
+ T2PF_DUMP common;
+ T2PF_DUMP_NNA nna;
+ T2PF_DUMP_DMA nna_dma;
+ T2PF_DUMP_DSP dsp;
+ T2PF_DUMP_DMA dsp_dma;
+
+ std::streampos pos;
+
+ pos = ifs.tellg ();
+ ifs.read ((char *) &common, sizeof (T2PF_DUMP));
+ ifs.seekg (pos);
+
+ memset (layer, '\x00', sizeof (npu_profile_layer));
+
+ layer->running_cycles = common.cycle_end - common.cycle_start;
+ layer->start_cycles = common.cycle_start;
+ layer->end_cycles = common.cycle_end;
+ layer->visa_prog_seq = i;
+ /* In the first run, program sequence == exec sequence */
+ layer->visa_exec_seq = global_exec_seq++;
+
+ switch (common.block_id) {
+ case TRIV2PROF_BLOCKID_NNA:
+ ifs.read ((char *) &nna, sizeof (T2PF_DUMP_NNA));
+
+ layer->visa_opcode = nna.opcode;
+ snprintf (layer->name, NPU_OPNAME_MAX, "%s", nna.op_name);
+ break;
+ case TRIV2PROF_BLOCKID_NNA_DMA_IN:
+ ifs.read ((char *) &nna_dma, sizeof (T2PF_DUMP_DMA));
+
+ layer->visa_opcode = 0x02;
+ snprintf (layer->name, NPU_OPNAME_MAX, "%s", "ADMA_IN");
+ layer->dram_read_bytes = nna_dma.src_addr_end - nna_dma.src_addr_start;
+ layer->sram_write_bytes = nna_dma.dest_addr_end - nna_dma.dest_addr_start;
+ break;
+ case TRIV2PROF_BLOCKID_NNA_DMA_OUT:
+ ifs.read ((char *) &nna_dma, sizeof (T2PF_DUMP_DMA));
+
+ layer->visa_opcode = 0x03;
+ snprintf (layer->name, NPU_OPNAME_MAX, "%s", "ADMA_OUT");
+ layer->dram_write_bytes = nna_dma.dest_addr_end - nna_dma.dest_addr_start;
+ layer->sram_read_bytes = nna_dma.src_addr_end - nna_dma.src_addr_start;
+ break;
+ case TRIV2PROF_BLOCKID_DSP:
+ ifs.read ((char *) &dsp, sizeof (T2PF_DUMP_DSP));
+
+ layer->visa_opcode = dsp.opcode;
+ snprintf (layer->name, NPU_OPNAME_MAX, "%s", dsp.op_name);
+ break;
+ case TRIV2PROF_BLOCKID_DSP_DMA_IN:
+ ifs.read ((char *) &dsp_dma, sizeof (T2PF_DUMP_DMA));
+
+ layer->visa_opcode = 0x40;
+ snprintf (layer->name, NPU_OPNAME_MAX, "%s", "PDMA_IN");
+ layer->dram_read_bytes = dsp_dma.src_addr_end - dsp_dma.src_addr_start;
+ layer->sram_write_bytes = dsp_dma.dest_addr_end - dsp_dma.dest_addr_start;
+ break;
+ case TRIV2PROF_BLOCKID_DSP_DMA_OUT:
+ ifs.read ((char *) &dsp_dma, sizeof (T2PF_DUMP_DMA));
+
+ layer->visa_opcode = 0x41;
+ snprintf (layer->name, NPU_OPNAME_MAX, "%s", "PDMA_OUT");
+ layer->dram_write_bytes = dsp_dma.dest_addr_end - dsp_dma.dest_addr_start;
+ layer->sram_read_bytes = dsp_dma.src_addr_end - dsp_dma.src_addr_start;
+ break;
+ default:
+ logerr (TAG, "Unknown block id detected: %u", common.block_id);
+ delete[] profile->layers;
+ profile->layers = nullptr;
+ ifs.close ();
+ return -EINVAL;
+ }
+ }
+ }
+
+ ifs.close ();
return 0;
}
EXPECT_NE (writeNPU_log (NPU_LOG_ERROR, "TEST", nullptr), 0);
}
+/**
+ * @brief test profile APIs (legacy)
+ * @details Loads the basic test model through the UtilTRIV2 helper, runs it
+ *          once, and then fetches the profile of that single request
+ *          max_iters times. Every fetch is expected to return 0 and to report
+ *          at least one layer, i.e. reading a profile must not consume it.
+ * @note Returns early (without failing) when the test model cannot be loaded.
+ */
+TEST (ne_libnpuhost_test, profile_apis_legacy) {
+ UtilTRIV2 tester;
+
+ ASSERT_EQ (tester.init (), 0);
+
+ std::string model_dir (BASIC_TEST_MODEL);
+ uint32_t model_id = 0;
+
+ /* skip if the test model is not available
+ * (the test ends here with no assertion evaluated on the model) */
+ if (tester.loadModel (model_dir, &model_id) != 0)
+ return;
+
+ int req_id = tester.run (model_id, true); /* NOTE(review): meaning of the 'true' flag is defined by UtilTRIV2 - confirm */
+ ASSERT_GT (req_id, 0); /* a usable request id is strictly positive */
+
+ const uint32_t max_iters = 10;
+ for (uint32_t i = 0; i < max_iters; i++) {
+ npu_profile profile = {0}; /* fresh, zeroed profile for every iteration */
+
+ /* get profile data: the same req_id is queried on every iteration and
+ * each query must succeed with a non-empty layer list */
+ EXPECT_EQ (tester.getProfile (req_id, &profile), 0);
+ EXPECT_GT (profile.num_layers, 0);
+
+ putNPU_profile (&profile); /* presumably releases what getProfile () filled in - confirm */
+ }
+}
+
+/**
+ * @brief test profile APIs (decoupled)
+ * @details Uses the request-based host API directly: a request is created,
+ *          configured, and submitted in blocking mode. The profile of that
+ *          request is then fetched max_iters times and must be available
+ *          every time. After the request is removed, fetching its profile
+ *          must fail and leave the zeroed profile untouched.
+ * @note Returns early (without failing) when the test model cannot be created.
+ */
+TEST (ne_libnpuhost_test, profile_apis_decoupled) {
+ npudev_h dev;
+ uint32_t modelid;
+ generic_buffer model;
+ std::string model_dir (BASIC_TEST_MODEL);
+ std::string model_path = model_dir + "/model.tvn";
+
+ /* skip if the test model is not available */
+ if (!create_test_model (model_path.c_str (), &model))
+ return;
+
+ ASSERT_EQ (getNPUdeviceByTypeAny (&dev, NPUCOND_TRIV2_CONN_SOCIP, 2), 0);
+ ASSERT_EQ (registerNPUmodel (dev, &model, &modelid), 0);
+
+ /* tensor input/output data */
+ /* an initializer applies to a single declarator only, so both buffers need
+ * their own '= {0}'; otherwise 'input' would be handed over with
+ * indeterminate fields */
+ generic_buffers input = {0}, output = {0};
+ std::string input_path = model_dir + "/input_fmap_0.bin";
+ std::string output_path = model_dir + "/output_fmap_0.bin";
+
+ input.num_buffers = 1;
+ input.bufs[0].size = get_file_size (input_path.c_str ());
+ input.bufs[0].filepath = input_path.c_str ();
+ input.bufs[0].type = BUFFER_FILE;
+
+ /* tensor data info */
+ tensors_data_info info_in, info_out;
+
+ info_in.num_info = 1;
+ info_in.info[0].layout = DATA_LAYOUT_NHWC;
+ info_in.info[0].type = DATA_TYPE_QASYMM8;
+
+ info_out.num_info = 1;
+ info_out.info[0].layout = DATA_LAYOUT_NHWC;
+ info_out.info[0].type = DATA_TYPE_QASYMM8;
+
+ /* misc (optional) */
+ npu_constraint constraint;
+
+ constraint.timeout_ms = 1000;
+ constraint.priority = NPU_PRIORITY_MID;
+ constraint.notimode = NPU_INTERRUPT;
+
+ /* create request */
+ const uint32_t max_iters = 10;
+ int req_id;
+
+ ASSERT_EQ (createNPU_request (dev, modelid, &req_id), 0);
+ ASSERT_EQ (setNPU_requestData (dev, req_id, &input, &info_in, &output, &info_out), 0);
+ ASSERT_EQ (setNPU_requestMode (dev, req_id, NPU_INFER_BLOCKING), 0);
+ ASSERT_EQ (setNPU_requestConstraint (dev, req_id, constraint), 0);
+ ASSERT_EQ (submitNPU_request (dev, req_id), 0);
+
+ /* the profile of a submitted request can be read repeatedly */
+ for (uint32_t i = 0; i < max_iters; i++) {
+ npu_profile profile = {0};
+
+ /* get profile data */
+ EXPECT_EQ (getNPU_profile (dev, req_id, &profile), 0);
+ EXPECT_GT (profile.num_layers, 0);
+
+ putNPU_profile (&profile);
+ }
+
+ ASSERT_EQ (removeNPU_request (dev, req_id), 0);
+
+ for (uint32_t i = 0; i < max_iters; i++) {
+ npu_profile profile = {0};
+
+ /* unable to get profile data after removing the request */
+ EXPECT_NE (getNPU_profile (dev, req_id, &profile), 0);
+ /* a failed query must not report any layer in the zeroed profile */
+ EXPECT_EQ (profile.num_layers, 0);
+
+ putNPU_profile (&profile);
+ }
+
+ ASSERT_EQ (unregisterNPUmodel (dev, modelid), 0);
+
+ putNPUdevice (dev);
+}
+
+/**
+ * @brief test profile APIs (decoupled) with error handling
+ * @details Negative case: a request is created but never submitted, so
+ *          getNPU_profile () on it is expected to return a non-zero value.
+ *          The request and the model are then removed again.
+ * @note Returns early (without failing) when the test model cannot be created.
+ */
+TEST (ne_libnpuhost_test, profile_apis_decoupled_n) {
+ npudev_h dev;
+ uint32_t modelid;
+ generic_buffer model;
+ std::string model_dir (BASIC_TEST_MODEL);
+ std::string model_path = model_dir + "/model.tvn";
+
+ /* skip if the test model is not available
+ * (the test ends here with no assertion evaluated on the model) */
+ if (!create_test_model (model_path.c_str (), &model))
+ return;
+
+ ASSERT_EQ (getNPUdeviceByTypeAny (&dev, NPUCOND_TRIV2_CONN_SOCIP, 2), 0);
+ ASSERT_EQ (registerNPUmodel (dev, &model, &modelid), 0);
+
+ /* create request */
+ npu_profile profile; /* not initialized and never passed to putNPU_profile (); only used in the failing query below */
+ int req_id;
+
+ ASSERT_EQ (createNPU_request (dev, modelid, &req_id), 0);
+
+ /* unable to get profile data if a request is not submitted yet */
+ EXPECT_NE (getNPU_profile (dev, req_id, &profile), 0);
+
+ ASSERT_EQ (removeNPU_request (dev, req_id), 0);
+ ASSERT_EQ (unregisterNPUmodel (dev, modelid), 0);
+
+ putNPUdevice (dev);
+}
+
/**
* @brief main function for unit test
*/