std::cout << "\tInference\n";
std::cout << "\t-m model_path : inference model path\n";
std::cout << "\t-f # of inference : number of inference\n";
+ std::cout << "\t-c # of async req : number of requests with non_blocking option\n";
+}
+
+/**
+ * @brief Request callback: record one more finished request and wake a waiter.
+ * @param[in] output Output buffers handed to the callback (not used here)
+ * @param[in] req_id Request id handed to the callback (not used here)
+ * @param[in] data   Opaque pointer; cast to test_sync* and updated under its mutex
+ */
+static void
+cb (output_buffers* output, int req_id, void* data) {
+  test_sync* state = static_cast<test_sync*> (data);
+
+  /* the counter is bumped and the waiter is notified while holding the mutex */
+  std::lock_guard<std::mutex> guard (state->m);
+  state->end += 1;
+  state->cv.notify_one ();
+}
+
+/**
+ * @brief Allocate and fill a buffer descriptor for the input or output feature maps.
+ * @param[in] isInput true to describe the input files (BUFFER_FILE),
+ *                    false to describe the output files (BUFFER_MAPPED)
+ * @return A descriptor allocated with new (the caller must delete it), or
+ *         nullptr when any of its num_buffers entries has a size of 0.
+ *
+ * @note For NPUBIN_VERSION <= 1 a single "<prefix>_fmap.bin" file is used;
+ *       otherwise one "<prefix>_fmap_<i>.bin" file per buffer.
+ * @note The descriptors store c_str() pointers into inpath_/outpath_, which are
+ *       shared by every descriptor created by this object. The stored strings
+ *       are therefore left untouched when the file name has not changed, so
+ *       that pointers held by previously created descriptors stay valid.
+ */
+generic_buffers*
+StressTester::createBuffer (bool isInput) {
+  /* value-initialize: entries that are not filled in below must read as size 0 */
+  generic_buffers* buffers = new generic_buffers ();
+
+  std::vector<std::string>* path;
+  std::string prefix;
+
+  if (isInput) {
+    buffers->num_buffers = meta_->input_seg_num;
+    path = &inpath_;
+    prefix = "input";
+  } else {
+    buffers->num_buffers = meta_->output_seg_num;
+    path = &outpath_;
+    prefix = "output";
+  }
+
+  const bool single_file = NPUBIN_VERSION (meta_->magiccode) <= 1;
+  const uint32_t num_files = single_file ? 1 : buffers->num_buffers;
+
+  path->resize (num_files);
+  for (uint32_t i = 0; i < num_files; i++) {
+    std::string file = model_dir_ + "/" + prefix + "_fmap";
+    if (!single_file)
+      file += "_" + std::to_string (i);
+    file += ".bin";
+
+    /* re-assigning an identical name could move the string storage and leave
+     * earlier descriptors with a dangling filepath, so only store real changes */
+    if (path->at (i) != file)
+      path->at (i) = file;
+
+    buffers->bufs[i].filepath = path->at (i).c_str ();
+    buffers->bufs[i].size = get_file_size (path->at (i).c_str ());
+    buffers->bufs[i].type = isInput ? BUFFER_FILE : BUFFER_MAPPED;
+  }
+
+  /* a size of 0 is treated as "file not found" (presumably what get_file_size
+   * reports for a missing file; confirm against its definition) */
+  for (uint32_t i = 0; i < buffers->num_buffers; i++) {
+    if (buffers->bufs[i].size == 0) {
+      std::cerr << "Failed to find " << prefix << "_fmap" << std::endl;
+      delete buffers;
+      return nullptr;
+    }
+  }
+
+  return buffers;
+}
+
+/**
+ * @brief Create one request with its own input/output buffers and register it.
+ * @return 0 on success, a negative value on failure.
+ *
+ * On success the buffers and the request id are appended to inputs_, outputs_
+ * and req_ids_. On any failure everything created so far in this call is
+ * released again: the request is removed, the output buffers are cleaned and
+ * both descriptors returned by createBuffer() are deleted.
+ *
+ * @note In async mode the callback and the non-blocking mode are both set on
+ *       the request, and a failure of either one aborts the call.
+ */
+int
+StressTester::addRequest () {
+  /* all locals are declared up front so that the gotos below never jump over
+   * an initialization */
+  int status, req_id;
+  input_buffers* input_buffer;
+  output_buffers* output_buffer;
+
+  input_buffer = createBuffer (true);
+  output_buffer = createBuffer (false);
+
+  if (input_buffer == nullptr || output_buffer == nullptr) {
+    std::cerr << "Failed to create buffers" << std::endl;
+    status = -EINVAL;
+    goto free_buffers;
+  }
+
+  status = allocNPU_genericBuffers (dev_, output_buffer);
+  if (status < 0) {
+    std::cerr << "Failed to alloc output buffers : " << status << std::endl;
+    status = -EINVAL;
+    goto free_buffers;
+  }
+
+  status = createNPU_request (dev_, model_id_, &req_id);
+  if (status < 0) {
+    std::cerr << "Failed to create request : " << status << std::endl;
+    goto clean_output;
+  }
+
+  status = setNPU_requestData (dev_, req_id, input_buffer, NULL, output_buffer, NULL);
+  if (status < 0) {
+    std::cerr << "Failed to set input data : " << status << std::endl;
+    goto remove_request;
+  }
+
+  if (async_mode_) {
+    status = setNPU_requestCallback (dev_, req_id, cb, &sync);
+    if (status < 0) {
+      std::cerr << "Failed to set request callback : " << status << std::endl;
+      goto remove_request;
+    }
+
+    status = setNPU_requestMode (dev_, req_id, NPU_INFER_NON_BLOCKING);
+    if (status < 0) {
+      std::cerr << "Failed to set request mode : " << status << std::endl;
+      goto remove_request;
+    }
+  }
+
+  inputs_.push_back (input_buffer);
+  outputs_.push_back (output_buffer);
+  req_ids_.push_back (req_id);
+
+  return 0;
+
+remove_request:
+  removeNPU_request (dev_, req_id);
+clean_output:
+  cleanNPU_genericBuffers (dev_, output_buffer);
+free_buffers:
+  /* the descriptors come from createBuffer() (operator new); delete on a
+   * null pointer is a no-op, so a partially failed creation is fine here */
+  delete input_buffer;
+  delete output_buffer;
+
+  return status;
}
int
return status;
}
- input_.num_buffers = meta_->input_seg_num;
- if (NPUBIN_VERSION (meta_->magiccode) <= 1) {
- inpath_.resize (1);
- inpath_[0] = model_dir_ + "/input_fmap.bin";
- input_.bufs[0].filepath = inpath_[0].c_str ();
- input_.bufs[0].size = get_file_size (inpath_[0].c_str ());
- input_.bufs[0].type = BUFFER_FILE;
- } else {
- inpath_.resize (input_.num_buffers);
- for (uint32_t idx = 0; idx < input_.num_buffers; idx++) {
- inpath_[idx] = model_dir_ + "/input_fmap_" + std::to_string (idx) + ".bin";
- input_.bufs[idx].filepath = inpath_[idx].c_str ();
- input_.bufs[idx].size = get_file_size (inpath_[idx].c_str ());
- input_.bufs[idx].type = BUFFER_FILE;
- }
- }
-
- for (uint32_t idx = 0; idx < input_.num_buffers; idx++) {
- if (input_.bufs[idx].size == 0) {
- std::cerr << "Failed to find input_fmap" << std::endl;
- return -ENOENT;
- }
- }
-
- output_.num_buffers = meta_->output_seg_num;
- if (NPUBIN_VERSION (meta_->magiccode) <= 1) {
- outpath_.resize (1);
- outpath_[0] = model_dir_ + "/output_fmap.bin";
- output_.bufs[0].filepath = outpath_[0].c_str ();
- output_.bufs[0].size = get_file_size (outpath_[0].c_str ());
- output_.bufs[0].type = BUFFER_MAPPED;
- } else {
- outpath_.resize (output_.num_buffers);
- for (uint32_t idx = 0; idx < output_.num_buffers; idx++) {
- outpath_[idx] = model_dir_ + "/output_fmap_" + std::to_string (idx) + ".bin";
- output_.bufs[idx].filepath = outpath_[idx].c_str ();
- output_.bufs[idx].size = get_file_size (outpath_[idx].c_str ());
- output_.bufs[idx].type = BUFFER_MAPPED;
- }
- }
-
- for (uint32_t idx = 0; idx < output_.num_buffers; idx++) {
- if (output_.bufs[idx].size == 0) {
- std::cerr << "Failed to find output_fmap" << std::endl;
- return -ENOENT;
+ for (int i = 0; i < req_num_; i++) {
+ status = addRequest ();
+ if (status < 0) {
+ std::cerr << "Failed to add request" << std::endl;
+ break;
}
}
- status = allocNPU_genericBuffers (dev_, &output_);
- if (status < 0) {
- std::cerr << "Failed to alloc output buffers : " << status << std::endl;
- return status;
- }
-
- status = createNPU_request (dev_, model_id_, &req_id_);
- if (status < 0) {
- std::cerr << "Failed to create request : " << status << std::endl;
- return status;
- }
-
- status = setNPU_requestData (dev_, req_id_, &input_, NULL, &output_, NULL);
- if (status < 0) {
- std::cerr << "Failed to set input data : " << status << std::endl;
- return status;
- }
return 0;
}
return 0;
start = clock ();
+ std::cerr << "runInference: " << model_dir_ << std::endl;
+
+ for (int i = 0; i < infer_num_; i++) {
+ if (async_mode_) {
+ sync.end = 0;
+ for (int j = 0; j < req_num_; j++) {
+ status = submitNPU_request (dev_, req_ids_[j]);
+ if (status < 0) {
+ std::cerr << "Failed to submit request : " << status << std::endl;
+ return status;
+ } else
+ std::unique_lock<std::mutex> lock (sync.m);
+ }
+
+ std::unique_lock<std::mutex> lock (sync.m);
+ sync.cv.wait (lock, [&] () { return sync.end == req_num_; });
+ } else {
+ status = submitNPU_request (dev_, req_ids_[0]);
+ if (status < 0) {
+ std::cerr << "Failed to submit request : " << status << std::endl;
+ return status;
+ }
+ }
- std::cout << "runInference: " << model_dir_ << std::endl;
- for (int j = 0; j < infer_num_; j++) {
- status = submitNPU_request (dev_, req_id_);
- if (status < 0) {
- std::cerr << "Failed to submit request : " << status << std::endl;
- return status;
+ success = true;
+
+ for (int j = 0; j < req_num_; j++) {
+ for (int k = 0; k < outputs_[j]->num_buffers; k++) {
+ const char* path = outpath_[k].c_str ();
+ char* buf = static_cast<char*> (outputs_[j]->bufs[k].addr);
+ size_t size = outputs_[j]->bufs[k].size;
+
+ if (compare_data (path, buf, size) != 0)
+ success = false;
+ }
}
- }
- success = true;
- for (int j = 0; j < output_.num_buffers; j++) {
- const char* path = outpath_[j].c_str ();
- char* buf = static_cast<char*> (output_.bufs[j].addr);
- size_t size = output_.bufs[j].size;
-
- if (compare_data (path, buf, size) != 0)
- success = false;
- }
- if (!success) {
- return -1;
+ if (!success)
+ return -1;
}
if (print_time_) {
optind = 0;
opterr = 0;
- while ((c = getopt (argc, argv, "i:s:a:rm:f:pth")) != -1) {
+ while ((c = getopt (argc, argv, "i:s:a:rm:f:pthc:")) != -1) {
switch (c) {
case 'i':
setIterNum (optarg);
case 't':
setPrintTime (true);
break;
+ case 'c':
+ setAsyncNum (optarg);
+ break;
case '?':
- if (optopt == 's' || optopt == 'a' || optopt == 'i' || optopt == 'm' || optopt == 'f')
- std::cerr << "Option 's','a','i','m' or 'f' requires an extra argument\n";
+ if (optopt == 's' || optopt == 'a' || optopt == 'i' || optopt == 'm' || optopt == 'f' ||
+ optopt == 'c')
+ std::cerr << "Option 's','a','i','m','f' or 'c' requires an extra argument\n";
else
std::cerr << "Unknown flag: " << c << std::endl;
return -1;