* @brief Example code to use libnpuhost.h with 2-TOPs TRIV2 device
* @author Dongju Chae <dongju.chae@samsung.com>
* @bug No known bugs except for NYI items
+ *
+ * Usage:
+ * $ apptest_tvn_triv2_example [.tvn file path]
*/
#include <libnpuhost.h>

#include <errno.h>
#include <string.h>

#include <fstream>
#include <iostream>
#define NPU_TYPE NPUCOND_TRIV2_CONN_SOCIP
-#define MODEL_NAME "model.tvn"
-#define INPUT_NAME "input_fmap_0.bin"
using namespace std;
-ifstream::pos_type
-filesize (string filename) {
- ifstream in (filename, ifstream::ate | ifstream::binary);
- return in.tellg ();
-}
-
/** @brief apptest main */
int
main (int argc, char **argv) {
+ npudev_h dev;
+ input_buffers input;
+ output_buffers output;
+ npubin_meta *meta;
+ generic_buffer modelfile;
+ const char *modelpath;
+ uint32_t modelid;
+ int req_id, ret;
+
if (argc != 2) {
- cerr << "No model directory provided. Skip this test\n";
+ cerr << "The model path was not provided. Skip this test\n";
return 0;
}
- string dir = argv[1];
-
/**
* 1) Open a NPU device using getNPUdeviceByTypeAny().
*
* Or, You can specify NPU affinity using getNPUdeviceByType().
*/
- npudev_h dev;
- if (getNPUdeviceByTypeAny (&dev, NPU_TYPE, 2 /** NPU TOPS */) < 0) {
- cerr << "Fail to open the NPU device\n";
- return -1;
+ ret = getNPUdeviceByTypeAny (&dev, NPU_TYPE, 2 /** NPU TOPS */);
+ if (ret < 0) {
+ cerr << "Failed to open the NPU device: " << ret << "\n";
+ return ret;
}
/**
- * 2) Register the .tvn model using registerNPUmodel().
+ * 2) Get model metadata using getNPUmodel_metadata().
*/
- string modelpath = dir + "/" + MODEL_NAME;
+ modelpath = argv[1];
+ meta = getNPUmodel_metadata (modelpath, false);
+ if (meta == nullptr) {
+ cerr << "Failed to find the valid model from " << modelpath << "\n";
+ ret = -ENOENT;
+ goto free_device;
+ }
- generic_buffer modelfile;
+ /**
+ * 3) Register the .tvn model using registerNPUmodel().
+ */
modelfile.type = BUFFER_FILE;
- modelfile.filepath = modelpath.c_str ();
- modelfile.size = filesize (modelpath);
+ modelfile.filepath = modelpath;
+ modelfile.size = meta->size;
- uint32_t modelid;
- if (registerNPUmodel (dev, &modelfile, &modelid) < 0) {
- cerr << "Fail to register model\n";
- putNPUdevice (dev);
- return -1;
+ ret = registerNPUmodel (dev, &modelfile, &modelid);
+ if (ret < 0) {
+ cerr << "Failed to register model: " << ret << "\n";
+ goto free_meta;
}
/**
- * 3) Set the data format using setNPU_dataInfo().
- *
- * Even if the format/type is not matched to the model,
- * it's internally manipulated in runtime but losing performance.
- * If you wanna know the exact layout and format, parse the model's
- * metadata using getNPUmodel_metadata().
+ * 4) Create request using createNPU_request().
*/
- tensors_data_info in, out;
-
- /* here, we assume a single input/output tensor */
- in.num_info = 1;
- in.info[0].layout = DATA_LAYOUT_MODEL; /* model-expected layout */
- in.info[0].type = DATA_TYPE_MODEL; /* model-expected type */
- out.num_info = 1;
- out.info[0].layout = DATA_LAYOUT_MODEL; /* model-expected layout */
- out.info[0].type = DATA_TYPE_MODEL; /* model-expected type */
-
- if (setNPU_dataInfo (dev, modelid, &in, &out) < 0) {
- cerr << "Fail to set data info\n";
- unregisterNPUmodel (dev, modelid);
- putNPUdevice (dev);
- return -1;
+ ret = createNPU_request (dev, modelid, &req_id);
+ if (ret < 0) {
+ cerr << "Failed to create request: " << ret << "\n";
+ goto unregister_model;
}
/**
- * 4) Optional) Set any constraint for next inferences
+ * 5) Prepare input data. It may need to be quantized.
+ *
+ * e.g., for the first input tensor,
+ * input quant zeropoint: meta->input_seg_quant_z[0]
+ * input quant scale: meta->input_seg_quant_s[0]
*/
- npuConstraint constraint;
- constraint.timeout_ms = 5000;
- constraint.priority = NPU_PRIORITY_MID;
- constraint.notimode = NPU_INTERRUPT;
-
- if (setNPU_constraint (dev, modelid, constraint) < 0) {
- cerr << "Fail to set constraints\n";
- unregisterNPUmodel (dev, modelid);
- putNPUdevice (dev);
- return -1;
+ input.num_buffers = meta->input_seg_num;
+ for (uint32_t i = 0; i < input.num_buffers; i++) {
+ uint32_t idx = meta->input_seg_idx[i];
+
+ input.bufs[i].type = BUFFER_MAPPED;
+ input.bufs[i].size = meta->segment_size[idx];
+ }
+
+ ret = allocNPU_inputBuffers (dev, &input);
+ if (ret < 0) {
+ cerr << "Failed to alloc input buffers: " << ret << "\n";
+ goto remove_request;
+ }
+
+ for (uint32_t i = 0; i < input.num_buffers; i++) {
+ if (input.bufs[i].addr != NULL) {
+ /* DO SOMETHING (e.g., quantization) */
+ }
}
/**
- * 5) Run inference using runNPU_sync().
- * Or, you can use runNPU_async() using a callback.
+ * 6) Set input and output information for the request.
+ * Note that output buffers are internally allocated in blocking mode.
*/
- string inputpath = dir + "/" + INPUT_NAME;
-
- input_buffers input;
- input.num_buffers = 1;
- input.bufs[0].type = BUFFER_FILE;
- input.bufs[0].filepath = inputpath.c_str ();
- input.bufs[0].size = filesize (inputpath);
+ memset (&output, '\x00', sizeof (output));
+ ret = setNPU_requestData (dev, req_id, &input, NULL, &output, NULL);
+ if (ret < 0) {
+ cerr << "Failed to set input data: " << ret << "\n";
+ goto free_input;
+ }
- output_buffers output;
- if (runNPU_sync (dev, modelid, &input, &output) < 0) {
- cerr << "Fail to run inference\n";
- unregisterNPUmodel (dev, modelid);
- putNPUdevice (dev);
- return -1;
+ /**
+ * 7) Submit the request for inference.
+ * You can submit multiple times until the request is explicitly removed.
+ */
+ ret = submitNPU_request (dev, req_id);
+ if (ret < 0) {
+ cerr << "Failed to submit request: " << ret << "\n";
+ goto free_input;
}
/**
- * 6) Check the output buffer
+ * 8) Check the output buffer. It may need to be dequantized.
+ *
+ * e.g., for the first output tensor,
+ * output quant zeropoint: meta->output_seg_quant_z[0]
+ * output quant scale: meta->output_seg_quant_s[0]
*/
for (uint32_t idx = 0; idx < output.num_buffers; idx++) {
generic_buffer *buffer = &output.bufs[idx];
if (buffer->addr != NULL) {
- /* DO SOMETHING */
+ /* DO SOMETHING (e.g., dequantization) */
free (buffer->addr);
}
}
- /**
- * 7) Unregister model and clean up the instance
- */
- unregisterNPUmodel (dev, modelid);
+free_input:
+ cleanNPU_inputBuffers (dev, &input);
+
+remove_request:
+ ret = removeNPU_request (dev, req_id);
+ if (ret < 0) {
+ cerr << "Failed to remove request: " << ret << "\n";
+ }
+
+unregister_model:
+ ret = unregisterNPUmodel (dev, modelid);
+ if (ret < 0) {
+ cerr << "Failed to unregister model: " << ret << "\n";
+ }
+
+free_meta:
+ free (meta);
+free_device:
putNPUdevice (dev);
- return 0;
+ return ret;
}