From 7da6231113301b30db3b34fb29d27e3a0cfdc913 Mon Sep 17 00:00:00 2001 From: Dongju Chae Date: Mon, 14 Jun 2021 17:45:31 +0900 Subject: [PATCH] [API] Add decoupled inference APIs (request submission) This patch adds decoupled inference APIs to create, configure, and submit a request. But, this PR only contains their prototype yet which is TBU. Signed-off-by: Dongju Chae --- include/common/typedef.h | 11 ++++- include/host/libnpuhost.h | 79 ++++++++++++++++++++++++++++++++++- src/host/ne-host.cc | 102 ++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 190 insertions(+), 2 deletions(-) diff --git a/include/common/typedef.h b/include/common/typedef.h index c39a039..2664038 100644 --- a/include/common/typedef.h +++ b/include/common/typedef.h @@ -218,7 +218,10 @@ typedef struct { uint32_t timeout_ms; npu_priority priority; npu_notimode notimode; -} npuConstraint; +} npu_constraint; + +/* deprecated, for backward-compatibility */ +#define npuConstraint npu_constraint static const uint32_t default_timeout = 3000; static const npu_priority default_priority = NPU_PRIORITY_MID; @@ -409,4 +412,10 @@ typedef enum { NPU_LOG_END, } npu_loglevel; +typedef struct { + uint32_t task_handle; + uint32_t subtask_idx; + /* TODO: Add more if needed */ +} npumgr_param; + #endif /* NPU_TYPEDEF_H__ */ diff --git a/include/host/libnpuhost.h b/include/host/libnpuhost.h index 8188513..28ebf38 100644 --- a/include/host/libnpuhost.h +++ b/include/host/libnpuhost.h @@ -188,7 +188,7 @@ int setNPU_dataInfo (npudev_h dev, uint32_t model_id, * @note If this function is not called, default values are used. */ int setNPU_constraint (npudev_h dev, uint32_t model_id, - npuConstraint constraint); + npu_constraint constraint); /** * @brief Execute inference. @@ -488,6 +488,83 @@ int writeNPU_log (npu_loglevel level, const char *tag, const char *format, ...); #define writeNPU_logError(tag, format, ...) 
\ writeNPU_log (NPU_LOG_ERROR, tag, format, ##__VA_ARGS__) +/** NPU Request/Submit Interface (decoupled version for runNPU_* APIs) */ + +/** + * @brief Create NPU inference request + * @param[in] dev The NPU device handle + * @param[in] model_id The model to be inferred. + * @param[out] req_id The ID of created request + * @return 0 if no error. Otherwise a negative errno + * @note the created request is not submitted until submitNPU_request is called + */ +int createNPU_request (npudev_h dev, uint32_t model_id, int *req_id); + +/** + * @brief Set request's input/output data + * @param[in] dev The NPU device handle + * @param[in] req_id The request ID + * @param[in] input The input data buffers + * @param[in] in_info The input data info (format, type) + * @param[in] output The output data buffers + * @param[in] out_info The output data info (format, type) + * @return 0 if no error. Otherwise a negative errno + * @note The data and its data info are user-expected ones. When data format/type are + * different from the model-assumed ones, npu-engine performs data manipulation + * internally (e.g., NHWC <-> TRIV2). + */ +int setNPU_requestData (npudev_h dev, int req_id, input_buffers *input, + tensors_data_info *in_info, output_buffers *output, + tensors_data_info *out_info); + +/** + * @brief Set output callback of the request + * @param[in] dev The NPU device handle + * @param[in] req_id The request ID + * @param[in] cb The output callback handler + * @param[in] [nullable] data The data to pass to callback handler + * @return 0 if no error. Otherwise a negative errno + */ +int setNPU_requestCallback (npudev_h dev, int req_id, npuOutputNotify cb, + void *data); + +/** + * @brief Set the request's inference mode + * @param[in] dev The NPU device handle + * @param[in] req_id The request ID + * @param[in] mode Configures how this inference works. + * @return 0 if no error. 
Otherwise a negative errno + */ +int setNPU_requestMode (npudev_h dev, int req_id, npu_infer_mode mode); + +/** + * @brief [OPTIONAL] Set the request's inference constraint + * @param[in] dev The NPU device handle + * @param[in] req_id The request ID + * @param[in] constraint inference constraint (e.g., timeout, priority) + * @return 0 if no error. Otherwise a negative errno + * @note if this is not called, the default values are used (see typedef.h). + */ +int setNPU_requestConstraint (npudev_h dev, int req_id, + npu_constraint constraint); + +/** + * @brief [OPTIONAL] Set the request's VD NPU manager parameter + * @param[in] dev The NPU device handle + * @param[in] req_id The request ID + * @param[in] param npumgr parameter + * @return 0 if no error. Otherwise a negative errno + */ +int setNPU_requestNpumgrParam (npudev_h dev, int req_id, npumgr_param param); + +/** + * @brief Submit the request to the NPU + * @param[in] dev The NPU device handle + * @param[in] req_id The request ID + * @return 0 if no error. Otherwise a negative errno + */ +int submitNPU_request (npudev_h dev, int req_id); + #if defined(__cplusplus) } #endif diff --git a/src/host/ne-host.cc b/src/host/ne-host.cc index 768fe3a..1092fbd 100644 --- a/src/host/ne-host.cc +++ b/src/host/ne-host.cc @@ -725,3 +725,105 @@ writeNPU_log (npu_loglevel level, const char *tag, const char *format, ...) { return ret; } + +/** + * @brief Create NPU inference request + * @param[in] dev The NPU device handle + * @param[in] model_id The model to be inferred. + * @param[out] req_id The ID of created request + * @return 0 if no error. 
Otherwise a negative errno + * @note the created request is not submitted until submitNPU_request is called + */ +int +createNPU_request (npudev_h dev, uint32_t model_id, int *req_id) { + /* NYI */ + return -EPERM; +} + +/** + * @brief Set request's input/output data + * @param[in] dev The NPU device handle + * @param[in] req_id The request ID + * @param[in] input The input data buffers + * @param[in] in_info The input data info (format, type) + * @param[in] output The output data buffers + * @param[in] out_info The output data info (format, type) + * @return 0 if no error. Otherwise a negative errno + * @note The data and its data info are user-expected ones. When data format/type are + * different from the model-assumed ones, npu-engine performs data manipulation + * internally (e.g., NHWC <-> TRIV2). + */ +int +setNPU_requestData (npudev_h dev, int req_id, input_buffers *input, + tensors_data_info *in_info, output_buffers *output, + tensors_data_info *out_info) { + /* NYI */ + return -EPERM; +} + +/** + * @brief Set output callback of the request + * @param[in] dev The NPU device handle + * @param[in] req_id The request ID + * @param[in] cb The output callback handler + * @param[in] [nullable] data The data to pass to callback handler + * @return 0 if no error. Otherwise a negative errno + */ +int +setNPU_requestCallback (npudev_h dev, int req_id, npuOutputNotify cb, + void *data) { + /* NYI */ + return -EPERM; +} + +/** + * @brief Set the request's inference mode + * @param[in] dev The NPU device handle + * @param[in] req_id The request ID + * @param[in] mode Configures how this inference works. + * @return 0 if no error. 
Otherwise a negative errno + */ +int +setNPU_requestMode (npudev_h dev, int req_id, npu_infer_mode mode) { + /* NYI */ + return -EPERM; +} + +/** + * @brief [OPTIONAL] Set the request's inference constraint + * @param[in] dev The NPU device handle + * @param[in] req_id The request ID + * @param[in] constraint inference constraint (e.g., timeout, priority) + * @return 0 if no error. Otherwise a negative errno + * @note if this is not called, the default values are used (see typedef.h). + */ +int +setNPU_requestConstraint (npudev_h dev, int req_id, npu_constraint constraint) { + /* NYI */ + return -EPERM; +} + +/** + * @brief [OPTIONAL] Set the request's VD NPU manager parameter + * @param[in] dev The NPU device handle + * @param[in] req_id The request ID + * @param[in] param npumgr parameter + * @return 0 if no error. Otherwise a negative errno + */ +int +setNPU_requestNpumgrParam (npudev_h dev, int req_id, npumgr_param param) { + /* NYI */ + return -EPERM; +} + +/** + * @brief Submit the request to the NPU + * @param[in] dev The NPU device handle + * @param[in] req_id The request ID + * @return 0 if no error. Otherwise a negative errno + */ +int +submitNPU_request (npudev_h dev, int req_id) { + /* NYI */ + return -EPERM; +} -- 2.7.4