This commit adds an API to set the timeout and priority per model.
Note that per-request handling would require more software overhead and API
modifications, which are not desirable.
Also, we now support three priority levels for inference requests:
HIGH, MID, and LOW.
Signed-off-by: Dongju Chae <dongju.chae@samsung.com>
} data_type;
/**
+ * @brief Description of priority for NPU inference requests
+ * @details NPU Engine currently supports three priorities: low, mid, and high.
+ * Requests with higher priority are always handled before
+ * other requests. FIFO order is used among requests of the same priority.
+ */
+typedef enum {
+ NPU_PRIORITY_LOW = 0, /**< Low priority: tasks could be delayed or canceled */
+ NPU_PRIORITY_MID = 1, /**< Mid priority: tasks could be slightly delayed */
+ NPU_PRIORITY_HIGH = 2, /**< High priority: tasks should be issued immediately */
+} npu_priority;
+
+/**
* @brief Operable modes of NPU when the inputs are from NPU's own hardware.
*/
typedef enum {
tensors_data_info *info_in, tensors_data_info *info_out);
/**
- * @brief [OPTIONAL] Set the timeout for next NPU inferences
+ * @brief Constraints for NPU inferences (per model)
+ */
+typedef struct {
+ uint32_t timeout_ms;
+ npu_priority priority;
+ /** @todo add more */
+} npuConstraint;
+
+#define DEFAULT_TIMEOUT (1000) /**< default timeout, 1000ms */
+#define DEFAULT_PRIORITY (NPU_PRIORITY_MID)
+
+/**
+ * @brief [OPTIONAL] Set the inference constraint for next NPU inferences
* @param[in] dev The NPU device handle
* @param[in] modelid The target model id
- * @param[in] timeout_ms The timeout of inferences (in ms)
+ * @param[in] constraint inference constraint (e.g., timeout, priority)
* @return @c 0 if no error. otherwise a negative error value
- * @note If this function is not called, default timeout will be used.
- * Also, the timeout does not include the time to run the task,
- * but just the time to schedule the task on the device.
- * In case of the zero timeout, the next inferences are regarded as
- * time-critical tasks, which will preempt on-going and pending ones.
+ * @note If this function is not called, default values are used.
*/
-int setNPU_timeout(npudev_h dev, uint32_t modelid, uint32_t timeout_ms);
+int setNPU_constraint(npudev_h dev, uint32_t modelid, npuConstraint constraint);
/**
* @brief Execute inference. Wait (block) until the output is available.
#define DEV_NAME_SIZE 128
#define DEV_MAX_OFFSET 16
#define DEFAULT_FORCE_STOP true
-#define DEFAULT_TIMEOUT 1000 /**< default timeout, 1000ms */
#define MAX_NUM_MODELS 1
uint64_t model_version; /**< model version returned from registration */
tensors_data_info info_in; /**< layout/type info of input tensors */
tensors_data_info info_out; /**< layout/type info of output tensors */
- uint32_t timeout_ms; /**< timeout of next inferences (in ms) */
+ npuConstraint constraint; /**< constraints for inferences */
npuOutputNotify cb; /**< callback after finish running this model */
void *cb_data; /**< callback data */
} model_private;
}
/** set default values for the model */
- priv_model->timeout_ms = DEFAULT_TIMEOUT;
+ priv_model->constraint.timeout_ms = DEFAULT_TIMEOUT;
+ priv_model->constraint.priority = DEFAULT_PRIORITY;
+
priv_model->info_in.num_info = 1;
priv_model->info_in.info[0].layout = DATA_LAYOUT_SRNPU;
priv_model->info_in.info[0].type = DATA_TYPE_SRNPU;
}
/**
- * @brief Set the timeout for next NPU inferences
+ * @brief [OPTIONAL] Set the inference constraint for next NPU inferences
* @param[in] dev The NPU device handle
* @param[in] modelid The target model id
- * @param[in] timeout_ms The timeout of inferences (in ms)
+ * @param[in] constraint inference constraint (e.g., timeout, priority)
* @return @c 0 if no error. otherwise a negative error value
- * @note if timeout is zero, the next inferences are regarded as
- * time-critical tasks, which will preempt on-going and pending ones.
+ * @note If this function is not called, default values are used.
*/
-int setNPU_timeout(npudev_h dev, uint32_t modelid, uint32_t timeout_ms)
+int setNPU_constraint(npudev_h dev, uint32_t modelid, npuConstraint constraint)
{
npu_device *npu_dev;
DEVICE_LOCK();
- npu_dev->models[modelid]->timeout_ms = timeout_ms;
+ memcpy (&npu_dev->models[modelid]->constraint,
+ &constraint, sizeof (npuConstraint));
DEVICE_UNLOCK();
{
buffer *buffer_ptr = NULL;
npu_device *npu_dev = dev;
+ npu_priority priority;
npubin_meta meta;
dev_type type;
int err = 0;
if (input->num_buffers == 0)
return -EINVAL;
- timestamp = get_timestamp(npu_dev->models[modelid]->timeout_ms);
+ timestamp = get_timestamp(npu_dev->models[modelid]->constraint.timeout_ms);
+ priority = npu_dev->models[modelid]->constraint.priority;
type = npu_dev->device_type & DEVICETYPE_MASK;
if (type == DEVICETYPE_ASR) {
DEVICE_UNLOCK();
if ((err = libnpupriv.host_handle->validateBuffer (buffer_ptr,
- timestamp)) < 0) {
+ priority, timestamp)) < 0) {
logerr (TAG, "Error validating buffer, errno: %d\n", err);
goto out_unlock;
}
/**
* @brief The buffer is filled and valid for inference. Start when ready.
* @param[in] buffer The buffer with input data filled.
+ * @param[in] priority The priority of this inference
* @param[in] timestamp The timeout timestamp
* @return 0 if ok. errno if error.
*
* @note after validation, it's no longer accessible because it was returned.
*/
- int (*validateBuffer)(buffer *buffer, uint64_t timestamp);
+ int (*validateBuffer)(buffer *buffer, npu_priority priority, uint64_t timestamp);
/**
* @brief get the next output buffer
/**
* @brief The buffer is filled and valid for inference. Start when ready.
* @param[in] buffer The buffer with input data filled.
+ * @param[in] priority The priority of this npu request
* @param[in] timestamp The timeout timestamp
* @return 0 if ok. errno if error.
*
* @note after validation, it's no longer accessible because it was returned.
*/
static int
-handler_validate_buffer (buffer *buffer, uint64_t timestamp)
+handler_validate_buffer (buffer *buffer, npu_priority priority, uint64_t timestamp)
{
int err;
return err;
}
- return n3_dataReady (timestamp);
+ return n3_dataReady (priority, timestamp);
}
/**
thread_priv proc_thread; /**< main looping thread struct */
thread_priv cb_thread; /**< cb looping thread struct */
+ npu_priority priority; /**< the inference priority */
uint64_t timestamp; /**< timestamp of inference timeout (ms) */
} host_inservice_priv;
if (npriv->opmode == NPUINPUT_HOST) {
uint64_t cur_timestamp = get_timestamp (0);
-
/** check timeout in advance */
if (cur_timestamp > npriv->timestamp) {
- /** we can skip to call npu_run_input() */
- logwarn (TAG, "Timeout! The output contains invalid values. Try again\n");
+ /** @todo how to handle a timeout? */
+ logwarn (TAG, "Timeout! This fails to meet the timeout requirment, "
+ "but still work without any errors\n");
+ input_config.timeout_ms = DEFAULT_TIMEOUT;
} else {
input_config.timeout_ms = npriv->timestamp - cur_timestamp;
-
- /** start the device with set values, this is blocking call */
- status = npu_run_input(npriv->fd, &input_config);
- if (status == -ETIMEDOUT) {
- logwarn (TAG, "Timeout! The output contains invalid values. Try again\n");
- } else if (status < 0) {
- logerr (TAG, "Running the device failed, errno: %d\n", status);
- GET_MEM()->reset_buffer (buffer_for_cb);
- goto error_lock_exit;
- }
+ }
+ /** @todo enable this after kernel is revised */
+ // input_config.priority = npriv->priority;
+
+ /** start the device with set values, this is blocking call */
+ status = npu_run_input(npriv->fd, &input_config);
+ if (status == -ETIMEDOUT) {
+ logwarn (TAG, "Timeout! This fails to meet the timeout requirment, "
+ "but still work without any errors\n");
+ } else if (status < 0) {
+ logerr (TAG, "Running the device failed, errno: %d\n", status);
+ GET_MEM()->reset_buffer (buffer_for_cb);
+ goto error_lock_exit;
}
} else {
/** wait for the device to finish, device enabled from iCAM/iMIC */
/**
* @brief move to the next input data
*/
-static int nextHost (inputservice *me, buffer *buffer, uint64_t timestamp)
+static int nextHost (inputservice *me, buffer *buffer,
+ npu_priority priority, uint64_t timestamp)
{
host_inservice_priv *npriv;
npriv->proc_thread.buffer = buffer;
npriv->proc_thread.data_ready = 1;
+ npriv->priority = priority;
npriv->timestamp = timestamp;
pthread_cond_broadcast(&npriv->proc_thread.cond);
}
/** @brief Allows to enter host input data. For more detail, refer to the header */
-int n4_dataReady(uint64_t timestamp)
+int n4_dataReady(npu_priority priority, uint64_t timestamp)
{
inputservice *n4x;
n40_status status;
}
if (n4_priv.stopped != 1) {
- n4x->next(n4x, buffer, timestamp);
+ n4x->next(n4x, buffer, priority, timestamp);
} else {
/**
* return the current buffer (if any) to output before exit
/**
* @brief Allows to enter host input data.
+ * @param[in] priority The priority of this npu request
* @param[in] timestamp The timeout timestamp
* @return 0 if success, otherwise negative error numbers.
*/
-extern int n4_dataReady(uint64_t timestamp);
+extern int n4_dataReady(npu_priority priority, uint64_t timestamp);
/**
* @brief get the next I/O buffer dedicated to the requested role.
n40_status (*getStatus) (inputservice *me, model_opmode opmode);
int (*start) (inputservice *me);
- int (*next) (inputservice *me, buffer *buffer, uint64_t timestamp);
+ int (*next) (inputservice *me, buffer *buffer, npu_priority priority, uint64_t timestamp);
/**< N4C may call getNextBuffer() and move to the next buffer for input after next has returned.
buffer will be updated before calling this. */
};
/**
* @brief Notify the input buffer is ready for inference.
+ * @param[in] priority The priority of this npu request
* @param[in] timestamp The timeout timestamp for this inference (in ms)
* @return @c 0 if success. otherwise, -ERRNO.
*/
-int n3_dataReady(uint64_t timestamp)
+int n3_dataReady(npu_priority priority, uint64_t timestamp)
{
if (spriv.input_opmode == SMODEL_OPS_END) {
logerr (TAG, "No input service yet\n");
return -EPERM;
}
- return n4_dataReady (timestamp);
+ return n4_dataReady (priority, timestamp);
}
/**
/**
* @brief Notify the input buffer is ready for inference.
+ * @param[in] priority The priority of this npu request
* @param[in] timestamp The timeout timestamp
* @return @c 0 if success. otherwise, -ERRNO.
*/
-int n3_dataReady(uint64_t timestamp);
+int n3_dataReady(npu_priority priority, uint64_t timestamp);
/**
* @brief get the next I/O buffer dedicated to the requested role.
#include <npubinfmt.h>
}
-#define DEFAULT_TIMEOUT (1000)
/** configuration to fill the model meta */
#define MAGICCODE (NPUBIN_MAGICCODE | 0x1)
GET_MEM()->resize_buffers(NPUBIN_META_SIZE * 2);
buffer_ptr = host_handle->getCurrentInputBuffer(NPUASYNC_WAIT, &err);
EXPECT_EQ (err, 0);
- EXPECT_EQ (host_handle->validateBuffer(buffer_ptr, get_timestamp(DEFAULT_TIMEOUT)), 0);
+ EXPECT_EQ (host_handle->validateBuffer(buffer_ptr, DEFAULT_PRIORITY,
+ get_timestamp(DEFAULT_TIMEOUT)), 0);
sleep (2);
/** stop the model1 */
EXPECT_EQ (host_handle->setOpMode(NPUINPUT_STOP, true, id[1], version[1],
#include <libnpuhost.h>
}
-#define DEFAULT_TIMEOUT (1000)
/** configuration to fill the model meta */
#define MAGICCODE (NPUBIN_MAGICCODE | 0x1)
#define STOP_SLEEP 1
/** calling other functions without configure */
EXPECT_LT (n4_start (SMODEL_OPS_NPU), 0);
EXPECT_LT (n4_stop (SMODEL_OPS_NPU, STOP_PREEMPT), 0);
- EXPECT_LT (n4_dataReady(get_timestamp(DEFAULT_TIMEOUT)), 0);
+ EXPECT_LT (n4_dataReady(DEFAULT_PRIORITY, get_timestamp(DEFAULT_TIMEOUT)), 0);
/** correct configuration */
EXPECT_EQ (n4_configure (SMODEL_OPS_NPU, model_ptr, test_cb, model_ptr), 0);
/** calling data ready without start */
EXPECT_LT (n4_stop (SMODEL_OPS_NPU, STOP_PREEMPT), 0);
- EXPECT_LT (n4_dataReady(get_timestamp(DEFAULT_TIMEOUT)), 0);
+ EXPECT_LT (n4_dataReady(DEFAULT_PRIORITY, get_timestamp(DEFAULT_TIMEOUT)), 0);
/** starting wrong device */
EXPECT_LT (n4_start (SMODEL_OPS_END), 0);
EXPECT_EQ (err, 0);
EXPECT_EQ (GET_MEM()->return_buffer(buffer_ptr), 0);
/** call n4_dataReady() here */
- EXPECT_EQ (n4_dataReady(get_timestamp(DEFAULT_TIMEOUT)), 0);
+ EXPECT_EQ (n4_dataReady(DEFAULT_PRIORITY, get_timestamp(DEFAULT_TIMEOUT)), 0);
/** let the completion callback be called */
sleep(SLEEP_DURATION);
/** correct stopping device */
EXPECT_EQ (err, 0);
EXPECT_EQ (GET_MEM()->return_buffer(buffer_ptr), 0);
/** call n4_dataReady() here */
- EXPECT_EQ (n4_dataReady(get_timestamp(DEFAULT_TIMEOUT)), 0);
+ EXPECT_EQ (n4_dataReady(DEFAULT_PRIORITY, get_timestamp(DEFAULT_TIMEOUT)), 0);
/** completion callback to be called */
sleep(SLEEP_DURATION);
/** correct stopping device */
buffer_ptr = GET_MEM()->get_next_buffer(NPUASYNC_WAIT, BUFFER_ROLE_INPUT, &err);
EXPECT_EQ (GET_MEM()->return_buffer(buffer_ptr), 0);
/** call n4_dataReady() here */
- EXPECT_EQ (n4_dataReady(get_timestamp(DEFAULT_TIMEOUT)), 0);
+ EXPECT_EQ (n4_dataReady(DEFAULT_PRIORITY, get_timestamp(DEFAULT_TIMEOUT)), 0);
/** wait for the device to stop and completion callback to be called */
sleep(SLEEP_DURATION);
/** correct stopping device */
EXPECT_EQ (err, 0);
EXPECT_EQ (GET_MEM()->return_buffer(buffer_ptr), 0);
/** call n4_dataReady() here */
- EXPECT_EQ (n4_dataReady(get_timestamp(DEFAULT_TIMEOUT)), 0);
+ EXPECT_EQ (n4_dataReady(DEFAULT_PRIORITY, get_timestamp(DEFAULT_TIMEOUT)), 0);
}
pthread_mutex_lock (&mutex);