} npu_priority;
/**
+ * @brief Output notification mode for NPU inference requests.
+ * @note For high-priority apps this mode is ignored; their output is handled
+ * by third-party hardware.
+ */
+typedef enum {
+ NPU_INTERRUPT = 0, /**< interrupt: moderate latency, but saves CPU usage (default) */
+ NPU_POLLING = 1, /**< polling: consumes CPU, but achieves low latency */
+} npu_notimode;
+
+/**
* @brief Operable modes of NPU when the inputs are from NPU's own hardware.
* @note this mode will decide which input service performs the inference of a model.
*/
typedef struct {
uint32_t timeout_ms; /**< timeout, in milliseconds (per the field name) */
npu_priority priority; /**< priority level (an npu_priority value) */
+ npu_notimode notimode; /**< output notification mode: interrupt or polling */
/** @todo add more */
} npuConstraint;
static const uint32_t default_timeout = 3000; /**< default assigned to a constraint's timeout_ms */
static const npu_priority default_priority = NPU_PRIORITY_MID; /**< default assigned to a constraint's priority */
+static const npu_notimode default_notimode = NPU_INTERRUPT; /**< default assigned to a constraint's notimode (interrupt) */
/**
* @brief [OPTIONAL] Set the inference constraint for next NPU inferences
input_config.timeout_ms = constraint.timeout_ms;
input_config.priority = constraint.priority;
+ /**
+ * @todo Remove hw_input/hw_output and use input_mode/output_mode instead.
+ * input_mode will be cpu or hw; output_mode will be interrupt, polling, or hw.
+ * Enums for the input/output modes also need to be defined in the kernel header.
+ */
+
+ /** input handling by CPU. host inputservice only supports CPU mode */
+ // input_config.input_mode = TRINITY_INPUT_CPU;
+
+ /** output handling by CPU, host inputservice only supports either interrupt or polling */
+ if (constraint.notimode == NPU_POLLING) {
+ // input_config.output_mode = TRINITY_OUTPUT_CPU_POLLING;
+ } else { /** default mode is interrupt */
+ // input_config.output_mode = TRINITY_OUTPUT_CPU_INTERRUPT;
+ }
+
/** run the inference with the input */
state = api->runInput (&input_config);
if (state != 0)
/** set default values */
constraint_.timeout_ms = default_timeout;
constraint_.priority = default_priority;
+ constraint_.notimode = default_notimode;
in_.num_info = 1;
in_.info[0].layout = DATA_LAYOUT_SRNPU;