[API/Priority] Add the API to set timeout/priority per model

author Dongju Chae <dongju.chae@samsung.com>

Tue, 25 Feb 2020 11:07:48 +0000 (20:07 +0900)

committer 송욱/On-Device Lab(SR)/Staff Engineer/삼성전자 <wook16.song@samsung.com>

Thu, 5 Mar 2020 06:48:15 +0000 (15:48 +0900)
author Dongju Chae <dongju.chae@samsung.com>
Tue, 25 Feb 2020 11:07:48 +0000 (20:07 +0900)
committer 송욱/On-Device Lab(SR)/Staff Engineer/삼성전자 <wook16.song@samsung.com>
Thu, 5 Mar 2020 06:48:15 +0000 (15:48 +0900)
diff --git a/include/common/typedef.h b/include/common/typedef.h

index 135a99b..36ebbd9 100644 (file)
--- a/include/common/typedef.h
+++ b/include/common/typedef.h
@@ -70,6 +70,18 @@ typedef enum {
  } data_type;
  
  /**
+ * @brief Description of priority for NPU inference requests
+ * @details NPU Engine currently supports three priorities: low, mid, and high.
+ *          Requests with a higher priority are always handled before those
+ *          with a lower one. FIFO is used among the same priority requests.
+ */
+typedef enum {
+  NPU_PRIORITY_LOW = 0,   /**< Low priority: tasks could be delayed or canceled */
+  NPU_PRIORITY_MID = 1,   /**< Mid priority: tasks could be slightly delayed */
+  NPU_PRIORITY_HIGH = 2,  /**< High priority: tasks should be issued immediately */
+} npu_priority;
+
+/**
   * @brief Operable modes of NPU when the inputs are from NPU's own hardware.
   */
  typedef enum {
diff --git a/include/host/libnpuhost.h b/include/host/libnpuhost.h

index d115fe4..03b34b2 100644 (file)
--- a/include/host/libnpuhost.h
+++ b/include/host/libnpuhost.h
@@ -219,18 +219,26 @@ int setNPU_dataInfo(npudev_h dev, uint32_t modelid,
      tensors_data_info *info_in, tensors_data_info *info_out);
  
  /**
- * @brief [OPTIONAL] Set the timeout for next NPU inferences
+ * @brief Constraints for NPU inferences (per model)
+ */
+typedef struct {
+  uint32_t timeout_ms;    /**< timeout of next inferences (in ms) */
+  npu_priority priority;  /**< priority of next inferences */
+  /** @todo add more */
+} npuConstraint;
+
+#define DEFAULT_TIMEOUT (1000)  /**< default timeout, 1000ms */
+#define DEFAULT_PRIORITY (NPU_PRIORITY_MID)
+
+/**
+ * @brief [OPTIONAL] Set the inference constraint for next NPU inferences
   * @param[in] dev The NPU device handle
   * @param[in] modelid The target model id
- * @param[in] timeout_ms The timeout of inferences (in ms)
+ * @param[in] constraint inference constraint (e.g., timeout, priority)
   * @return @c 0 if no error. otherwise a negative error value
- * @note If this function is not called, default timeout will be used.
- *       Also, the timeout does not include the time to run the task,
- *       but just the time to schedule the task on the device.
- *       In case of the zero timeout, the next inferences are regarded as
- *       time-critical tasks, which will preempt on-going and pending ones.
+ * @note If not called, DEFAULT_TIMEOUT and DEFAULT_PRIORITY are used.
   */
-int setNPU_timeout(npudev_h dev, uint32_t modelid, uint32_t timeout_ms);
+int setNPU_constraint(npudev_h dev, uint32_t modelid, npuConstraint constraint);
  
  /**
   * @brief Execute inference. Wait (block) until the output is available.
diff --git a/src/core/ip/plugin-comm-ip.c b/src/core/ip/plugin-comm-ip.c

index 5d85d5c..4968736 100644 (file)
--- a/src/core/ip/plugin-comm-ip.c
+++ b/src/core/ip/plugin-comm-ip.c
@@ -55,7 +55,6 @@
  #define DEV_NAME_SIZE 128
  #define DEV_MAX_OFFSET 16
  #define DEFAULT_FORCE_STOP true
-#define DEFAULT_TIMEOUT 1000  /**< default timeout, 1000ms */
  
  #define MAX_NUM_MODELS 1
  
@@ -71,7 +70,7 @@ typedef struct {
    uint64_t model_version;     /**< model version returned from registration */
    tensors_data_info info_in;  /**< layout/type info of input tensors */
    tensors_data_info info_out; /**< layout/type info of output tensors */
-  uint32_t timeout_ms;        /**< timeout of next inferences (in ms) */
+  npuConstraint constraint;   /**< constraints for inferences */
    npuOutputNotify cb;         /**< callback after finish running this model */
    void *cb_data;              /**< callback data */
  } model_private;
@@ -473,7 +472,9 @@ int registerNPUmodel(npudev_h dev, generic_buffer *model, uint32_t *modelid)
    }
  
    /** set default values for the model */
-  priv_model->timeout_ms = DEFAULT_TIMEOUT;
+  priv_model->constraint.timeout_ms = DEFAULT_TIMEOUT;
+  priv_model->constraint.priority = DEFAULT_PRIORITY;
+
    priv_model->info_in.num_info = 1;
    priv_model->info_in.info[0].layout = DATA_LAYOUT_SRNPU;
    priv_model->info_in.info[0].type = DATA_TYPE_SRNPU;
@@ -616,15 +617,14 @@ int setNPU_dataInfo(npudev_h dev, uint32_t modelid,
  }
  
  /**
- * @brief Set the timeout for next NPU inferences
+ * @brief [OPTIONAL] Set the inference constraint for next NPU inferences
   * @param[in] dev The NPU device handle
   * @param[in] modelid The target model id
- * @param[in] timeout_ms The timeout of inferences (in ms)
+ * @param[in] constraint inference constraint (e.g., timeout, priority)
   * @return @c 0 if no error. otherwise a negative error value
- * @note if timeout is zero, the next inferences are regarded as
- *       time-critical tasks, which will preempt on-going and pending ones.
+ * @note If this function is not called, default values are used.
   */
-int setNPU_timeout(npudev_h dev, uint32_t modelid, uint32_t timeout_ms)
+int setNPU_constraint(npudev_h dev, uint32_t modelid, npuConstraint constraint)
  {
    npu_device *npu_dev;
  
@@ -638,7 +638,8 @@ int setNPU_timeout(npudev_h dev, uint32_t modelid, uint32_t timeout_ms)
  
    DEVICE_LOCK();
  
-  npu_dev->models[modelid]->timeout_ms = timeout_ms;
+  memcpy (&npu_dev->models[modelid]->constraint,
+      &constraint, sizeof (npuConstraint));
  
    DEVICE_UNLOCK();
  
@@ -821,6 +822,7 @@ static int runNPU_async_util(npudev_h dev, uint32_t modelid, const input_buffers
  {
    buffer *buffer_ptr = NULL;
    npu_device *npu_dev = dev;
+  npu_priority priority;
    npubin_meta meta;
    dev_type type;
    int err = 0;
@@ -834,7 +836,8 @@ static int runNPU_async_util(npudev_h dev, uint32_t modelid, const input_buffers
    if (input->num_buffers == 0)
      return -EINVAL;
  
-  timestamp = get_timestamp(npu_dev->models[modelid]->timeout_ms);
+  timestamp = get_timestamp(npu_dev->models[modelid]->constraint.timeout_ms);
+  priority = npu_dev->models[modelid]->constraint.priority;
  
    type = npu_dev->device_type & DEVICETYPE_MASK;
    if (type == DEVICETYPE_ASR) {
@@ -886,7 +889,7 @@ static int runNPU_async_util(npudev_h dev, uint32_t modelid, const input_buffers
    DEVICE_UNLOCK();
  
    if ((err = libnpupriv.host_handle->validateBuffer (buffer_ptr,
-          timestamp)) < 0) {
+          priority, timestamp)) < 0) {
      logerr (TAG, "Error validating buffer, errno: %d\n", err);
      goto out_unlock;
    }
diff --git a/src/core/ne-comm.h b/src/core/ne-comm.h

index 584506a..417fde5 100644 (file)
--- a/src/core/ne-comm.h
+++ b/src/core/ne-comm.h
@@ -123,12 +123,13 @@ typedef struct {
    /**
     * @brief The buffer is filled and valid for inference. Start when ready.
     * @param[in] buffer The buffer with input data filled.
+   * @param[in] priority The priority of this inference
     * @param[in] timestamp The timeout timestamp
     * @return 0 if ok. errno if error.
     *
     * @note after validation, it's no longer accessible because it was returned.
     */
-  int (*validateBuffer)(buffer *buffer, uint64_t timestamp);
+  int (*validateBuffer)(buffer *buffer, npu_priority priority, uint64_t timestamp);
  
    /**
     * @brief get the next output buffer
diff --git a/src/core/ne-handler.c b/src/core/ne-handler.c

index 4fc06ad..4250e2d 100644 (file)
--- a/src/core/ne-handler.c
+++ b/src/core/ne-handler.c
@@ -1197,13 +1197,14 @@ handler_extract_output_buffer (const npubin_meta *meta, const tensors_data_info
  /**
   * @brief The buffer is filled and valid for inference. Start when ready.
   * @param[in] buffer The buffer with input data filled.
+ * @param[in] priority The priority of this npu request
   * @param[in] timestamp The timeout timestamp
   * @return 0 if ok. errno if error.
   *
   * @note after validation, it's no longer accessible because it was returned.
   */
  static int
-handler_validate_buffer (buffer *buffer, uint64_t timestamp)
+handler_validate_buffer (buffer *buffer, npu_priority priority, uint64_t timestamp)
  {
    int err;
  
@@ -1223,7 +1224,7 @@ handler_validate_buffer (buffer *buffer, uint64_t timestamp)
      return err;
    }
  
-  return n3_dataReady (timestamp);
+  return n3_dataReady (priority, timestamp);
  }
  
  /**
diff --git a/src/core/ne-host-input-service.c b/src/core/ne-host-input-service.c

index 15e7fd2..12c5bd3 100644 (file)
--- a/src/core/ne-host-input-service.c
+++ b/src/core/ne-host-input-service.c
@@ -101,6 +101,7 @@ typedef struct {
    thread_priv proc_thread;    /**< main looping thread struct */
    thread_priv cb_thread;      /**< cb looping thread struct */
  
+  npu_priority priority;      /**< the inference priority */
    uint64_t timestamp;         /**< timestamp of inference timeout (ms) */
  } host_inservice_priv;
  
@@ -268,23 +269,27 @@ static void* loopHostService (void *data)
  
      if (npriv->opmode == NPUINPUT_HOST) {
        uint64_t cur_timestamp = get_timestamp (0);
-
        /** check timeout in advance */
        if (cur_timestamp > npriv->timestamp) {
-        /** we can skip to call npu_run_input() */
-        logwarn (TAG, "Timeout! The output contains invalid values. Try again\n");
+        /** @todo decide how to handle an already-expired timeout */
+        logwarn (TAG, "Timeout! This fails to meet the timeout requirment, "
+            "but still work without any errors\n");
+        input_config.timeout_ms = DEFAULT_TIMEOUT;
        } else {
          input_config.timeout_ms = npriv->timestamp - cur_timestamp;
-
-        /** start the device with set values, this is blocking call */
-        status = npu_run_input(npriv->fd, &input_config);
-        if (status == -ETIMEDOUT) {
-          logwarn (TAG, "Timeout! The output contains invalid values. Try again\n");
-        } else if (status < 0) {
-          logerr (TAG, "Running the device failed, errno: %d\n", status);
-          GET_MEM()->reset_buffer (buffer_for_cb);
-          goto error_lock_exit;
-        }
+      }
+      /** @todo enable this after kernel is revised */
+      // input_config.priority = npriv->priority;
+
+      /** start the device with set values, this is blocking call */
+      status = npu_run_input(npriv->fd, &input_config);
+      if (status == -ETIMEDOUT) {
+        logwarn (TAG, "Timeout! This fails to meet the timeout requirment, "
+            "but still work without any errors\n");
+      } else if (status < 0) {
+        logerr (TAG, "Running the device failed, errno: %d\n", status);
+        GET_MEM()->reset_buffer (buffer_for_cb);
+        goto error_lock_exit;
        }
      } else {
        /** wait for the device to finish, device enabled from iCAM/iMIC */
@@ -624,7 +629,8 @@ static int startHost (inputservice *me)
  /**
   * @brief move to the next input data
   */
-static int nextHost (inputservice *me, buffer *buffer, uint64_t timestamp)
+static int nextHost (inputservice *me, buffer *buffer,
+    npu_priority priority, uint64_t timestamp)
  {
    host_inservice_priv *npriv;
  
@@ -639,6 +645,7 @@ static int nextHost (inputservice *me, buffer *buffer, uint64_t timestamp)
  
    npriv->proc_thread.buffer = buffer;
    npriv->proc_thread.data_ready = 1;
+  npriv->priority = priority;
    npriv->timestamp = timestamp;
  
    pthread_cond_broadcast(&npriv->proc_thread.cond);
diff --git a/src/core/ne-inf.c b/src/core/ne-inf.c

index 3807b95..5b8348a 100644 (file)
--- a/src/core/ne-inf.c
+++ b/src/core/ne-inf.c
@@ -246,7 +246,7 @@ out:
  }
  
  /** @brief Allows to enter host input data. For more detail, refer to the header */
-int n4_dataReady(uint64_t timestamp)
+int n4_dataReady(npu_priority priority, uint64_t timestamp)
  {
    inputservice *n4x;
    n40_status status;
@@ -281,7 +281,7 @@ int n4_dataReady(uint64_t timestamp)
    }
  
    if (n4_priv.stopped != 1) {
-    n4x->next(n4x, buffer, timestamp);
+    n4x->next(n4x, buffer, priority, timestamp);
    } else {
      /**
       * return the current buffer (if any) to output before exit
diff --git a/src/core/ne-inf.h b/src/core/ne-inf.h

index bbdea3b..08c32b4 100644 (file)
--- a/src/core/ne-inf.h
+++ b/src/core/ne-inf.h
@@ -61,10 +61,11 @@ extern int n4_configure(model_opmode op, model *m, output_ready cb, void *cb_dat
  
  /**
   * @brief Allows to enter host input data.
+ * @param[in] priority The priority of this npu request
   * @param[in] timestamp The timeout timestamp
   * @return 0 if success, otherwise negative error numbers.
   */
-extern int n4_dataReady(uint64_t timestamp);
+extern int n4_dataReady(npu_priority priority, uint64_t timestamp);
  
  /**
   * @brief get the next I/O buffer dedicated to the requsted role.
diff --git a/src/core/ne-inputservice.h b/src/core/ne-inputservice.h

index 0e55f04..ac5dc39 100644 (file)
--- a/src/core/ne-inputservice.h
+++ b/src/core/ne-inputservice.h
@@ -68,7 +68,7 @@ struct _inputservice {
    n40_status (*getStatus) (inputservice *me, model_opmode opmode);
  
    int (*start) (inputservice *me);
-  int (*next) (inputservice *me, buffer *buffer, uint64_t timestamp);
+  int (*next) (inputservice *me, buffer *buffer, npu_priority priority, uint64_t timestamp);
      /**< N4C may call getNextBuffer() and move to the next buffer for input after next has returned.
           buffer will be updated before calling this. */
  };
diff --git a/src/core/ne-scheduler.c b/src/core/ne-scheduler.c

index 6ef07ff..08473f3 100644 (file)
--- a/src/core/ne-scheduler.c
+++ b/src/core/ne-scheduler.c
@@ -202,10 +202,11 @@ int n3_setOpMode(npu_input_opmode op, int force, model *model,
  
  /**
   * @brief Notify the input buffer is ready for inference.
+ * @param[in] priority The priority of this npu request
   * @param[in] timestamp The timeout for this inference (in ms)
   * @return @c 0 if success. otherwise, -ERRNO.
   */
-int n3_dataReady(uint64_t timestamp)
+int n3_dataReady(npu_priority priority, uint64_t timestamp)
  {
    if (spriv.input_opmode == SMODEL_OPS_END) {
      logerr (TAG, "No input service yet\n");
@@ -217,7 +218,7 @@ int n3_dataReady(uint64_t timestamp)
      return -EPERM;
    }
  
-  return n4_dataReady (timestamp);
+  return n4_dataReady (priority, timestamp);
  }
  
  /**
diff --git a/src/core/ne-scheduler.h b/src/core/ne-scheduler.h

index ebe5d7a..77c77aa 100644 (file)
--- a/src/core/ne-scheduler.h
+++ b/src/core/ne-scheduler.h
@@ -40,10 +40,11 @@ int n3_setOpMode(npu_input_opmode op, int force, model *model, output_ready cb,
  
  /**
   * @brief Notify the input buffer is ready for inference.
+ * @param[in] priority The priority of this npu request
   * @param[in] timestamp The timeout timestamp
   * @return @c 0 if success. otherwise, -ERRNO.
   */
-int n3_dataReady(uint64_t timestamp);
+int n3_dataReady(npu_priority priority, uint64_t timestamp);
  
  /**
   * @brief get the next I/O buffer dedicated to the requsted role.
diff --git a/tests/unittests/ne_core_handler_test.cpp b/tests/unittests/ne_core_handler_test.cpp

index 0553356..c0e1eb3 100644 (file)
--- a/tests/unittests/ne_core_handler_test.cpp
+++ b/tests/unittests/ne_core_handler_test.cpp
@@ -25,7 +25,6 @@ extern "C"
    #include <npubinfmt.h>
  }
  
-#define DEFAULT_TIMEOUT (1000)
  /** configuration to fill the model meta */
  #define MAGICCODE (NPUBIN_MAGICCODE | 0x1)
  
@@ -178,7 +177,8 @@ TEST (ne_core_handler_test, start_stop_operation)
    GET_MEM()->resize_buffers(NPUBIN_META_SIZE * 2);
    buffer_ptr = host_handle->getCurrentInputBuffer(NPUASYNC_WAIT, &err);
    EXPECT_EQ (err, 0);
-  EXPECT_EQ (host_handle->validateBuffer(buffer_ptr, get_timestamp(DEFAULT_TIMEOUT)), 0);
+  EXPECT_EQ (host_handle->validateBuffer(buffer_ptr, DEFAULT_PRIORITY,
+        get_timestamp(DEFAULT_TIMEOUT)), 0);
    sleep (2);
    /** stop the model1 */
    EXPECT_EQ (host_handle->setOpMode(NPUINPUT_STOP, true, id[1], version[1],
diff --git a/tests/unittests/ne_core_inf_test.cpp b/tests/unittests/ne_core_inf_test.cpp

index b194ee2..80d56d0 100644 (file)
--- a/tests/unittests/ne_core_inf_test.cpp
+++ b/tests/unittests/ne_core_inf_test.cpp
@@ -24,7 +24,6 @@ extern "C"
    #include <libnpuhost.h>
  }
  
-#define DEFAULT_TIMEOUT (1000)
  /** configuration to fill the model meta */
  #define MAGICCODE (NPUBIN_MAGICCODE | 0x1)
  #define STOP_SLEEP 1
@@ -126,14 +125,14 @@ TEST (ne_core_inf_test, validation_without_data)
    /** calling other functions without configure */
    EXPECT_LT (n4_start (SMODEL_OPS_NPU), 0);
    EXPECT_LT (n4_stop (SMODEL_OPS_NPU, STOP_PREEMPT), 0);
-  EXPECT_LT (n4_dataReady(get_timestamp(DEFAULT_TIMEOUT)), 0);
+  EXPECT_LT (n4_dataReady(DEFAULT_PRIORITY, get_timestamp(DEFAULT_TIMEOUT)), 0);
  
    /** correct configuration */
    EXPECT_EQ (n4_configure (SMODEL_OPS_NPU, model_ptr, test_cb, model_ptr), 0);
  
    /** calling data ready without start */
    EXPECT_LT (n4_stop (SMODEL_OPS_NPU, STOP_PREEMPT), 0);
-  EXPECT_LT (n4_dataReady(get_timestamp(DEFAULT_TIMEOUT)), 0);
+  EXPECT_LT (n4_dataReady(DEFAULT_PRIORITY, get_timestamp(DEFAULT_TIMEOUT)), 0);
    /** starting wrong device */
    EXPECT_LT (n4_start (SMODEL_OPS_END), 0);
  
@@ -194,7 +193,7 @@ TEST (ne_core_inf_test, validation_with_data)
    EXPECT_EQ (err, 0);
    EXPECT_EQ (GET_MEM()->return_buffer(buffer_ptr), 0);
    /** call n4_dataReady() here */
-  EXPECT_EQ (n4_dataReady(get_timestamp(DEFAULT_TIMEOUT)), 0);
+  EXPECT_EQ (n4_dataReady(DEFAULT_PRIORITY, get_timestamp(DEFAULT_TIMEOUT)), 0);
    /** let the completion callback be called */
    sleep(SLEEP_DURATION);
    /** correct stopping device */
@@ -210,7 +209,7 @@ TEST (ne_core_inf_test, validation_with_data)
    EXPECT_EQ (err, 0);
    EXPECT_EQ (GET_MEM()->return_buffer(buffer_ptr), 0);
    /** call n4_dataReady() here */
-  EXPECT_EQ (n4_dataReady(get_timestamp(DEFAULT_TIMEOUT)), 0);
+  EXPECT_EQ (n4_dataReady(DEFAULT_PRIORITY, get_timestamp(DEFAULT_TIMEOUT)), 0);
    /** completion callback to be called */
    sleep(SLEEP_DURATION);
    /** correct stopping device */
@@ -225,7 +224,7 @@ TEST (ne_core_inf_test, validation_with_data)
    buffer_ptr = GET_MEM()->get_next_buffer(NPUASYNC_WAIT, BUFFER_ROLE_INPUT, &err);
    EXPECT_EQ (GET_MEM()->return_buffer(buffer_ptr), 0);
    /** call n4_dataReady() here */
-  EXPECT_EQ (n4_dataReady(get_timestamp(DEFAULT_TIMEOUT)), 0);
+  EXPECT_EQ (n4_dataReady(DEFAULT_PRIORITY, get_timestamp(DEFAULT_TIMEOUT)), 0);
    /** wait for the device to stop and completion callback to be called */
    sleep(SLEEP_DURATION);
    /** correct stopping device */
@@ -272,7 +271,7 @@ TEST (ne_core_inf_test, multiple_inputs)
      EXPECT_EQ (err, 0);
      EXPECT_EQ (GET_MEM()->return_buffer(buffer_ptr), 0);
      /** call n4_dataReady() here */
-    EXPECT_EQ (n4_dataReady(get_timestamp(DEFAULT_TIMEOUT)), 0);
+    EXPECT_EQ (n4_dataReady(DEFAULT_PRIORITY, get_timestamp(DEFAULT_TIMEOUT)), 0);
    }
  
    pthread_mutex_lock (&mutex);
author	Dongju Chae <dongju.chae@samsung.com>
	Tue, 25 Feb 2020 11:07:48 +0000 (20:07 +0900)
committer	송욱/On-Device Lab(SR)/Staff Engineer/삼성전자 <wook16.song@samsung.com>
	Thu, 5 Mar 2020 06:48:15 +0000 (15:48 +0900)
include/common/typedef.h		patch \| blob \| history
include/host/libnpuhost.h		patch \| blob \| history
src/core/ip/plugin-comm-ip.c		patch \| blob \| history
src/core/ne-comm.h		patch \| blob \| history
src/core/ne-handler.c		patch \| blob \| history
src/core/ne-host-input-service.c		patch \| blob \| history
src/core/ne-inf.c		patch \| blob \| history
src/core/ne-inf.h		patch \| blob \| history
src/core/ne-inputservice.h		patch \| blob \| history
src/core/ne-scheduler.c		patch \| blob \| history
src/core/ne-scheduler.h		patch \| blob \| history
tests/unittests/ne_core_handler_test.cpp		patch \| blob \| history
tests/unittests/ne_core_inf_test.cpp		patch \| blob \| history