[API/RunInternal] Implement run_internal (HW recurring) API

author Dongju Chae <dongju.chae@samsung.com>

Wed, 22 Jul 2020 05:40:54 +0000 (14:40 +0900)

committer 송욱/On-Device Lab(SR)/Staff Engineer/삼성전자 <wook16.song@samsung.com>

Mon, 27 Jul 2020 02:30:02 +0000 (11:30 +0900)
author Dongju Chae <dongju.chae@samsung.com>
Wed, 22 Jul 2020 05:40:54 +0000 (14:40 +0900)
committer 송욱/On-Device Lab(SR)/Staff Engineer/삼성전자 <wook16.song@samsung.com>
Mon, 27 Jul 2020 02:30:02 +0000 (11:30 +0900)
diff --git a/include/host/libnpuhost.h b/include/host/libnpuhost.h

index 9868d88..f390e7c 100644 (file)
--- a/include/host/libnpuhost.h
+++ b/include/host/libnpuhost.h
@@ -243,13 +243,20 @@ typedef void (*npuOutputNotify) (output_buffers *output, uint64_t sequence,
   * @param[in] dev The NPU device handle
   * @param[in] modelid The model to be inferred.
   * @param[in] opmode NPU has different opmode with auto-inputs. Choose one.
- * @param[in] cb The output buffer handler.
- * @param[in] data Internal data to be given to the callback, cb.
+ * @param[in] input The input buffer where input data comes.
+ * @param[in] output The output buffer where output data is filled.
+ * @return @c 0 or positive id if no error. otherwise a negative error value
+ */
+int runNPU_internalInput(npudev_h dev, uint32_t modelid, npu_input_opmode opmode,
+    const input_buffers *input, const output_buffers *output);
+
+/**
+ * @brief Stop the request with the given id
+ * @param[in] dev The NPU device handle
+ * @param[in] id The request id
   * @return @c 0 if no error. otherwise a negative error value
   */
-int runNPU_internalInput(npudev_h dev, uint32_t modelid,
-    npu_input_opmode opmode, npuOutputNotify cb,
-    void *data);
+int stopNPU_internalInput(npudev_h dev, int id);
  
  typedef enum {
    NPUASYNC_DROP_OLD, /**< If there is an unprocessed input data
diff --git a/src/core/ne-handler.cc b/src/core/ne-handler.cc

index 7e983a0..962e9b9 100644 (file)
--- a/src/core/ne-handler.cc
+++ b/src/core/ne-handler.cc
@@ -191,6 +191,36 @@ int runNPU_async(npudev_h dev, uint32_t modelid, const input_buffers *input,
  }
  
  /**
+ * @brief Let NPU accept input frames from its internal source continuously
+ * @param[in] dev The NPU device handle
+ * @param[in] modelid The model to be inferred.
+ * @param[in] opmode NPU has different opmode with auto-inputs. Choose one.
+ * @param[in] input The input buffer where input data comes.
+ * @param[in] output The output buffer where output data is filled.
+ * @return @c 0 or positive id if no error. otherwise a negative error value
+ */
+int runNPU_internalInput(npudev_h dev, uint32_t modelid, npu_input_opmode opmode,
+    const input_buffers *input, const output_buffers *output)
+{
+  /* the macro introduces `host_handler` for the given device handle
+   * (NOTE(review): presumably it also bails out on an invalid handle - confirm) */
+  INIT_HOST_HANDLER (host_handler, dev);
+
+  /* forward as-is; the handler's return value is handed back to the caller unchanged */
+  const int result = host_handler->runInternal (modelid, opmode, input, output);
+  return result;
+}
+
+/**
+ * @brief Stop the request with the given id
+ * @param[in] dev The NPU device handle
+ * @param[in] id The request id
+ * @return @c 0 if no error. otherwise a negative error value
+ */
+int stopNPU_internalInput(npudev_h dev, int id)
+{
+  /* the macro introduces `host_handler` for the given device handle
+   * (NOTE(review): presumably it also bails out on an invalid handle - confirm) */
+  INIT_HOST_HANDLER (host_handler, dev);
+
+  /* id validation is done by the handler itself */
+  const int result = host_handler->stopInternal (id);
+  return result;
+}
+
+/**
   * @brief Allocate a generic buffer with the requested buffer type.
   * @param[in] dev The NPU device handle
   * @param[in/out] Buffer the buffer pointer where memory is allocated.
@@ -610,6 +640,55 @@ HostHandler::runAsync (uint32_t modelid, const input_buffers *input,
  }
  
  /**
+ * @brief Let NPU accept input frames from its internal source continuously
+ * @param[in] modelid The model to be inferred.
+ * @param[in] opmode NPU has different opmode with auto-inputs. Choose one.
+ * @param[in] input The input buffer where input data comes.
+ * @param[in] output The output buffer where output data is filled.
+ * @return @c 0 or positive id if no error. otherwise a negative error value
+ */
+int
+HostHandler::runInternal (uint32_t modelid, npu_input_opmode opmode,
+    const input_buffers *input, const output_buffers *output)
+{
+  /* stays nullptr for devices that do not need a model */
+  Model *model = nullptr;
+
+  if (device_->needModel()) {
+    model = getModel (modelid);
+    if (model == nullptr)
+      return -ENOENT;
+
+    /* check the given model before running.
+     * this must stay inside the needModel() branch: outside of it, model may
+     * still be nullptr and finalize () would dereference a null pointer. */
+    if (!model->finalize ()) {
+      logerr (TAG, "Failed to finalize the model. Please see the log messages\n");
+      return -EINVAL;
+    }
+  }
+
+  /* the device decides whether the opmode is supported and performs the submission */
+  return device_->runInternal (opmode, model, input, output);
+}
+
+/**
+ * @brief Stop the request with the given id
+ * @param[in] id The request id
+ * @return @c 0 if no error. otherwise a negative error value
+ */
+int
+HostHandler::stopInternal (int id)
+{
+  /* only strictly positive ids are forwarded to the driver
+   * (NOTE(review): the public header documents "0 or positive id" as a valid
+   * return of runNPU_internalInput; confirm that 0 never needs to be stopped) */
+  if (id > 0) {
+    const DriverAPI * driver = device_->getDriverAPI ();
+    assert (driver != nullptr);
+
+    return driver->stop_target (id);
+  }
+
+  logerr (TAG, "Unable to stop this request with id (%d)\n", id);
+  return -EINVAL;
+}
+
+/**
   * @brief get number of available devices
   * @param[in] type device type
   * @return number of devices
@@ -1237,10 +1316,12 @@ TrinityVision::callback (Request *req, npuOutputNotify cb, void *cb_data)
   * @brief extract the segment table instance from input generic buffers
   * @param[in] model the model instance
   * @param[in] input the input generic buffers
+ * @param[in] output the output generic buffers
   * @return the segment table instance
   */
  SegmentTable *
-TrinityVision2::prepareSegmentTable (const Model *model, const input_buffers *input)
+TrinityVision2::prepareSegmentTable (const Model *model, const input_buffers *input,
+    const output_buffers *output)
  {
    if (model == nullptr || input == nullptr) {
      logerr (TAG, "Invalid arguments provided\n");
@@ -1261,7 +1342,7 @@ TrinityVision2::prepareSegmentTable (const Model *model, const input_buffers *in
      goto delete_segt;
    }
  
-  status = segt->createSegments (model, input);
+  status = segt->createSegments (model, input, output);
    if (status != 0) {
      logerr (TAG, "Failed to create segments: %d\n", status);
      goto delete_segt;
@@ -1423,7 +1504,7 @@ TrinityVision2::run (npu_input_opmode opmode, const Model *model,
      return -EPERM;
    }
  
-  if (opmode != NPUINPUT_HOST && opmode != NPUINPUT_HW_RECURRING)
+  if (opmode != NPUINPUT_HOST)
      return -EINVAL;
  
    /** this device uses segment table */
@@ -1469,6 +1550,33 @@ TrinityVision2::run (npu_input_opmode opmode, const Model *model,
    return scheduler_->submitRequest (req);
  }
  
+/**
+ * @brief implementation of TRIV2's runInternal()
+ * @param[in] opmode input opmode; only NPUINPUT_HW_RECURRING is accepted
+ * @param[in] model the model instance
+ * @param[in] input the input buffers used to build the segment table
+ * @param[in] output the output buffers used to build the segment table
+ * @return the value of submitRequest() on success, otherwise a negative errno
+ */
+int
+TrinityVision2::runInternal (npu_input_opmode opmode, const Model *model,
+    const input_buffers *input, const output_buffers *output)
+{
+  if (!initialized ()) {
+    logerr (TAG, "Uninitialized device; should use libnpuhost APIs\n");
+    return -EPERM;
+  }
+
+  /** the host-input opmode is served by run(), not by this entry */
+  if (opmode != NPUINPUT_HW_RECURRING)
+    return -EINVAL;
+
+  /** this device uses segment table (built from both input and output buffers) */
+  SegmentTable * table = prepareSegmentTable (model, input, output);
+  if (table == nullptr) {
+    logerr (TAG, "Failed to create segment table instance\n");
+    return -EINVAL;
+  }
+
+  /** no output callback is attached to this request */
+  Request *request = new Request (opmode);
+  request->setModel (model);
+  request->setSegmentTable (table);
+
+  return scheduler_->submitRequest (request);
+}
+
  /** @brief callback of TRIV2 request */
  void
  TrinityVision2::callback (Request *req, npuOutputNotify cb, void *cb_data)
diff --git a/src/core/ne-handler.h b/src/core/ne-handler.h

index 3970d30..e08d356 100644 (file)
--- a/src/core/ne-handler.h
+++ b/src/core/ne-handler.h
@@ -51,6 +51,9 @@ class HostHandler {
      int runAsync (uint32_t modelid, const input_buffers *input,
          npuOutputNotify cb = nullptr, void *cb_data = nullptr,
          npu_async_mode mode = NPUASYNC_WAIT, uint64_t *sequence = nullptr);
+    int runInternal (uint32_t modelid, npu_input_opmode opmode,
+        const input_buffers *input, const output_buffers *output);
+    int stopInternal (int id);
  
      /** @brief get statistics */
      int getMemoryStatus (size_t *alloc_total, size_t *free_total);
@@ -110,6 +113,9 @@ class Device {
          const input_buffers *input, npuOutputNotify cb = nullptr,
          void *cb_data = nullptr, uint64_t *sequence = nullptr) = 0;
  
+    virtual int runInternal (npu_input_opmode opmode, const Model *model,
+        const input_buffers *input, const output_buffers *output) { return -EPERM; }
+
    protected:
      /** the device instance has ownership of all related components */
      std::unique_ptr<DriverAPI>    api_;       /**< device api */
@@ -165,13 +171,16 @@ class TrinityVision2 : public Device {
      static size_t manipulateData (const Model *model, uint32_t idx, bool is_input,
          void *dst, void *src, size_t size);
  
-    SegmentTable * prepareSegmentTable (const Model *model, const input_buffers *input);
+    SegmentTable * prepareSegmentTable (const Model *model, const input_buffers *input,
+        const output_buffers *output = nullptr);
  
      int setModel (const generic_buffer *model, Model ** model_ptr);
      int unsetModel (Model * model);
      int run (npu_input_opmode opmode, const Model *model,
          const input_buffers *input, npuOutputNotify cb = nullptr,
          void *cb_data = nullptr, uint64_t *sequence = nullptr);
+    int runInternal (npu_input_opmode opmode, const Model *model,
+        const input_buffers *input, const output_buffers *output);
  
    private:
      void callback (Request *req, npuOutputNotify cb, void *cb_data);
diff --git a/src/core/ne-host-input-service.cc b/src/core/ne-host-input-service.cc

index 4e24b1b..e95b911 100644 (file)
--- a/src/core/ne-host-input-service.cc
+++ b/src/core/ne-host-input-service.cc
@@ -132,7 +132,7 @@ HostInputService::invoke_buffer (const DriverAPI *api, const Model *model,
  
    /** run the inference with the input */
    ret = api->runInput (&input_config);
-  if (ret != 0 && ret != -ECANCELED)
+  if (ret < 0 && ret != -ECANCELED)
      logerr (TAG, "Failed to run the NPU inference: %d\n", ret);
  
  handle_callback:
@@ -200,7 +200,7 @@ HostInputService::invoke_segt (const DriverAPI *api, const Model *model,
  
    /** run the inference with the input */
    ret = api->runInput (&input_config);
-  if (ret != 0 && ret != -ECANCELED)
+  if (ret < 0 && ret != -ECANCELED)
      logerr (TAG, "Failed to run the NPU inference: %d\n", ret);
  
  handle_callback:
diff --git a/src/core/ne-hw-input-service.cc b/src/core/ne-hw-input-service.cc

index 4407513..4dcf464 100644 (file)
--- a/src/core/ne-hw-input-service.cc
+++ b/src/core/ne-hw-input-service.cc
@@ -9,10 +9,13 @@
   * @brief Source of HW recurring input service
   * @author Dongju Chae <dongju.chae@samsung.com>
   * @bug No known bugs except for NYI items
+ * @note this input service does not use a thread pool.
   */
  
  #include "ne-inputservice.h"
  
+#define TAG _N41
+
  std::unique_ptr<HwInputService> HwInputService::instance_;
  std::once_flag HwInputService::once_flag_;
  
@@ -41,9 +44,7 @@ HwInputService::submit (const DriverAPI *api, uint32_t id,
    if (api == nullptr || model == nullptr)
      return -EINVAL;
  
-  /** TODO */
-  invoke (api, model, segt, callback);
-  return 0;
+  return invoke (api, model, segt, callback);
  }
  
  /**
@@ -52,12 +53,60 @@ HwInputService::submit (const DriverAPI *api, uint32_t id,
   * @param[in] model the target model
   * @param[in] segt the target segment table
   * @param[in] callback output callback
+ * @return 0 or positive id if no error, otherwise a negative errno
   */
-void
+int
  HwInputService::invoke (const DriverAPI *api, const Model *model,
      SegmentTable *segt, outputCallback callback)
  {
-  /** TODO */
+  input_config_t input_config;
+  device_state_t state;
+  int ret = -EINVAL;
+
+  state = api->isReady();
+  if (state != device_state_t::STATE_READY) {
+    logerr (TAG, "device is not available to run inference %d\n", state);
+    goto handle_callback;
+  }
+
+  if (model == nullptr) {
+    logerr (TAG, "No valid model provided\n");
+    goto handle_callback;
+  }
+
+  /** consider NOP cases */
+  if (model->getProgramData() == nullptr) {
+    ret = 0;
+    goto handle_callback;
+  }
+
+  input_config.model_id = model->getInternalID();
+  if (segt != nullptr) {
+    input_config.dbuf_fd = segt->getDmabuf ();
+    input_config.num_segments = segt->getNumTotalSegments ();
+  } else {
+    /** some instructions do not require the segment table (e.g., nop) */
+    input_config.dbuf_fd = -1;
+    input_config.num_segments = 0;
+  }
+
+  /** set constraints (fixed values; the model's own constraint is not consulted here) */
+  input_config.timeout_ms = 0;  /* immediately handled */
+  input_config.priority = NPU_PRIORITY_HIGH;
+  input_config.input_mode = INPUT_HW;
+  input_config.output_mode = OUTPUT_HW;
+
+  /**
+   * run the inference with the input.
+   * runInput () returns 0 or a positive id on success, so only negative
+   * values (other than a cancellation) are reported as failures.
+   */
+  ret = api->runInput (&input_config);
+  if (ret < 0 && ret != -ECANCELED)
+    logerr (TAG, "Failed to run the NPU inference: %d\n", ret);
+
+handle_callback:
+  /** should call the callback regardless of failure, to avoid deadlock */
    if (callback != nullptr)
      callback ();
+
+  /** 0 or the positive id from runInput () on success, a negative errno otherwise */
+  return ret;
  }
diff --git a/src/core/ne-inputservice.h b/src/core/ne-inputservice.h

index 6802695..56080fd 100644 (file)
--- a/src/core/ne-inputservice.h
+++ b/src/core/ne-inputservice.h
@@ -76,7 +76,7 @@ class HwInputService : public InputService {
  
    private:
      /** do not allow to directly call invoke () */
-    void invoke (const DriverAPI *api, const Model *model, SegmentTable *segt,
+    int invoke (const DriverAPI *api, const Model *model, SegmentTable *segt,
          outputCallback callback);
  
      static std::unique_ptr<HwInputService> instance_;
diff --git a/src/core/ne-segment-table.cc b/src/core/ne-segment-table.cc

index 55b617e..221fee3 100644 (file)
--- a/src/core/ne-segment-table.cc
+++ b/src/core/ne-segment-table.cc
@@ -80,11 +80,13 @@ SegmentTable::setSegmentSlot (HWmem *hwmem, int slot)
   * @brief create segments according to on metadata info
   * @param[in] model the model instance
   * @param[in] input user-provided input buffers
+ * @param[in] output user-provided output buffers
   * @return 0 if no error, otherwise a negative errno
   * @note we assume that # weight segments is always 1. (fix impl when it's changed)
   */
  int
-SegmentTable::createSegments (const Model *model, const input_buffers *input)
+SegmentTable::createSegments (const Model *model,
+    const input_buffers *input, const output_buffers *output)
  {
    if (model == nullptr || input == nullptr) {
      logerr (TAG, "No model/input provided\n");
@@ -145,7 +147,8 @@ SegmentTable::createSegments (const Model *model, const input_buffers *input)
          hwmem->setSize (0);
        }
        hwmem->setOffset (0);
-    } else { /** check input segments */
+    } else {
+      /** check input segments */
        for (uint32_t j = 0; j < meta->getInputNum (); j++) {
          /** user provides a external segment */
          if (i == meta->getInputSegmentIndex (j)) {
@@ -161,6 +164,24 @@ SegmentTable::createSegments (const Model *model, const input_buffers *input)
          }
        }
  
+      /** check output segments if provided */
+      if (hwmem == nullptr && output != nullptr) {
+        for (uint32_t j = 0; j < meta->getOutputNum (); j++) {
+          /** user provides a external segment */
+          if (i == meta->getOutputSegmentIndex (j)) {
+            if (output->bufs[j].type == BUFFER_DMABUF) {
+              hwmem = new HWmem (new HWmemExternal);
+              hwmem->setDriverAPI (getDriverAPI ());
+              /** it assume that dmabuf indicates the base address of segment */
+              hwmem->setDmabuf (output->bufs[j].dmabuf);
+              hwmem->setOffset (0);
+              hwmem->setSize (size);
+              break;
+            }
+          }
+        }
+      }
+
        if (hwmem == nullptr) {
          hwmem = new HWmem (new HWmemDevice);
          hwmem->setDriverAPI (getDriverAPI ());
diff --git a/src/core/ne-segment-table.h b/src/core/ne-segment-table.h

index 6173733..d2b7685 100644 (file)
--- a/src/core/ne-segment-table.h
+++ b/src/core/ne-segment-table.h
@@ -28,7 +28,8 @@ class SegmentTable : public HWmem {
  
      /** Override alloc () and DO NOT support a variable-sized segment table */
      int alloc (size_t = 0) { return static_cast<HWmem *>(this)->alloc (PAGE_SIZE); }
-    int createSegments (const Model *model, const input_buffers *input);
+    int createSegments (const Model *model, const input_buffers *input,
+        const output_buffers *output = nullptr);
  
      HWmem *getWeightSegment (uint32_t idx = 0); /** maybe, # weight segments is 1 */
      HWmem *getInputSegment (uint32_t idx);
diff --git a/src/core/npu/NPUdrvAPI.h b/src/core/npu/NPUdrvAPI.h

index 3ec8c1c..d4f8351 100644 (file)
--- a/src/core/npu/NPUdrvAPI.h
+++ b/src/core/npu/NPUdrvAPI.h
@@ -31,6 +31,7 @@
  #include <bitset>
  #include <bits/stdc++.h>
  #include <memory>
+#include <thread>
  
  #include <sys/user.h> /* PAGE_SIZE */
  /** the size of each allocation is aligned to PAGE_SIZE */
@@ -91,6 +92,8 @@ class DriverAPI {
      virtual int runInput (input_config_t *input) const { return -EPERM; }
      /** @brief stop all requests. The stopped requests should be notified */
      virtual int stop () const { return 0; }
+    /** @brief stop the target request with the given id obtained by runInput() */
+    virtual int stop_target (int id) const { return -EPERM; }
  
      /** @brief register model config to the driver */
      virtual int registerModel (model_config_t *model) const { return -EPERM; }
@@ -189,6 +192,8 @@ class TrinityAsrAPI : public DriverAPI {
  /** @brief emulation element */
  class EmulElement;
  class EmulStat;
+class EmulTask;
+
  /** @brief Driver APIs for emulation */
  class TrinityEmulAPI : public DriverAPI {
    public:
@@ -209,6 +214,7 @@ class TrinityEmulAPI : public DriverAPI {
  
      int runInput (input_config_t *input) const;
      int stop () const;
+    int stop_target (int id) const;
  
      int registerModel (model_config_t *model) const;
      int deregisterModel (unsigned long long id) const;
@@ -220,6 +226,8 @@ class TrinityEmulAPI : public DriverAPI {
        /**< dmabuf-to-element map. to track memory allocation */
      static ThreadSafeMap<int, EmulStat> stat_map_;
        /**< devfd-to-stat map. to track memory statistics */
+    static ThreadSafeMap<int, EmulTask> task_map_;
+      /**< taskid-to-task map. to support async invoke/stop */
  
      dev_type dev_type_; /**< emulated device type */
  };
diff --git a/src/core/npu/NPUdrvAPI_emul.cc b/src/core/npu/NPUdrvAPI_emul.cc

index ec534b6..90745c8 100644 (file)
--- a/src/core/npu/NPUdrvAPI_emul.cc
+++ b/src/core/npu/NPUdrvAPI_emul.cc
@@ -23,6 +23,31 @@
  
  #define MAX_EMUL_DEVICES (100)
  
+/**
+ * @brief an emulation task that keeps running a TRIV2 program on its own
+ *        thread until stop () is called.
+ */
+class EmulTask {
+  public:
+    EmulTask () : stop_ (false) {}
+
+    /** destroying a joinable std::thread calls std::terminate; stop and join first */
+    ~EmulTask () { stop (); }
+
+    /**
+     * @brief thread body: run the program repeatedly until a stop is requested
+     * @param[in] prog the program to run
+     * @param[in] prog_size the program size
+     * @param[in] segt the segment table; released here with delete [] when the loop ends
+     * @param[in] seg_num the number of segments
+     */
+    void run_emul (char *prog, size_t prog_size, char **segt, size_t seg_num) {
+      while (!stop_.load ())
+        run_triv2_emul (prog, prog_size, segt, seg_num);
+
+      delete [] segt;
+    }
+
+    /** @brief start the given function on this task's thread */
+    void run (std::function<void ()> func) {
+      task_ = std::thread (func);
+    }
+
+    /** @brief request a stop and wait for the thread (no-op wait if it never started) */
+    void stop () {
+      stop_.store (true);
+      /* join () on a non-joinable thread throws std::system_error */
+      if (task_.joinable ())
+        task_.join ();
+    }
+
+  private:
+    /* written by the stopping thread and polled by the task thread;
+     * a plain bool here would be a data race */
+    std::atomic<bool> stop_;
+    std::thread task_;
+};
+
  /**
   * @brief memory statistics. Because driver API methods require 'const' instances,
   * we use a static variable for std::map to update statistics for each device emulation.
@@ -108,6 +133,8 @@ EmulElement::EmulElement (size_t size)
  ThreadSafeMap<int, EmulElement> TrinityEmulAPI::elem_map_;
  /** @brief devfd-to-stat map */
  ThreadSafeMap<int, EmulStat> TrinityEmulAPI::stat_map_;
+/** @brief taskid-to-task map */
+ThreadSafeMap<int, EmulTask> TrinityEmulAPI::task_map_;
  /** @brief element's global id */
  std::atomic<int> EmulElement::global_id_ (0);
  /** @brief element's global id */
@@ -348,21 +375,19 @@ TrinityEmulAPI::deregisterModel (unsigned long long id) const
  /**
   * @brief run inference with the input config
   * @param[in] input_config input configuration for the inference
- * @return 0 if no error. otherwise a negative errno
+ * @return 0 or positive id if no error. otherwise a negative errno
   */
  int
  TrinityEmulAPI::runInput (input_config_t *input_config) const
  {
-  int dbuf_fd;
-  int status = -EPERM;
-
    if (!initialized())
      return -EPERM;
  
    if (input_config == nullptr)
      return -EINVAL;
  
-  dbuf_fd = input_config->model_id >> TRINITY_SHIFT_MODEL_ID;
+  int dbuf_fd = input_config->model_id >> TRINITY_SHIFT_MODEL_ID;
+  int status = -EPERM;
  
    EmulElement *elem_model = elem_map_.find (dbuf_fd);
    if (elem_model == nullptr || elem_model->getAddr () == nullptr)
@@ -389,6 +414,9 @@ TrinityEmulAPI::runInput (input_config_t *input_config) const
      if (input_config->num_segments <= 0)
        return -EINVAL;
  
+    char *prog = addr_model + model->program_offset_addr;
+    size_t prog_size = model->program_size;
+
      uint32_t num_segs = input_config->num_segments;
      char ** segment_table = new char* [num_segs];
  
@@ -410,9 +438,23 @@ TrinityEmulAPI::runInput (input_config_t *input_config) const
          Conf::getInstance().getLogVerbose(),
          Conf::getInstance().getLogDir());
  
-    status = run_triv2_emul (addr_model + model->program_offset_addr, model->program_size,
-        segment_table, num_segs);
-    delete [] segment_table;
+    if (input_config->input_mode == INPUT_HW) {
+      int taskid = global_fd_.fetch_add (1);
+      EmulTask *task = new EmulTask;
+
+      status = task_map_.insert (taskid, task);
+      if (status != 0)
+        return status;
+
+      auto func = std::bind (&EmulTask::run_emul, task,
+          prog, prog_size, segment_table, num_segs);
+
+      task->run (func);
+      status = taskid;
+    } else {
+      status = run_triv2_emul (prog, prog_size, segment_table, num_segs);
+      delete [] segment_table;
+    }
    }
  
    return status;
@@ -425,6 +467,9 @@ TrinityEmulAPI::runInput (input_config_t *input_config) const
  int
  TrinityEmulAPI::stop () const
  {
+  if (!initialized())
+    return -EPERM;
+
    if ((dev_type_ & DEVICETYPE_MASK) == DEVICETYPE_TRIV) {
      return stop_triv_emul ();
    } else if ((dev_type_ & DEVICETYPE_MASK) == DEVICETYPE_TRIV2) {
@@ -433,3 +478,19 @@ TrinityEmulAPI::stop () const
  
    return -EPERM;
  }
+
+/**
+ * @brief stop the target task with the given id
+ * @param[in] taskid the task id returned by runInput()
+ * @return 0 if no error. otherwise a negative errno
+ */
+int
+TrinityEmulAPI::stop_target (int taskid) const
+{
+  if (!initialized())
+    return -EPERM;
+
+  EmulTask * target = task_map_.find (taskid);
+  if (target != nullptr) {
+    /* stop () waits for the task thread before the entry is dropped from the map */
+    target->stop ();
+    task_map_.remove (taskid);
+    return 0;
+  }
+
+  return -ENOENT;
+}
diff --git a/tests/apptests/hw_recurring.cc b/tests/apptests/hw_recurring.cc

new file mode 100644 (file)

index 0000000..a5fc231
--- /dev/null
+++ b/tests/apptests/hw_recurring.cc
@@ -0,0 +1,299 @@
+/**
+ * Proprietary
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ */
+/**
+ * @file hw_recurring.cc
+ * @date 21 Jul 2020
+ * @brief AppTest to test HW-recurring input service in TRIV2 device
+ * @author Dongju Chae <dongju.chae@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+
+#include <libnpuhost.h>
+#include <ne_test_utils.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <iostream>
+#include <fstream>
+
+/** ./include/typedef.h */
+#define TRIV2_TYPE NPUCOND_TRIV2_CONN_SOCIP
+#define NPU_MODEL_NAME ("model.tvn")
+#define DEFAULT_TIME   (3000) /* 3000 ms */
+
+/** @brief c++ class to emulate third-party HW to feed recurring input to the NPU */
+class ThirdPartyHW {
+  public:
+    ThirdPartyHW () : dev_ (nullptr) {
+      input_.num_buffers = 0;
+      output_.num_buffers = 0;
+    }
+
+    ~ThirdPartyHW () {
+      /** nothing was acquired if the device handle was never obtained */
+      if (dev_ == nullptr)
+        return;
+
+      cleanNPU_genericBuffers (dev_, &input_);
+      cleanNPU_genericBuffers (dev_, &output_);
+      putNPUdevice (dev_);
+    }
+
+    /**
+     * @brief get a device handle and allocate input/output buffers
+     * @param[in] meta the model metadata describing the segments
+     * @return 0 if no error, otherwise the first non-zero status
+     */
+    int init (const npubin_meta *meta) {
+      /** let's emulate an external dmabuf using TRIV2 */
+      int err = getNPUdeviceByType (&dev_, TRIV2_TYPE, 0);
+
+      if (err == 0)
+        err = alloc_inputs (meta);
+
+      if (err == 0)
+        err = alloc_outputs (meta);
+
+      return err;
+    }
+
+    /** @brief dmabuf of the idx-th input buffer (idx is not range-checked) */
+    int get_input_dmabuf (uint32_t idx) {
+      return input_.bufs[idx].dmabuf;
+    }
+
+    /** @brief dmabuf of the idx-th output buffer (idx is not range-checked) */
+    int get_output_dmabuf (uint32_t idx) {
+      return output_.bufs[idx].dmabuf;
+    }
+
+  private:
+    /** we don't consider the case that input/output segments are overlapped */
+    int alloc_inputs (const npubin_meta *meta) {
+      const uint32_t count = meta->input_seg_num;
+
+      input_.num_buffers = count;
+      for (uint32_t i = 0; i < count; i++) {
+        /** each buffer covers the whole segment it belongs to */
+        input_.bufs[i].type = BUFFER_MAPPED;
+        input_.bufs[i].size = meta->segment_size[meta->input_seg_idx[i]];
+      }
+
+      return allocNPU_genericBuffers (dev_, &input_);
+    }
+
+    int alloc_outputs (const npubin_meta *meta) {
+      const uint32_t count = meta->output_seg_num;
+
+      output_.num_buffers = count;
+      for (uint32_t i = 0; i < count; i++) {
+        /** each buffer covers the whole segment it belongs to */
+        output_.bufs[i].type = BUFFER_MAPPED;
+        output_.bufs[i].size = meta->segment_size[meta->output_seg_idx[i]];
+      }
+
+      return allocNPU_genericBuffers (dev_, &output_);
+    }
+
+    npudev_h dev_;
+    input_buffers input_;
+    output_buffers output_;
+};
+
+/** @brief c++ class to describe how to use npu-engine library */
+class TesterTRIV2 {
+  public:
+    TesterTRIV2 () : dev_ (nullptr), meta_ (nullptr), model_id_ (0) {
+      model_.size = 0;
+      input_.num_buffers = 0;
+      output_.num_buffers = 0;
+      time_ = DEFAULT_TIME;
+    }
+
+    ~TesterTRIV2 () {
+      /** release resources */
+      if (dev_ != nullptr) {
+        if (model_id_ > 0)
+          unregisterNPUmodel (dev_, model_id_);
+
+        if (model_.size > 0)
+          cleanNPU_modelBuffer (dev_, &model_);
+
+        putNPUdevice (dev_);
+      }
+
+      /** the metadata does not depend on the device handle; always release it */
+      if (meta_ != nullptr)
+        free (meta_);
+    }
+
+    /**
+     * @brief set how long the recurring inference runs before it is stopped
+     * @param[in] time duration in ms. negative values are ignored (they would
+     *            otherwise wrap around to a huge unsigned duration)
+     */
+    void set_time (int time) {
+      if (time < 0)
+        return;
+
+      time_ = static_cast<uint32_t> (time);
+    }
+
+    /** @brief initialize the device handle */
+    int init (const std::string dir) {
+      int num_devices = getnumNPUdeviceByType (TRIV2_TYPE);
+      if (num_devices <= 0)
+        return -ENODEV;
+
+      /** use any available TRIV2 device. (0 <= dev_id < num_devices) */
+      int dev_id = num_devices - 1;
+
+      dir_ = dir;
+      return getNPUdeviceByType (&dev_, TRIV2_TYPE, dev_id);
+    }
+
+    /**
+     * @brief run the inference (with dummy data)
+     * @return 0 if no error, otherwise a negative error value
+     */
+    int run () {
+      int status = prepare_model ();
+      if (status != 0)
+        return status;
+
+      status = hw_.init (meta_);
+      if (status != 0)
+        return status;
+
+      status = prepare_input ();
+      if (status != 0)
+        return status;
+
+      status = prepare_output ();
+      if (status != 0)
+        return status;
+
+      status = set_data_info ();
+      if (status != 0)
+        return status;
+
+      /** a non-negative return is the request id to be stopped later */
+      int id = runNPU_internalInput (dev_, model_id_, NPUINPUT_HW_RECURRING,
+          &input_, &output_);
+      if (id < 0)
+        return id;
+
+      /** let the recurring inference run for time_ ms */
+      usleep (time_ * 1000);
+
+      return stopNPU_internalInput (dev_, id);
+    }
+
+  private:
+    /** @brief load the model metadata and register the model to the device */
+    int prepare_model () {
+      /**
+       * model_ keeps the raw filepath pointer and is still handed to
+       * cleanNPU_modelBuffer () in the destructor, so the backing string
+       * must outlive this function (a local std::string would dangle).
+       */
+      model_path_ = dir_ + "/" + NPU_MODEL_NAME;
+
+      meta_ = getNPUmodel_metadata (model_path_.c_str(), false);
+      if (meta_ == nullptr)
+        return -EINVAL;
+
+      /** this test only handles npubin version 3 */
+      if (NPUBIN_VERSION (meta_->magiccode) != 3)
+        return -EINVAL;
+
+      model_.type = BUFFER_FILE;
+      model_.size = meta_->size;
+      model_.filepath = model_path_.c_str();
+
+      int status = allocNPU_modelBuffer (dev_, &model_);
+      if (status != 0)
+        return status;
+
+      return registerNPUmodel (dev_, &model_, &model_id_);
+    }
+
+    /** @brief describe the input buffers using the dmabufs of the emulated HW */
+    int prepare_input () {
+      input_.num_buffers = meta_->input_seg_num;
+      for (uint32_t idx = 0; idx < meta_->input_seg_num; idx++) {
+        uint32_t input_seg_idx = meta_->input_seg_idx[idx];
+        uint32_t input_seg_size = meta_->segment_size[input_seg_idx];
+        uint32_t input_seg_off = meta_->input_seg_off[idx];
+
+        input_.bufs[idx].type = BUFFER_DMABUF;
+        input_.bufs[idx].size = input_seg_size - input_seg_off;
+        input_.bufs[idx].offset = input_seg_off;
+        input_.bufs[idx].dmabuf = hw_.get_input_dmabuf (idx);
+      }
+
+      return 0;
+    }
+
+    /** @brief describe the output buffers using the dmabufs of the emulated HW */
+    int prepare_output () {
+      output_.num_buffers = meta_->output_seg_num;
+      for (uint32_t idx = 0; idx < meta_->output_seg_num; idx++) {
+        uint32_t output_seg_idx = meta_->output_seg_idx[idx];
+        uint32_t output_seg_size = meta_->segment_size[output_seg_idx];
+        uint32_t output_seg_off = meta_->output_seg_off[idx];
+
+        output_.bufs[idx].type = BUFFER_DMABUF;
+        output_.bufs[idx].size = output_seg_size - output_seg_off;
+        output_.bufs[idx].offset = output_seg_off;
+        output_.bufs[idx].dmabuf = hw_.get_output_dmabuf (idx);
+      }
+
+      return 0;
+    }
+
+    /** @brief set the same layout/type info for every input and output tensor */
+    int set_data_info () {
+      tensors_data_info info_in;
+      tensors_data_info info_out;
+
+      /* No data manipulation & quantization in this test */
+
+      info_in.num_info = meta_->input_seg_num;
+      for (uint32_t idx = 0; idx < info_in.num_info; idx++) {
+        info_in.info[idx].layout = DATA_LAYOUT_TRIV2;
+        info_in.info[idx].type = DATA_TYPE_QASYMM8;
+      }
+
+      info_out.num_info = meta_->output_seg_num;
+      for (uint32_t idx = 0; idx < info_out.num_info; idx++) {
+        info_out.info[idx].layout = DATA_LAYOUT_TRIV2;
+        info_out.info[idx].type = DATA_TYPE_QASYMM8;
+      }
+
+      return setNPU_dataInfo (dev_, model_id_, &info_in, &info_out);
+    }
+
+    std::string dir_;
+    std::string model_path_; /**< backing storage of model_.filepath */
+
+    npudev_h dev_;
+
+    npubin_meta *meta_;
+    uint32_t model_id_;
+    generic_buffer model_;
+    input_buffers input_;
+    output_buffers output_;
+
+    uint32_t time_; /**< how long the recurring inference runs (ms) */
+    ThirdPartyHW hw_;
+};
+
+/**
+ * @brief apptest main
+ * @param[in] argc the number of arguments
+ * @param[in] argv argv[1] is the model directory, argv[2] (optional) is the run time in ms
+ * @return 0 if passed or skipped, otherwise the failing status
+ */
+int
+main (int argc, char **argv)
+{
+  TesterTRIV2 tester;
+
+  /** without a model directory there is nothing to test */
+  if (argc < 2) {
+    std::cerr << "[APPTEST] " << argv[0] << ": SKIPPED\n";
+    return 0;
+  }
+
+  /** initialize triv2 device */
+  int status = tester.init (argv[1]);
+  if (status == 0) {
+    if (argc > 2)
+      tester.set_time (atoi(argv[2]));
+
+    /** run the inference with the device */
+    status = tester.run ();
+  }
+
+  if (status != 0) {
+    std::cerr << "[APPTEST] " << argv[0] << ": FAILED (" << status << ")\n";
+    return status;
+  }
+
+  std::cerr << "[APPTEST] " << argv[0] << ": PASSED\n";
+  return 0;
+}
diff --git a/tests/apptests/meson.build b/tests/apptests/meson.build

index 248e0da..4efbb2c 100644 (file)
--- a/tests/apptests/meson.build
+++ b/tests/apptests/meson.build
@@ -74,6 +74,16 @@ executable ('apptest_multiple_runs',
    install_dir : join_paths(ne_bindir, 'apptests')
  )
  
+executable ('apptest_hw_recurring',
+  'hw_recurring.cc',
+  include_directories : ne_apptest_inc,
+  dependencies : ne_test_utils_dep,
+  link_with : ne_library_shared,
+  install : true,
+  install_rpath : ne_libdir,
+  install_dir : join_paths(ne_bindir, 'apptests')
+)
+
  ## TRIV2 (Trinity Vision 2)
  executable ('apptest_dummy_run_sync_triv2',
    'dummy_run_sync_triv2.cc',
diff --git a/tests/apptests/multiple_runs.cc b/tests/apptests/multiple_runs.cc

index b023e3b..5053f91 100644 (file)
--- a/tests/apptests/multiple_runs.cc
+++ b/tests/apptests/multiple_runs.cc
@@ -201,7 +201,7 @@ class TesterTRIV2 {
      int set_constraint () {
        npuConstraint constraint;
  
-      constraint.timeout_ms = NPU_TIMEOUT_MS;;
+      constraint.timeout_ms = NPU_TIMEOUT_MS;
        constraint.priority = NPU_PRIORITY_MID;
        constraint.notimode = noti_mode_;
author	Dongju Chae <dongju.chae@samsung.com>
	Wed, 22 Jul 2020 05:40:54 +0000 (14:40 +0900)
committer	송욱/On-Device Lab(SR)/Staff Engineer/삼성전자 <wook16.song@samsung.com>
	Mon, 27 Jul 2020 02:30:02 +0000 (11:30 +0900)
include/host/libnpuhost.h		patch \| blob \| history
src/core/ne-handler.cc		patch \| blob \| history
src/core/ne-handler.h		patch \| blob \| history
src/core/ne-host-input-service.cc		patch \| blob \| history
src/core/ne-hw-input-service.cc		patch \| blob \| history
src/core/ne-inputservice.h		patch \| blob \| history
src/core/ne-segment-table.cc		patch \| blob \| history
src/core/ne-segment-table.h		patch \| blob \| history
src/core/npu/NPUdrvAPI.h		patch \| blob \| history
src/core/npu/NPUdrvAPI_emul.cc		patch \| blob \| history
tests/apptests/hw_recurring.cc	[new file with mode: 0644]	patch \| blob
tests/apptests/meson.build		patch \| blob \| history
tests/apptests/multiple_runs.cc		patch \| blob \| history