[libnpuhost/profile] Add new APIs for NPU profiling

author Dongju Chae <dongju.chae@samsung.com>

Tue, 15 Sep 2020 07:10:45 +0000 (16:10 +0900)

committer 송욱/On-Device Lab(SR)/Staff Engineer/삼성전자 <wook16.song@samsung.com>

Mon, 21 Sep 2020 05:43:59 +0000 (14:43 +0900)
author Dongju Chae <dongju.chae@samsung.com>
Tue, 15 Sep 2020 07:10:45 +0000 (16:10 +0900)
committer 송욱/On-Device Lab(SR)/Staff Engineer/삼성전자 <wook16.song@samsung.com>
Mon, 21 Sep 2020 05:43:59 +0000 (14:43 +0900)
diff --git a/include/host/libnpuhost.h b/include/host/libnpuhost.h

index 3d8a2c3..01ef7ba 100644 (file)
--- a/include/host/libnpuhost.h
+++ b/include/host/libnpuhost.h
@@ -208,7 +208,7 @@ int setNPU_constraint(npudev_h dev, uint32_t modelid, npuConstraint constraint);
   * @param[in] modelid The model to be inferred.
   * @param[in] input The input data to be inferred.
   * @param[out] output The output result. The caller MUST allocate appropriately before calling this.
- * @return @c 0 if no error. otherwise a negative error value
+ * @return @c 0 or a positive run id if no error; otherwise a negative error value
   *
   * @detail This is a syntactic sugar of runNPU_async().
   *         CAUTION: There is a memcpy for the output buffer.
@@ -276,7 +276,7 @@ typedef enum {
   * @param[out] sequence The sequence number returned with runNPU_async.
   * @param[in] data The data given as a parameter to the runNPU_async call.
   * @param[in] mode Configures how this operation works.
- * @return @c 0 if no error. otherwise a negative error value
+ * @return @c 0 or a positive run id if no error; otherwise a negative error value
   */
  int runNPU_async(npudev_h dev, uint32_t modelid, const input_buffers *input,
      npuOutputNotify cb, uint64_t *sequence, void *data,
@@ -427,6 +427,41 @@ int allocNPU_genericBuffers (npudev_h dev, generic_buffers * buffers);
   */
  int cleanNPU_genericBuffers (npudev_h dev, generic_buffers * buffers);
  
+/** NPU Profiling */
+
+#define NPU_OPNAME_MAX (32)  /**< size in bytes of npu_profile_op::name */
+
+typedef struct {
+  char name[NPU_OPNAME_MAX];  /**< operation name; NOTE(review): NUL-termination is not specified here - confirm */
+
+  uint32_t latency_ms;        /**< latency of the operation, presumably in milliseconds */
+  uint64_t latency_cycles;    /**< latency of the operation, presumably in cycles - TODO confirm which clock */
+
+  uint64_t mem_read_bytes;    /**< presumably the number of bytes read from memory by the operation */
+  uint64_t mem_write_bytes;   /**< presumably the number of bytes written to memory by the operation */
+  /* TBD: further per-operation fields are not defined yet */
+} npu_profile_op;             /**< profile record of a single operation (element type of npu_profile::profile) */
+
+typedef struct {
+  uint32_t num_ops;          /**< presumably the number of entries in 'profile' - TODO confirm */
+  npu_profile_op *profile;   /**< pointer to the per-operation records (npu_profile_op) */
+} npu_profile;               /**< profile instance used as the output of getNPU_profile() */
+
+/**
+ * @brief Get the profile information from NPU
+ * @param[in] dev NPU device handle
+ * @param[in] run_id Identifier for each inference (obtained by runNPU_*); must not be negative
+ * @param[out] profile Profile instance; must not be NULL
+ * @return 0 if no error, otherwise a negative errno (-EINVAL for a negative run_id or a NULL profile).
+ */
+int getNPU_profile (npudev_h dev, int run_id, npu_profile *profile);
+
+/**
+ * @brief Free the profile instance obtained by getNPU_profile().
+ * @param[in] profile Profile instance (NULL is ignored). The instance pointer itself is
+ *            passed to free(), so it must come from a malloc()-compatible allocation.
+ *            NOTE(review): the nested 'profile' array is not released here - confirm ownership.
+ */
+void putNPU_profile (npu_profile *profile);
+
  #if defined(__cplusplus)
  }
  #endif
diff --git a/src/core/ne-handler.cc b/src/core/ne-handler.cc

index 1197bc2..cf16965 100644 (file)
--- a/src/core/ne-handler.cc
+++ b/src/core/ne-handler.cc
@@ -75,6 +75,30 @@ void putNPUdevice (npudev_h dev)
  }
  
  /**
+ * @brief Get the profile information from NPU
+ * @param[in] dev The NPU device handle
+ * @param[in] run_id The identifier for each inference
+ * @param[out] profile The profile instance
+ * @return 0 if no error, otherwise a negative errno.
+ */
+int getNPU_profile (npudev_h dev, int run_id, npu_profile *profile)
+{
+  /* NOTE(review): INIT_HOST_HANDLER presumably declares 'host_handler' from 'dev'
+   * and bails out on an invalid handle - confirm against the macro definition */
+  INIT_HOST_HANDLER (host_handler, dev);
+
+  /* run_id and profile are validated inside HostHandler::getProfile() */
+  const int status = host_handler->getProfile (run_id, profile);
+  return status;
+}
+
+/**
+ * @brief Free the profile instance obtained by getNPU_profile().
+ * @param[in] profile The profile instance
+ */
+void putNPU_profile (npu_profile *profile)
+{
+  /* free() is a no-op for a null pointer, so no explicit guard is required.
+   * NOTE(review): only the instance itself is released; the nested
+   * 'profile->profile' array is left untouched - confirm intended ownership. */
+  free (profile);
+}
+
+/**
   * @brief Send the NN model to NPU.
   * @param[in] dev The NPU device handle
   * @param[in] modelfile The filepath to the compiled NPU NN model in any buffer_type
@@ -510,6 +534,32 @@ HostHandler::unregisterModels ()
  }
  
  /**
+ * @brief Get the profile information from NPU
+ * @param[in] run_id The identifier for each inference
+ * @param[out] profile The profile instance
+ * @return 0 if no error, otherwise a negative errno.
+ */
+int
+HostHandler::getProfile (int run_id, npu_profile *profile)
+{
+  /* reject negative identifiers and a missing output instance up front */
+  if (run_id < 0 || profile == nullptr) {
+    logerr (TAG, "Invalid parameter provided\n");
+    return -EINVAL;
+  }
+
+  const DriverAPI * api = device_->getDriverAPI ();
+  assert (api != nullptr);
+
+  /* start from an empty result so that callers never read indeterminate
+   * fields while the parsing step below is not implemented yet */
+  profile->num_ops = 0;
+  profile->profile = nullptr;
+
+  /* null-initialized: a driver may report success without writing the buffer */
+  void *profile_buffer = nullptr;
+  int status = api->getProfile (run_id, &profile_buffer);
+  if (status == 0) {
+    // TODO: Perform parsing of profile_buffer into profile
+    //       (only meaningful when the driver call succeeded)
+  }
+
+  return status;
+}
+
+/**
   * @brief Set the data layout for input/output tensors
   * @param[in] modelid The ID of model whose layouts are set
   * @param[in] in the layout/type info for input tensors
diff --git a/src/core/ne-handler.h b/src/core/ne-handler.h

index c021388..a29aa28 100644 (file)
--- a/src/core/ne-handler.h
+++ b/src/core/ne-handler.h
@@ -35,6 +35,8 @@ class HostHandler {
      int unregisterModel (uint32_t modelid);
      int unregisterModels ();
  
+    int getProfile (int run_id, npu_profile *profile);  /**< get the profile of the inference identified by run_id */
+
      int setDataInfo (uint32_t modelid, tensors_data_info *in, tensors_data_info *out);
      int setConstraint (uint32_t modelid, npuConstraint constraint);
  
diff --git a/src/core/npu/NPUdrvAPI.h b/src/core/npu/NPUdrvAPI.h

index 2332cb0..d985ea7 100644 (file)
--- a/src/core/npu/NPUdrvAPI.h
+++ b/src/core/npu/NPUdrvAPI.h
@@ -104,6 +104,8 @@ class DriverAPI {
          void *addr, size_t size) const { return -EPERM; }
  #endif
  
+    /** Base implementation returns -EPERM; device-specific APIs declare their own getProfile() */
+    virtual int getProfile (int run_id, void **profile) const { return -EPERM; }
+
    protected:
      int dev_id_;  /**< device id. assume that 0 <= id < getNUmDevices() */
      int dev_fd_;  /**< devide fd. opened in constructor and closed in destructor */
@@ -180,6 +182,8 @@ class TrinityVision2API : public DriverAPI {
          void *addr, size_t size) const;
  #endif
  
+    int getProfile (int run_id, void **profile) const;  /**< overrides DriverAPI::getProfile() */
+
    private:
      int getDrvVersion () const;
      static const std::string dev_node_base;
@@ -230,6 +234,8 @@ class TrinityEmulAPI : public DriverAPI {
      int registerModel (model_config_t *model) const;
      int deregisterModel (unsigned long long id) const;
  
+    int getProfile (int run_id, void **profile) const;  /**< overrides DriverAPI::getProfile() */
+
    private:
      static std::atomic<int> global_fd_;
        /**< global api fd */
diff --git a/src/core/npu/NPUdrvAPI_emul.cc b/src/core/npu/NPUdrvAPI_emul.cc

index 2f0ae8f..39fd3e4 100644 (file)
--- a/src/core/npu/NPUdrvAPI_emul.cc
+++ b/src/core/npu/NPUdrvAPI_emul.cc
@@ -504,3 +504,10 @@ TrinityEmulAPI::stop_target (int taskid) const
  
    return 0;
  }
+
+/**
+ * @brief Get the raw profile buffer of an inference (not implemented yet)
+ * @param[in] run_id The identifier for each inference
+ * @param[out] profile Set to nullptr until the simulator APIs are hooked up
+ * @return 0 (always, for now)
+ */
+int
+TrinityEmulAPI::getProfile (int run_id, void **profile) const
+{
+  // TODO: allocate the buffer and call APIs from simulator
+
+  /* Until then, hand back a null buffer so that a success return never
+   * leaves the caller's pointer uninitialized. */
+  if (profile != nullptr)
+    *profile = nullptr;
+
+  return 0;
+}
diff --git a/src/core/npu/NPUdrvAPI_triv2.cc b/src/core/npu/NPUdrvAPI_triv2.cc

index fd7aec8..1af93e6 100644 (file)
--- a/src/core/npu/NPUdrvAPI_triv2.cc
+++ b/src/core/npu/NPUdrvAPI_triv2.cc
@@ -401,3 +401,10 @@ TrinityVision2API::fpga_memcpy (int dmabuf, uint32_t offset, void *addr, size_t
    return 0;
  }
  #endif
+
+/**
+ * @brief Get the raw profile buffer of an inference (not implemented yet)
+ * @param[in] run_id The identifier for each inference
+ * @param[out] profile Set to nullptr until the kernel driver ioctl() is hooked up
+ * @return 0 (always, for now)
+ */
+int
+TrinityVision2API::getProfile (int run_id, void **profile) const
+{
+  // TODO: allocate the buffer and call ioctl() to the kernel driver
+
+  /* Until then, hand back a null buffer so that a success return never
+   * leaves the caller's pointer uninitialized. */
+  if (profile != nullptr)
+    *profile = nullptr;
+
+  return 0;
+}
author	Dongju Chae <dongju.chae@samsung.com>
	Tue, 15 Sep 2020 07:10:45 +0000 (16:10 +0900)
committer	송욱/On-Device Lab(SR)/Staff Engineer/삼성전자 <wook16.song@samsung.com>
	Mon, 21 Sep 2020 05:43:59 +0000 (14:43 +0900)
include/host/libnpuhost.h		patch \| blob \| history
src/core/ne-handler.cc		patch \| blob \| history
src/core/ne-handler.h		patch \| blob \| history
src/core/npu/NPUdrvAPI.h		patch \| blob \| history
src/core/npu/NPUdrvAPI_emul.cc		patch \| blob \| history
src/core/npu/NPUdrvAPI_triv2.cc		patch \| blob \| history