[Profile] Implement the prototype of profiling APIs

author Dongju Chae <dongju.chae@samsung.com>

Mon, 21 Sep 2020 05:51:52 +0000 (14:51 +0900)

committer 송욱/On-Device Lab(SR)/Staff Engineer/삼성전자 <wook16.song@samsung.com>

Thu, 24 Sep 2020 05:57:07 +0000 (14:57 +0900)
author Dongju Chae <dongju.chae@samsung.com>
Mon, 21 Sep 2020 05:51:52 +0000 (14:51 +0900)
committer 송욱/On-Device Lab(SR)/Staff Engineer/삼성전자 <wook16.song@samsung.com>
Thu, 24 Sep 2020 05:57:07 +0000 (14:57 +0900)
diff --git a/include/host/libnpuhost.h b/include/host/libnpuhost.h

index 01ef7ba..8f03adf 100644 (file)
--- a/include/host/libnpuhost.h
+++ b/include/host/libnpuhost.h
@@ -440,21 +440,21 @@ typedef struct {
    uint64_t mem_read_bytes;
    uint64_t mem_write_bytes;
    /* TBD */
-} npu_profile_op;
+} npu_profile_layer;
  
  typedef struct {
-  uint32_t num_ops;
-  npu_profile_op *profile;
+  uint32_t num_layers;
+  npu_profile_layer *layers;
  } npu_profile;
  
  /**
   * @brief Get the profile information from NPU
   * @param[in] dev NPU device handle
- * @param[in] run_id Identifier for each inference (obtained by runNPU_*)
+ * @param[in] task_id Identifier for each inference (obtained by runNPU_*)
   * @param[out] profile Profile instance
   * @return 0 if no error, otherwise a negative errno.
   */
-int getNPU_profile (npudev_h dev, int run_id, npu_profile *profile);
+int getNPU_profile (npudev_h dev, int task_id, npu_profile *profile);
  
  /**
   * @brief Free the profile instance obtained by getNPU_profile().
diff --git a/src/core/ne-handler.cc b/src/core/ne-handler.cc

index 080ccd0..a1d3c93 100644 (file)
--- a/src/core/ne-handler.cc
+++ b/src/core/ne-handler.cc
@@ -77,15 +77,15 @@ void putNPUdevice (npudev_h dev)
  /**
   * @brief Get the profile information from NPU
   * @param[in] dev The NPU device handle
- * @param[in] run_id The identifier for each inference
+ * @param[in] task_id The identifier for each inference
   * @param[out] profile The profile instance
   * @return 0 if no error, otherwise a negative errno.
   */
-int getNPU_profile (npudev_h dev, int run_id, npu_profile *profile)
+int getNPU_profile (npudev_h dev, int task_id, npu_profile *profile)
  {
    INIT_HOST_HANDLER (host_handler, dev);
  
-  return host_handler->getProfile (run_id, profile);
+  return host_handler->getProfile (task_id, profile);
  }
  
  /**
@@ -546,14 +546,14 @@ HostHandler::unregisterModels ()
  
  /**
   * @brief Get the profile information from NPU
- * @param[in] run_id The identifier for each inference
+ * @param[in] task_id The identifier for each inference
   * @param[out] profile The profile instance
   * @return 0 if no error, otherwise a negative errno.
   */
  int
-HostHandler::getProfile (int run_id, npu_profile *profile)
+HostHandler::getProfile (int task_id, npu_profile *profile)
  {
-  if (run_id < 0 || profile == nullptr) {
+  if (task_id < 0 || profile == nullptr) {
      logerr (TAG, "Invalid parameter provided\n");
      return -EINVAL;
    }
@@ -562,12 +562,23 @@ HostHandler::getProfile (int run_id, npu_profile *profile)
    assert (api != nullptr);
  
-  void *profile_buffer;
-  int status = api->getProfile (run_id, &profile_buffer);
+  /* pre-set the outputs: a backend may return 0 without filling them */
+  void *profile_buffer = nullptr;
+  size_t profile_size = 0;
+  int status = api->getProfile (task_id, &profile_buffer, &profile_size);
    if (status != 0) {
+    logerr (TAG, "Failed to get profile information: %d\n", status);
+    return status;
+  }
+
+  profile->num_layers = 0;
+  profile->layers = nullptr;
+  if (profile_buffer != nullptr) {
      // TODO: Perform parsing
+    /* the raw driver buffer is not referenced after this point */
+    free (profile_buffer);
    }
  
-  return status;
+  return 0;
  }
  
  /**
diff --git a/src/core/npu/NPUdrvAPI.h b/src/core/npu/NPUdrvAPI.h

index d985ea7..0f2d939 100644 (file)
--- a/src/core/npu/NPUdrvAPI.h
+++ b/src/core/npu/NPUdrvAPI.h
@@ -104,7 +104,8 @@ class DriverAPI {
          void *addr, size_t size) const { return -EPERM; }
  #endif
  
-    virtual int getProfile (int run_id, void **profile) const { return -EPERM; }
+    virtual int getProfile (int task_id, void **profile_buf,
+        size_t *profile_size) const { return -EPERM; }
  
    protected:
      int dev_id_;  /**< device id. assume that 0 <= id < getNUmDevices() */
@@ -182,7 +183,8 @@ class TrinityVision2API : public DriverAPI {
          void *addr, size_t size) const;
  #endif
  
-    int getProfile (int run_id, void **profile) const;
+    int getProfile (int task_id, void **profile_buf,
+        size_t *profile_size) const;
  
    private:
      int getDrvVersion () const;
@@ -234,7 +236,8 @@ class TrinityEmulAPI : public DriverAPI {
      int registerModel (model_config_t *model) const;
      int deregisterModel (unsigned long long id) const;
  
-    int getProfile (int run_id, void **profile) const;
+    int getProfile (int task_id, void **profile_buf,
+        size_t *profile_size) const;
  
    private:
      static std::atomic<int> global_fd_;
diff --git a/src/core/npu/NPUdrvAPI_emul.cc b/src/core/npu/NPUdrvAPI_emul.cc

index 39fd3e4..b9f1b52 100644 (file)
--- a/src/core/npu/NPUdrvAPI_emul.cc
+++ b/src/core/npu/NPUdrvAPI_emul.cc
@@ -506,7 +506,15 @@ TrinityEmulAPI::stop_target (int taskid) const
  }
  
  int
-TrinityEmulAPI::getProfile (int run_id, void **profile) const
+TrinityEmulAPI::getProfile (int task_id, void **profile_buf,
+    size_t *profile_size) const
  {
    // TODO: allocate the buffer and call APIs from simulator
+  if (profile_buf == nullptr || profile_size == nullptr)
+    return -EINVAL;
+
+  /* nothing to report yet: hand back an explicit empty result */
+  *profile_buf = nullptr;
+  *profile_size = 0;
+
    return 0;
diff --git a/src/core/npu/NPUdrvAPI_triv2.cc b/src/core/npu/NPUdrvAPI_triv2.cc

index 1af93e6..a3cb5e0 100644 (file)
--- a/src/core/npu/NPUdrvAPI_triv2.cc
+++ b/src/core/npu/NPUdrvAPI_triv2.cc
@@ -11,7 +11,7 @@
  #include "NPUdrvAPI.h"
  
  constexpr int max_num_devs = ((1<<CHAR_BIT) - 1);
-constexpr int max_buf_size = 1024;
+constexpr size_t max_buf_size = (256 * PAGE_SIZE);
  
  const std::string TrinityVision2API::dev_node_base = "triv2";
  std::bitset<CHAR_BIT> TrinityVision2API::dev_bitset = 0;
@@ -403,8 +403,65 @@ TrinityVision2API::fpga_memcpy (int dmabuf, uint32_t offset, void *addr, size_t
  #endif
  
+/**
+ * @brief Fetch the raw profile data of a task from the kernel driver
+ * @param[in] task_id The identifier of the task
+ * @param[out] profile_buf malloc()-ed buffer; ownership passes to the caller
+ * @param[out] profile_size The size of profile_buf in bytes
+ * @return 0 if no error, otherwise a negative errno.
+ */
  int
-TrinityVision2API::getProfile (int run_id, void **profile) const
+TrinityVision2API::getProfile (int task_id, void **profile_buf,
+    size_t *profile_size) const
  {
-  // TODO: allocate the buffer and call ioctl() to the kernel driver
+  struct trinity_profile profile;
+  size_t size = max_buf_size;
+  void *buf;
+  int ret;
+
+  if (profile_buf == nullptr || profile_size == nullptr)
+    return -EINVAL;
+
+  buf = malloc (max_buf_size);
+  if (!buf)
+    return -ENOMEM;
+
+  profile.task_id = task_id;
+  profile.buf = buf;
+  profile.buf_size = size;
+  profile.next_size = 0;
+
+  ret = ioctl (this->getDeviceFD (), TRINITY_IOCTL_GET_PROFILE, &profile);
+  if (ret != 0)
+    goto ioctl_fail;
+
+  /* a non-zero next_size triggers a second ioctl() for that many more bytes */
+  if (profile.next_size != 0) {
+    buf = realloc (profile.buf, profile.buf_size + profile.next_size);
+    if (!buf) {
+      free (profile.buf);
+      return -ENOMEM;
+    }
+
+    /* the second chunk is placed right after the first one */
+    profile.buf = (char *) buf + profile.buf_size;
+    profile.buf_size = profile.next_size;
+    /* count the extra bytes before next_size is cleared */
+    size += profile.next_size;
+    profile.next_size = 0;
+
+    ret = ioctl (this->getDeviceFD (), TRINITY_IOCTL_GET_PROFILE, &profile);
+    if (ret != 0)
+      goto ioctl_fail;
+  }
+
+  *profile_buf = buf;
+  *profile_size = size;
+
    return 0;
+
+ioctl_fail:
+  /* buf is always the allocation base; profile.buf may point inside it */
+  ret = -errno;
+  free (buf);
+  return ret;
  }
diff --git a/tests/apptests/meson.build b/tests/apptests/meson.build

index 6e156cb..5236da4 100644 (file)
--- a/tests/apptests/meson.build
+++ b/tests/apptests/meson.build
@@ -143,3 +143,13 @@ executable ('apptest_tvn_triv2_xml',
    install_rpath : ne_libdir,
    install_dir : join_paths(ne_bindir, 'apptests')
  )
+
+executable ('apptest_tvn_triv2_profile',
+  'tvn_triv2_profile.cc',
+  include_directories : ne_apptest_inc,
+  dependencies : ne_test_utils_dep,
+  link_with : ne_library_shared,
+  install : true,
+  install_rpath : ne_libdir,
+  install_dir : join_paths(ne_bindir, 'apptests')
+)
diff --git a/tests/apptests/tvn_triv2_profile.cc b/tests/apptests/tvn_triv2_profile.cc

new file mode 100644 (file)

index 0000000..eea59cb
--- /dev/null
+++ b/tests/apptests/tvn_triv2_profile.cc
@@ -0,0 +1,204 @@
+/**
+ * Proprietary
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ */
+/**
+ * @file tvn_triv2_profile.cc
+ * @date 07 Sep 2020
+ * @brief AppTest to test profiling APIs
+ * @author Dongju Chae <dongju.chae@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+
+#include <string.h>
+#include <unistd.h>
+
+#include <ne_test_utils.h>
+
+using namespace std;
+
+class Tester : public UtilTRIV2
+{
+  public:
+    Tester () : total_ (0), debug_ (false), sync_ (false) {}
+
+    void set_debug () {
+      debug_ = true;
+    }
+
+    void set_mute () {
+      /** redirect to /dev/null */
+      if (freopen("/dev/null", "w", stdout) == NULL)
+        cerr << "Warning: Failed to reopen stdout\n";
+      if (freopen("/dev/null", "w", stderr) == NULL)
+        cerr << "Warning: Failed to reopen stderr\n";
+    }
+
+    void set_sync () {
+      sync_ = true;
+    }
+
+    int load_model (std::string model) {
+      npu_priority priority = NPU_PRIORITY_MID;
+      uint32_t timeout = 1000;
+
+      total_++;
+
+      return UtilTRIV2::loadModel (model, &model_id_,
+            static_cast<npu_priority>(priority), timeout);
+    }
+
+    int init (std::string model) {
+      int status = UtilTRIV2::init ();
+      if (status != 0) {
+        cerr << "Failed to initialize\n";
+        return status;
+      }
+
+      return load_model (model);
+    }
+
+    int run () {
+      int status = UtilTRIV2::run (model_id_, callback, sync_);
+      if (status < 0) {
+        cerr << "Failed to run the model: " << status << "\n";
+        return status;
+      }
+
+      wait_runs ();
+
+      print_profile (status);
+
+      return passed_ == total_ ? 0 : -EINVAL;
+    }
+
+    void print_profile (int task_id) {
+      npu_profile profile;
+      int status = UtilTRIV2::getProfile (task_id, &profile);
+      if (status == 0) {
+        if (profile.layers != nullptr) {
+          for (uint32_t i = 0; i < profile.num_layers; i++) {
+            cerr << "[" << i << "] " << profile.layers[i].name << "\n";
+            cerr << "\tLatency    (msec) : " << profile.layers[i].latency_ms << "\n";
+            cerr << "\tLatency  (cycles) : " << profile.layers[i].latency_cycles << "\n";
+            cerr << "\tMemRead  (KBytes) : " << (profile.layers[i].mem_read_bytes >> 10) << "\n";
+            cerr << "\tMemWrite (KBytes) : " << (profile.layers[i].mem_write_bytes >> 10) << "\n";
+          }
+          free (profile.layers);
+        }
+      } else {
+        cerr << "Failed to get profile: " << status << "\n";
+      }
+    }
+
+    void wait_runs () {
+      unique_lock<mutex> lock (m_);
+      cv_.wait (lock, [this]() { return done_ == total_; });
+    }
+
+    /** @brief compare each output with its golden file; release all buffers */
+    static void callback (output_buffers *output, uint64_t sequence,
+        void *data) {
+      const char *dirpath = static_cast<const char *> (data);
+      int err = 0;
+
+      for (uint32_t idx = 0; idx < output->num_buffers; idx++) {
+        char *output_data = static_cast<char *> (output->bufs[idx].addr);
+        off_t output_size = output->bufs[idx].size;
+        /* after the first mismatch, just keep releasing the buffers */
+        if (err == 0) {
+          std::string golden_path = std::string (dirpath) + "/output_fmap_" +
+              std::to_string (idx) + ".bin";
+          err = compare_data (golden_path.c_str (), output_data, output_size);
+        }
+        free (output_data);
+      }
+
+      report (err == 0);
+    }
+
+    static void report (bool passed) {
+      unique_lock<mutex> lock (m_);
+      done_++;
+      if (passed)
+        passed_++;
+      cv_.notify_one ();
+    }
+
+  private:
+    static mutex m_;
+    static condition_variable cv_;
+    static uint32_t done_;
+    static uint32_t passed_;
+
+    uint32_t model_id_;
+    uint32_t total_;
+    bool debug_;
+    bool sync_;
+};
+
+mutex Tester::m_;
+condition_variable Tester::cv_;
+uint32_t Tester::done_ = 0;
+uint32_t Tester::passed_ = 0;
+
+static void
+print_usage (const char *prog_name)
+{
+  cerr << "Usage: " << prog_name << " [options] [model dirpath]\n";
+  cerr << "Options: \n";
+  cerr << "  -h \t\t Show help messages\n";
+  cerr << "  -m \t\t Mute stdout/stderr messages\n";
+  cerr << "  -d \t\t Enable debugging mode\n";
+  cerr << "  -s \t\t Enable run sync mode\n";
+}
+
+/** @brief apptest main  */
+int
+main (int argc, char **argv)
+{
+  Tester tester;
+  int c, status;
+
+  optind = 0;
+  opterr = 0;
+  while ((c = getopt (argc, argv, "dmsh")) != -1) {
+    switch (c) {
+      case 'd':
+        tester.set_debug ();
+        break;
+      case 'm':
+        tester.set_mute ();
+        break;
+      case 's':
+        tester.set_sync ();
+        break;
+      case 'h':
+        print_usage(argv[0]);
+        return 0;
+    }
+  }
+
+  if (optind >= argc) {
+    cerr << "[APPTEST] " << argv[0] << ": SKIPPED\n";
+    return 0;
+  }
+
+  /** initialize triv2 device */
+  status = tester.init (argv[optind]);
+  if (status != 0)
+    goto err;
+
+  /** run the inference with the device */
+  status = tester.run ();
+  if (status != 0)
+    goto err;
+
+  cerr << "[APPTEST] " << argv[0] << ": PASSED\n";
+  return 0;
+
+err:
+  cerr << "[APPTEST] " << argv[0] << ": FAILED (" << status << ")\n";
+  return status;
+}
diff --git a/tests/utils/ne_test_utils.cc b/tests/utils/ne_test_utils.cc

index 6f235d5..f619ae4 100644 (file)
--- a/tests/utils/ne_test_utils.cc
+++ b/tests/utils/ne_test_utils.cc
@@ -414,10 +414,8 @@ int UtilTrinity::run_each (UtilModel *model, npuOutputNotify cb, bool sync)
        cleanNPU_inputBuffers (dev_, &input);
      }
  
-    if (status != 0)
-      return status;
-
-    cb (&output, 0, cb_data);
+    if (status >= 0)
+      cb (&output, 0, cb_data);
    } else {
      if (model) {
        status = runNPU_async (dev_, model->getModelID (), model->getInput (),
@@ -436,12 +434,9 @@ int UtilTrinity::run_each (UtilModel *model, npuOutputNotify cb, bool sync)
        status = runNPU_async (dev_, 0, &input, cb, NULL, cb_data, NPUASYNC_WAIT);
        cleanNPU_inputBuffers (dev_, &input);
      }
-
-    if (status != 0)
-      return status;
    }
  
-  return 0;
+  return status;
  }
  
  /** @brief run inference with the given model id */
@@ -457,7 +452,7 @@ int UtilTrinity::runAll (npuOutputNotify cb, bool sync)
  {
    for (auto& model : models_) {
      int status = run_each (model.get (), cb, sync);
-    if (status != 0)
+    if (status < 0)
        return status;
    }
  
@@ -477,6 +472,12 @@ int UtilTrinity::stopInternal (int task_id)
    return stopNPU_internalInput (dev_, task_id);
  }
  
+/** @brief get profile information from NPU */
+int UtilTrinity::getProfile (int task_id, npu_profile *profile)
+{
+  return getNPU_profile (dev_, task_id, profile);
+}
+
  /** @brief configure constraint for the model */
  int UtilTrinity::set_constraint (uint32_t model_id, uint32_t timeout,
      npu_priority priority) {
diff --git a/tests/utils/ne_test_utils.h b/tests/utils/ne_test_utils.h

index 512aa4c..fb68210 100644 (file)
--- a/tests/utils/ne_test_utils.h
+++ b/tests/utils/ne_test_utils.h
@@ -87,6 +87,8 @@ class UtilTrinity {
      int runInternal (uint32_t model_id, std::string dev_path);
      int stopInternal (int task_id);
  
+    int getProfile (int task_id, npu_profile *profile);
+
      UtilModel *findModel (uint32_t model_id);
  
    protected:
author	Dongju Chae <dongju.chae@samsung.com>
	Mon, 21 Sep 2020 05:51:52 +0000 (14:51 +0900)
committer	송욱/On-Device Lab(SR)/Staff Engineer/삼성전자 <wook16.song@samsung.com>
	Thu, 24 Sep 2020 05:57:07 +0000 (14:57 +0900)
include/host/libnpuhost.h		patch \| blob \| history
src/core/ne-handler.cc		patch \| blob \| history
src/core/npu/NPUdrvAPI.h		patch \| blob \| history
src/core/npu/NPUdrvAPI_emul.cc		patch \| blob \| history
src/core/npu/NPUdrvAPI_triv2.cc		patch \| blob \| history
tests/apptests/meson.build		patch \| blob \| history
tests/apptests/tvn_triv2_profile.cc	[new file with mode: 0644]	patch \| blob
tests/utils/ne_test_utils.cc		patch \| blob \| history
tests/utils/ne_test_utils.h		patch \| blob \| history