This patch implements the prototype of profiling APIs.
Signed-off-by: Dongju Chae <dongju.chae@samsung.com>
uint64_t mem_read_bytes;
uint64_t mem_write_bytes;
/* TBD */
-} npu_profile_op;
+} npu_profile_layer;
typedef struct {
- uint32_t num_ops;
- npu_profile_op *profile;
+ uint32_t num_layers;
+ npu_profile_layer *layers;
} npu_profile;
/**
* @brief Get the profile information from NPU
* @param[in] dev NPU device handle
- * @param[in] run_id Identifier for each inference (obtained by runNPU_*)
+ * @param[in] task_id Identifier for each inference (obtained by runNPU_*)
* @param[out] profile Profile instance
* @return 0 if no error, otherwise a negative errno.
*/
-int getNPU_profile (npudev_h dev, int run_id, npu_profile *profile);
+int getNPU_profile (npudev_h dev, int task_id, npu_profile *profile);
/**
* @brief Free the profile instance obtained by getNPU_profile().
/**
* @brief Get the profile information from NPU
* @param[in] dev The NPU device handle
- * @param[in] run_id The identifier for each inference
+ * @param[in] task_id The identifier for each inference
* @param[out] profile The profile instance
* @return 0 if no error, otherwise a negative errno.
+ * @note Thin wrapper: the request is forwarded unchanged to getProfile() of
+ *       the host handler set up by INIT_HOST_HANDLER (macro defined elsewhere;
+ *       presumably it binds host_handler to the handler behind dev - confirm).
*/
-int getNPU_profile (npudev_h dev, int run_id, npu_profile *profile)
+int getNPU_profile (npudev_h dev, int task_id, npu_profile *profile)
{
INIT_HOST_HANDLER (host_handler, dev);
- return host_handler->getProfile (run_id, profile);
+ return host_handler->getProfile (task_id, profile);
}
/**
/**
* @brief Get the profile information from NPU
- * @param[in] run_id The identifier for each inference
+ * @param[in] task_id The identifier for each inference
* @param[out] profile The profile instance
* @return 0 if no error, otherwise a negative errno.
+ * @note Parsing of the raw profile is not implemented yet, so a successful
+ *       call currently yields an empty profile (zero layers, null array).
*/
int
-HostHandler::getProfile (int run_id, npu_profile *profile)
+HostHandler::getProfile (int task_id, npu_profile *profile)
{
- if (run_id < 0 || profile == nullptr) {
+  if (task_id < 0 || profile == nullptr) {
logerr (TAG, "Invalid parameter provided\n");
return -EINVAL;
}
assert (api != nullptr);
-  void *profile_buffer;
- int status = api->getProfile (run_id, &profile_buffer);
+  /* A backend may report success without writing the out-parameters (the
+   * emulated one does), so give them defined values before the call. */
+  void *profile_buffer = nullptr;
+  size_t profile_size = 0;
+  int status = api->getProfile (task_id, &profile_buffer, &profile_size);
if (status != 0) {
+    logerr (TAG, "Failed to get profile information: %d\n", status);
+    return status;
+  }
+
+  profile->num_layers = 0;
+  profile->layers = nullptr;
+  if (profile_buffer != nullptr) {
// TODO: Perform parsing
+    /* The raw buffer is malloc()-ed by the driver API and nothing else keeps
+     * a reference to it; release it here so that it does not leak. */
+    free (profile_buffer);
}
- return status;
+  return 0;
}
/**
void *addr, size_t size) const { return -EPERM; }
#endif
- virtual int getProfile (int run_id, void **profile) const { return -EPERM; }
+ virtual int getProfile (int task_id, void **profile_buf,
+ size_t *profile_size) const { return -EPERM; }
protected:
int dev_id_; /**< device id. assume that 0 <= id < getNUmDevices() */
void *addr, size_t size) const;
#endif
- int getProfile (int run_id, void **profile) const;
+ int getProfile (int task_id, void **profile_buf,
+ size_t *profile_size) const;
private:
int getDrvVersion () const;
int registerModel (model_config_t *model) const;
int deregisterModel (unsigned long long id) const;
- int getProfile (int run_id, void **profile) const;
+ int getProfile (int task_id, void **profile_buf,
+ size_t *profile_size) const;
private:
static std::atomic<int> global_fd_;
}
int
-TrinityEmulAPI::getProfile (int run_id, void **profile) const
+TrinityEmulAPI::getProfile (int task_id, void **profile_buf,
+ size_t *profile_size) const
{
// TODO: allocate the buffer and call APIs from simulator
return 0;
#include "NPUdrvAPI.h"
constexpr int max_num_devs = ((1<<CHAR_BIT) - 1);
-constexpr int max_buf_size = 1024;
+constexpr size_t max_buf_size = (256 * PAGE_SIZE);
const std::string TrinityVision2API::dev_node_base = "triv2";
std::bitset<CHAR_BIT> TrinityVision2API::dev_bitset = 0;
#endif
int
-TrinityVision2API::getProfile (int run_id, void **profile) const
+TrinityVision2API::getProfile (int task_id, void **profile_buf,
+ size_t *profile_size) const
{
- // TODO: allocate the buffer and call ioctl() to the kernel driver
+ struct trinity_profile profile;
+ size_t size = max_buf_size;
+ void * buf;
+ int ret;
+
+ buf = malloc(max_buf_size);
+ if (!buf)
+ return -ENOMEM;
+
+ profile.task_id = task_id;
+ profile.buf = buf;
+ profile.buf_size = size;
+ profile.next_size = 0;
+
+ ret = ioctl (this->getDeviceFD (), TRINITY_IOCTL_GET_PROFILE, &profile);
+ if (ret != 0)
+ goto ioctl_fail;
+
+ if (profile.next_size != 0) {
+ buf = realloc (profile.buf, profile.buf_size + profile.next_size);
+ if (!buf) {
+ free (profile.buf);
+ return -ENOMEM;
+ }
+
+ profile.buf = (char *) buf + profile.buf_size;
+ profile.buf_size = profile.next_size;
+ profile.next_size = 0;
+
+ size += profile.next_size;
+
+ ret = ioctl (this->getDeviceFD (), TRINITY_IOCTL_GET_PROFILE, &profile);
+ if (ret != 0)
+ goto ioctl_fail;
+ }
+
+ *profile_buf = buf;
+ *profile_size = size;
+
return 0;
+
+ioctl_fail:
+ free (profile.buf);
+ return -errno;
}
install_rpath : ne_libdir,
install_dir : join_paths(ne_bindir, 'apptests')
)
+
+# AppTest binary built from tvn_triv2_profile.cc; installed under <bindir>/apptests
+executable ('apptest_tvn_triv2_profile',
+ 'tvn_triv2_profile.cc',
+ include_directories : ne_apptest_inc,
+ dependencies : ne_test_utils_dep,
+ link_with : ne_library_shared,
+ install : true,
+ install_rpath : ne_libdir,
+ install_dir : join_paths(ne_bindir, 'apptests')
+)
--- /dev/null
+/**
+ * Proprietary
+ * Copyright (C) 2020 Samsung Electronics
+ * Copyright (C) 2020 Dongju Chae <dongju.chae@samsung.com>
+ */
+/**
+ * @file tvn_triv2_profile.cc
+ * @date 07 Sep 2020
+ * @brief AppTest to test profiling APIs
+ * @author Dongju Chae <dongju.chae@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+
+#include <string.h>
+#include <unistd.h>
+
+#include <ne_test_utils.h>
+
+using namespace std;
+
+/**
+ * @brief AppTest driver: loads one model, runs it on the device, verifies the
+ *        outputs against golden files and prints the profile of the run.
+ */
+class Tester : public UtilTRIV2
+{
+  public:
+    Tester () : model_id_ (0), total_ (0), debug_ (false), sync_ (false) {}
+
+    /** @brief turn on the debugging flag */
+    void set_debug () {
+      debug_ = true;
+    }
+
+    /** @brief silence stdout/stderr for the rest of the process */
+    void set_mute () {
+      /** redirect to /dev/null */
+      if (freopen("/dev/null", "w", stdout) == NULL)
+        cerr << "Warning: Failed to reopen stdout\n";
+      if (freopen("/dev/null", "w", stderr) == NULL)
+        cerr << "Warning: Failed to reopen stderr\n";
+    }
+
+    /** @brief request the synchronous run mode */
+    void set_sync () {
+      sync_ = true;
+    }
+
+    /**
+     * @brief load the model and remember its id
+     * @param[in] model the model path handed to UtilTRIV2::loadModel()
+     * @return the value returned by UtilTRIV2::loadModel()
+     */
+    int load_model (std::string model) {
+      npu_priority priority = NPU_PRIORITY_MID;
+      uint32_t timeout = 1000;
+
+      /* one more result is expected by wait_runs() */
+      total_++;
+
+      return UtilTRIV2::loadModel (model, &model_id_, priority, timeout);
+    }
+
+    /**
+     * @brief initialize the utility base and load the model
+     * @param[in] model the model path
+     * @return 0 if no error, otherwise the failing status
+     */
+    int init (std::string model) {
+      int status = UtilTRIV2::init ();
+      if (status != 0) {
+        cerr << "Failed to initialize\n";
+        return status;
+      }
+
+      return load_model (model);
+    }
+
+    /**
+     * @brief run the loaded model, wait for its result and print the profile
+     * @return 0 if every run passed, a negative value otherwise
+     */
+    int run () {
+      int status = UtilTRIV2::run (model_id_, callback, sync_);
+      if (status < 0) {
+        cerr << "Failed to run the model: " << status << "\n";
+        return status;
+      }
+
+      wait_runs ();
+
+      /* a non-negative status is used as the task id of the run */
+      print_profile (status);
+
+      return passed_ == total_ ? 0 : -EINVAL;
+    }
+
+    /**
+     * @brief fetch and print the per-layer profile of a task
+     * @param[in] task_id the task identifier
+     */
+    void print_profile (int task_id) {
+      npu_profile profile = {};
+      int status = UtilTRIV2::getProfile (task_id, &profile);
+      if (status != 0) {
+        cerr << "Failed to get profile: " << status << "\n";
+        return;
+      }
+
+      if (profile.layers == nullptr)
+        return;
+
+      for (uint32_t i = 0; i < profile.num_layers; i++) {
+        cerr << "[" << i << "] " << profile.layers[i].name << "\n";
+        cerr << "\tLatency (msec) : " << profile.layers[i].latency_ms << "\n";
+        cerr << "\tLatency (cycles) : " << profile.layers[i].latency_cycles << "\n";
+        cerr << "\tMemRead (KBytes) : " << (profile.layers[i].mem_read_bytes >> 10) << "\n";
+        cerr << "\tMemWrite (KBytes) : " << (profile.layers[i].mem_write_bytes >> 10) << "\n";
+      }
+      /* NOTE(review): the library header mentions a dedicated routine to free
+       * a profile instance; confirm that a plain free() is equivalent. */
+      free (profile.layers);
+    }
+
+    /** @brief block until every expected run has reported its result */
+    void wait_runs () {
+      unique_lock<mutex> lock (m_);
+      cv_.wait (lock, [this]() { return done_ == total_; });
+    }
+
+    /**
+     * @brief output callback: compare each output buffer with its golden file
+     * @param[in] output the output buffers; each buffer is released here
+     * @param[in] sequence unused
+     * @param[in] data the directory path holding output_fmap_<idx>.bin files
+     */
+    static void callback (output_buffers *output, uint64_t sequence,
+        void *data) {
+      const char *dirpath = static_cast<const char *> (data);
+      /* without a golden directory nothing can be verified: report failure
+       * instead of building a std::string from a null pointer */
+      bool matched = (dirpath != nullptr);
+
+      for (uint32_t idx = 0; idx < output->num_buffers; idx++) {
+        char * output_data = static_cast<char*> (output->bufs[idx].addr);
+
+        /* stop comparing after the first mismatch, but keep iterating so
+         * that every remaining buffer is still released */
+        if (matched) {
+          off_t output_size = output->bufs[idx].size;
+          std::string golden_path (dirpath);
+
+          golden_path += "/output_fmap_" + std::to_string (idx) + ".bin";
+          matched =
+              (compare_data (golden_path.c_str(), output_data, output_size) == 0);
+        }
+
+        free (output_data);
+      }
+
+      report (matched);
+    }
+
+    /**
+     * @brief record the result of one run and wake up the waiter
+     * @param[in] passed whether the run produced the expected outputs
+     */
+    static void report (bool passed) {
+      unique_lock<mutex> lock (m_);
+      done_++;
+      if (passed)
+        passed_++;
+      cv_.notify_one ();
+    }
+
+  private:
+    static mutex m_;                /**< guards done_ and passed_ */
+    static condition_variable cv_;  /**< signaled by report() */
+    static uint32_t done_;          /**< number of reported runs */
+    static uint32_t passed_;        /**< number of runs reported as passed */
+
+    uint32_t model_id_;             /**< id filled in by loadModel() */
+    uint32_t total_;                /**< number of expected results */
+    bool debug_;
+    bool sync_;
+};
+
+mutex Tester::m_;
+condition_variable Tester::cv_;
+uint32_t Tester::done_ = 0;
+uint32_t Tester::passed_ = 0;
+
+/**
+ * @brief print the command-line help to stderr
+ * @param[in] prog_name the program name shown in the usage line
+ */
+static void
+print_usage (const char *prog_name)
+{
+  /* one entry per supported option, printed in this order */
+  static const char *const option_lines[] = {
+    " -h \t\t Show help messages\n",
+    " -m \t\t Mute stdout/stderr messages\n",
+    " -d \t\t Enable debugging mode\n",
+    " -s \t\t Enable run sync mode\n",
+  };
+
+  cerr << "Usage: " << prog_name << " [options] [model dirpath]\n";
+  cerr << "Options: \n";
+  for (const char *line : option_lines)
+    cerr << line;
+}
+
+/**
+ * @brief apptest main: parse the options, then load and run the model found
+ *        in the directory given as the first non-option argument
+ */
+int
+main (int argc, char **argv)
+{
+  Tester tester;
+  int opt;
+
+  optind = 0;
+  opterr = 0;
+  while ((opt = getopt (argc, argv, "dmsh")) != -1) {
+    if (opt == 'h') {
+      print_usage(argv[0]);
+      return 0;
+    }
+
+    /* unknown options are ignored */
+    if (opt == 'd')
+      tester.set_debug ();
+    else if (opt == 'm')
+      tester.set_mute ();
+    else if (opt == 's')
+      tester.set_sync ();
+  }
+
+  /* no model directory given: nothing to test */
+  if (optind >= argc) {
+    cerr << "[APPTEST] " << argv[0] << ": SKIPPED\n";
+    return 0;
+  }
+
+  /** initialize triv2 device, then run the inference with the device */
+  int status = tester.init (argv[optind]);
+  if (status == 0)
+    status = tester.run ();
+
+  if (status != 0) {
+    cerr << "[APPTEST] " << argv[0] << ": FAILED (" << status << ")\n";
+    return status;
+  }
+
+  cerr << "[APPTEST] " << argv[0] << ": PASSED\n";
+  return 0;
+}
cleanNPU_inputBuffers (dev_, &input);
}
- if (status != 0)
- return status;
-
- cb (&output, 0, cb_data);
+ if (status >= 0)
+ cb (&output, 0, cb_data);
} else {
if (model) {
status = runNPU_async (dev_, model->getModelID (), model->getInput (),
status = runNPU_async (dev_, 0, &input, cb, NULL, cb_data, NPUASYNC_WAIT);
cleanNPU_inputBuffers (dev_, &input);
}
-
- if (status != 0)
- return status;
}
- return 0;
+ return status;
}
/** @brief run inference with the given model id */
{
for (auto& model : models_) {
int status = run_each (model.get (), cb, sync);
- if (status != 0)
+ if (status < 0)
return status;
}
return stopNPU_internalInput (dev_, task_id);
}
+/**
+ * @brief get profile information from NPU
+ * @param[in] task_id the task identifier passed on to getNPU_profile()
+ * @param[out] profile the profile instance passed on to getNPU_profile()
+ * @return the value returned by getNPU_profile()
+ */
+int UtilTrinity::getProfile (int task_id, npu_profile *profile)
+{
+  const int status = getNPU_profile (dev_, task_id, profile);
+  return status;
+}
+
/** @brief configure constraint for the model */
int UtilTrinity::set_constraint (uint32_t model_id, uint32_t timeout,
npu_priority priority) {
int runInternal (uint32_t model_id, std::string dev_path);
int stopInternal (int task_id);
+ int getProfile (int task_id, npu_profile *profile);
+
UtilModel *findModel (uint32_t model_id);
protected: