npu_profile *profile);
/**
+ * @brief [Optional] Set model data of the request to be profiled
+ * @param[in] dev NPU device handle
+ * @param[in] req_id Identifier of the request
+ * @param[in] model_id Identifier of the model to be profiled
+ * @return 0 if no error, otherwise a negative errno.
+ * @note This allows an independent process (e.g., a profiler) to access
+ *       the profile info. of requests submitted by other processes.
+ */
+int setNPU_profileModel (npudev_h dev, int req_id, uint32_t model_id);
+
+/**
* @brief Free the profile instance obtained by getNPU_profile().
* @param[in] profile Profile instance
*/
}
/**
+ * @brief Associate a registered model with a request so that it can be profiled
+ * @param[in] req_id Identifier of the request (must not be negative)
+ * @param[in] model_id Identifier of the model to be profiled
+ * @return 0 if no error, otherwise a negative errno: -EINVAL for a negative
+ *         req_id, -ENOENT when model_id is unknown, or whatever the
+ *         profiler's appendRequest() reports.
+ */
+int
+HostHandler::setProfileModel (int req_id, uint32_t model_id) {
+  if (req_id >= 0) {
+    /* the profiler needs the model instance itself, not just its id */
+    Model *target = models_.find (model_id);
+    if (target != nullptr)
+      return profiler_->appendRequest (req_id, target);
+
+    logerr (TAG, "Unable to find a model\n");
+    return -ENOENT;
+  }
+
+  logerr (TAG, "Invalid parameter provided\n");
+  return -EINVAL;
+}
+
+/**
* @brief get the stats for the latest apps of the target device
* @param[out] stat The list of app stat
* @note The caller has the responsibility to free the resources.
uint32_t *size);
int getProfile (int req_id, const npu_profile_opt &opt, npu_profile *profile);
+ int setProfileModel (int req_id, uint32_t model_id);
int getAPILevel (uint32_t *level);
int getTops (uint32_t *tops);
int getDspmSize (uint32_t *dspm);
}
/**
+ * @brief [Optional] Set model data of the request to be profiled
+ * @param[in] dev NPU device handle
+ * @param[in] req_id Identifier of the request
+ * @param[in] model_id Identifier of the model to be profiled
+ * @return 0 if no error, otherwise a negative errno.
+ * @note This allows an independent process (e.g., a profiler) to access
+ *       the profile info. of requests submitted by other processes.
+ */
+int
+setNPU_profileModel (npudev_h dev, int req_id, uint32_t model_id) {
+  /* NOTE(review): presumably resolves 'dev' into 'host_handler' and returns
+   * early on an invalid handle - confirm against the macro definition */
+  INIT_HOST_HANDLER (host_handler, dev);
+
+  const int status = host_handler->setProfileModel (req_id, model_id);
+  return status;
+}
+
+/**
* @brief Free the profile instance obtained by getNPU_profile().
* @param[in] profile The profile instance
*/
install_dir : join_paths(ne_bindir, 'apptests')
)
+# AppTest that exercises the profiling APIs against an already-issued request
+executable ('apptest_tvn_triv2_profile_example',
+  'tvn_triv2_profile_example.cc',
+  link_with : ne_library_shared,
+  dependencies : ne_test_utils_dep,
+  include_directories : ne_apptest_inc,
+  install : true,
+  install_dir : join_paths(ne_bindir, 'apptests'),
+  install_rpath : ne_libdir
+)
+
executable ('apptest_tvn_triv2_preempt',
'tvn_triv2_preempt.cc',
include_directories : ne_apptest_inc,
--- /dev/null
+/**
+ * Proprietary
+ * Copyright (C) 2021 Samsung Electronics
+ * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
+ */
+/**
+ * @file tvn_triv2_profile_example.cc
+ * @date 19 July 2021
+ * @brief AppTest to test profiling APIs only
+ * @author Dongju Chae <dongju.chae@samsung.com>
+ * @bug No known bugs except for NYI items
+ */
+
+#include <ne_test_utils_common.h>
+#include <libnpuhost.h>
+#include <cerrno>
+#include <cstdlib>
+#include <iostream>
+
+#define NPU_TYPE (NPUCOND_TRIV2_CONN_SOCIP)
+
+using namespace std;
+
+extern const char *__progname;
+
+/**
+ * @brief C++ class to describe how to use npu-engine library
+ * @note The instance owns the device handle: its destructor unregisters all
+ *       models and releases the device, hence the class is non-copyable.
+ */
+class Tester {
+ public:
+  Tester () : dev_ (nullptr), model_id_ (0) {}
+  ~Tester () {
+    if (dev_) {
+      unregisterNPUmodel_all (dev_);
+      putNPUdevice (dev_);
+    }
+  }
+
+  /* a copy would make two instances release the same device handle */
+  Tester (const Tester &) = delete;
+  Tester &operator= (const Tester &) = delete;
+
+  /**
+   * @brief initialize the device handle and register the model file
+   * @param[in] model_path path of the model file to be registered
+   * @return 0 if no error, otherwise the status of the library call that failed
+   */
+  int init (const string &model_path) {
+    int status = getNPUdeviceByTypeAny (&dev_, NPU_TYPE, 2 /* 2TOPS */);
+    if (status != 0)
+      return status;
+
+    /* the model is handed over as a file-backed buffer */
+    generic_buffer model_file;
+    model_file.type = BUFFER_FILE;
+    model_file.filepath = model_path.c_str ();
+    model_file.size = get_file_size (model_path.c_str ());
+
+    return registerNPUmodel (dev_, &model_file, &model_id_);
+  }
+
+  /**
+   * @brief print the layer-level profile of a request to stderr
+   * @param[in] req_id identifier of the request to be profiled
+   * @return 0 if no error, otherwise the status of the library call that failed
+   */
+  int print_profile (int req_id) {
+    /* tell the library which model the request belongs to before querying */
+    int status = setNPU_profileModel (dev_, req_id, model_id_);
+    if (status != 0)
+      return status;
+
+    npu_profile_opt opt = {.level = PROFILE_LEVEL_LAYER};
+    /* zero-initialized so that fields left untouched read as 0 / nullptr */
+    npu_profile profile = {};
+
+    status = getNPU_profile_opt (dev_, req_id, opt, &profile);
+    if (status != 0) {
+      cerr << "Failed to get profile: " << status << "\n";
+      return status;
+    }
+
+    if (profile.prof_path != nullptr)
+      cerr << "Profile Path : " << profile.prof_path << "\n";
+    cerr << "Total System Cycles : " << profile.total_system_cycles << "\n";
+    print_kb ("DRAM Input Footprint (KB) : ", profile.dram_input_footprint);
+    print_kb ("DRAM Output Footprint (KB) : ", profile.dram_output_footprint);
+
+    if (profile.layers != nullptr) {
+      cerr << "\n";
+      for (uint32_t i = 0; i < profile.num_layers; i++) {
+        const auto &layer = profile.layers[i];
+
+        cerr << "[" << i << "] " << layer.name << "\n";
+        print_count ("\tRunning Cycles : ", layer.running_cycles);
+        print_count ("\tStart Cycles : ", layer.start_cycles);
+        print_count ("\tEnd Cycles : ", layer.end_cycles);
+        print_kb ("\tDRAM Read (KB) : ", layer.dram_read_bytes);
+        print_kb ("\tDRAM Write (KB) : ", layer.dram_write_bytes);
+        print_kb ("\tSRAM Read (KB) : ", layer.sram_read_bytes);
+        print_kb ("\tSRAM Write (KB) : ", layer.sram_write_bytes);
+        print_count ("\tvISA Exec Seq : ", layer.visa_exec_seq);
+      }
+    }
+
+    putNPU_profile (&profile);
+    return 0;
+  }
+
+ private:
+  /** @brief print "<label><value>" only when the value is positive */
+  template <typename T>
+  static void print_count (const char *label, T value) {
+    if (value > 0)
+      cerr << label << value << "\n";
+  }
+
+  /** @brief print a byte counter converted to KB only when it is positive */
+  template <typename T>
+  static void print_kb (const char *label, T bytes) {
+    if (bytes > 0)
+      cerr << label << (bytes >> 10) << "\n";
+  }
+
+  npudev_h dev_;
+  uint32_t model_id_;
+};
+
+#ifndef ENABLE_EMUL
+/**
+ * @brief parse a decimal request id given as a command-line argument
+ * @param[in] arg the argument string
+ * @param[out] req_id the parsed request id (written only on success)
+ * @return 0 if no error, otherwise a negative errno: -EINVAL when the string
+ *         is empty or not entirely numeric, -ERANGE when it does not fit.
+ */
+static int
+parse_req_id (const char *arg, int *req_id) {
+  char *end = nullptr;
+  long value;
+
+  errno = 0;
+  value = strtol (arg, &end, 10);
+  if (errno != 0)
+    return -errno;
+  /* strtol() silently yields 0 (or a numeric prefix) for malformed input */
+  if (end == arg || *end != '\0')
+    return -EINVAL;
+  /* the value must survive the narrowing from long to int */
+  if (static_cast<long> (static_cast<int> (value)) != value)
+    return -ERANGE;
+
+  *req_id = static_cast<int> (value);
+  return 0;
+}
+#endif
+
+/**
+ * @brief apptest main
+ * @return 0 when the test passed or was skipped (wrong usage, or built with
+ *         ENABLE_EMUL), otherwise the failing status code.
+ */
+int
+main (int argc, char **argv) {
+#ifndef ENABLE_EMUL
+  if (argc == 3) {
+    Tester tester;
+    int req_id = 0;
+    int status = parse_req_id (argv[2], &req_id);
+
+    /** initialize triv2 device */
+    if (status == 0)
+      status = tester.init (argv[1]);
+
+    /** get profile info. in the device */
+    if (status == 0)
+      status = tester.print_profile (req_id);
+
+    if (status != 0) {
+      cerr << "[APPTEST] " << __progname << ": FAILED (" << status << ")\n";
+      return status;
+    }
+
+    cerr << "[APPTEST] " << __progname << ": PASSED\n";
+    return 0;
+  }
+
+  cerr << "Usage: ./" << __progname << " [model_dir] [req_id]\n";
+#else
+  (void) argc;
+  (void) argv;
+#endif
+
+  cerr << "[APPTEST] " << __progname << ": SKIPPED\n";
+  return 0;
+}
ThreadSafeMap<int, EmulDmabuf> global_dmabuf_map;
/** @brief Global model id */
std::atomic<uint64_t> EmulModel::global_id_ (1);
+/** @brief Global request id: the next id to be issued by triv2_get_next_request(), starting from 1 */
+std::atomic<int32_t> global_request_id (1);
/** @brief Global mapping of models */
ThreadSafeMap<uint64_t, EmulModel> global_model_map;
/** @brief Global mapping of profile data */
}
/**
+ * @brief TRIV2 impl. of get_next_request ioctl()
+ * @param[in] ctx device context (not used by this implementation)
+ * @param[out] req_id receives the newly issued request id
+ * @return always 0
+ */
+static int
+triv2_get_next_request (trinity_cuse_context *ctx, int32_t *req_id) {
+  /* atomic post-increment: hand out the current id and advance the counter */
+  const int32_t issued = global_request_id++;
+
+  *req_id = issued;
+  return 0;
+}
+
+/**
* @brief TRIV2 impl. of hwmem_alloc ioctl()
*/
static int
.get_state = triv2_get_state,
.get_tops = triv2_get_tops,
.get_dspm = triv2_get_dspm,
+ .get_next_request = triv2_get_next_request,
/* Device Control */
.hwmem_alloc = triv2_hwmem_alloc,
.hwmem_dealloc = triv2_hwmem_dealloc,
fuse_reply_ioctl (req, 0, &val, sizeof (val));
}
break;
+ case TRINITY_IOCTL_GET_NEXT_REQUEST:
+ if (!out_size) {
+ struct iovec iov = {arg, sizeof (int32_t)};
+ fuse_reply_ioctl_retry (req, NULL, 0, &iov, 1);
+ } else {
+ int32_t val;
+ vtable->get_next_request (ctx, &val);
+ fuse_reply_ioctl (req, 0, &val, sizeof (val));
+ }
+ break;
case TRINITY_IOCTL_HWMEM_ALLOC:
if (!in_size) {
struct iovec iov = {arg, sizeof (struct trinity_ioctl_hwmem)};
int (*get_state) (trinity_cuse_context *, uint32_t *);
int (*get_tops) (trinity_cuse_context *, uint32_t *);
int (*get_dspm) (trinity_cuse_context *, uint32_t *);
+ int (*get_next_request) (trinity_cuse_context *, int32_t *);
/* Device Control */
int (*hwmem_alloc) (trinity_cuse_context *,
const struct trinity_ioctl_hwmem *);