This patch revises the profiling interface for the TRIV2 kernel driver.
It tries to keep the same interface as the simulator's.
Signed-off-by: Dongju Chae <dongju.chae@samsung.com>
npu_stat_task *stat;
} npu_stat_tasks;
-/** NPU Profiling (both for emulated/real-device envionment) */
+/**
+ * NPU Profiling (for both emulated and real-device environments)
+ *
+ * Note that a negative value means the corresponding profiling info is not supported.
+ */
-#define NPU_OPNAME_MAX (32)
+#define NPU_PROFILE_SIZE (128)
+#define NPU_OPNAME_MAX (32)
typedef struct {
- char name[NPU_OPNAME_MAX];
+ union {
+ struct {
+ char name[NPU_OPNAME_MAX];
- uint64_t running_cycles;
+ int64_t running_cycles;
- uint64_t dram_read_bytes;
- uint64_t dram_write_bytes;
+ int64_t dram_read_bytes;
+ int64_t dram_write_bytes;
- uint64_t sram_read_bytes;
- uint64_t sram_write_bytes;
+ int64_t sram_read_bytes;
+ int64_t sram_write_bytes;
+
+ /** TODO: Add more info */
+ };
+ char reserved[NPU_PROFILE_SIZE];
+ };
} npu_profile_layer;
typedef struct {
uint32_t num_layers;
- uint64_t total_running_cycles;
- uint64_t dram_input_footprint;
- uint64_t dram_output_footprint;
+ int64_t total_system_cycles;
+ int64_t dram_input_footprint;
+ int64_t dram_output_footprint;
npu_profile_layer *layers;
} npu_profile;
if (total_dump > 0) {
profile->layers = new npu_profile_layer [total_dump];
profile->num_layers = total_dump;
- profile->total_running_cycles = head.total_cycles;
+ profile->total_system_cycles = head.total_cycles;
profile->dram_input_footprint = head.nna_dma_in.access_footprint_byte;
profile->dram_output_footprint = head.nna_dma_out.access_footprint_byte;
#include <npubinfmt.h>
constexpr int max_num_devs = ((1<<CHAR_BIT) - 1);
-constexpr size_t max_buf_size = (256 * PAGE_SIZE);
+constexpr size_t default_buf_size = (256 * PAGE_SIZE);
const std::string TrinityVision2API::dev_node_base = "triv2";
std::bitset<CHAR_BIT> TrinityVision2API::dev_bitset = 0;
TrinityVision2API::getProfile (int task_id, npu_profile *profile) const
{
struct trinity_profile t_profile;
- size_t size = max_buf_size;
- void * buf;
- int ret;
+ int ret = 0;
- buf = malloc(max_buf_size);
- if (!buf)
- return -ENOMEM;
+ if (profile == nullptr)
+ return -EINVAL;
- t_profile.task_id = task_id;
- t_profile.buf = buf;
- t_profile.buf_size = size;
- t_profile.next_size = 0;
+ t_profile.taskid = task_id;
- ret = ioctl (this->getDeviceFD (), TRINITY_IOCTL_GET_PROFILE, &t_profile);
+ ret = ioctl (this->getDeviceFD (), TRINITY_IOCTL_GET_PROFILE_META, &t_profile);
if (ret != 0)
- goto ioctl_fail;
+ return -errno;
- if (t_profile.next_size != 0) {
- buf = realloc (t_profile.buf, t_profile.buf_size + t_profile.next_size);
- if (!buf) {
- free (t_profile.buf);
- return -ENOMEM;
- }
+ /** no profiling data is available (zero ops), which is not an error */
+ if (t_profile.total_ops == 0)
+ return 0;
- t_profile.buf = (char *) buf + t_profile.buf_size;
- t_profile.buf_size = t_profile.next_size;
- t_profile.next_size = 0;
+ profile->total_system_cycles = t_profile.total_cycles;
+ profile->dram_input_footprint = -1;
+ profile->dram_output_footprint = -1;
- size += t_profile.next_size;
+ profile->num_layers = t_profile.total_ops;
+ profile->layers = new npu_profile_layer[profile->num_layers];
- ret = ioctl (this->getDeviceFD (), TRINITY_IOCTL_GET_PROFILE, &t_profile);
- if (ret != 0)
- goto ioctl_fail;
+ t_profile.profile_buf = profile->layers;
+ ret = ioctl (this->getDeviceFD (), TRINITY_IOCTL_GET_PROFILE_BUFF, &t_profile);
+ if (ret != 0) {
+ delete [] (profile->layers);
+
+ profile->layers = nullptr;
+ profile->num_layers = 0;
+
+ return -errno;
}
- /** TODO: manipulate 'profile' from the obtained 't_profile' buffer */
- free (buf);
- return 0;
+ /** TODO: manipulate the profiling info later (i.e., per-visa to per-layer) */
-ioctl_fail:
- free (buf);
- return -errno;
+ return 0;
}
int
npu_profile profile;
int status = UtilTRIV2::getProfile (task_id, &profile);
if (status == 0) {
- if (profile.layers != nullptr) {
- cerr << "Total System Cycles : " << profile.total_running_cycles << "\n";
+ cerr << "Total System Cycles : " << profile.total_system_cycles << "\n";
+ if (profile.dram_input_footprint > 0)
cerr << "Dram Input Footprint (KB) : " << (profile.dram_input_footprint >> 10) << "\n";
+ if (profile.dram_output_footprint > 0)
cerr << "Dram Output Footprint (KB) : " << (profile.dram_output_footprint >> 10) << "\n";
- cerr << "\n";
+ if (profile.layers != nullptr) {
+ cerr << "\n";
for (uint32_t i = 0; i < profile.num_layers; i++) {
cerr << "[" << i << "] " << profile.layers[i].name << "\n";
- cerr << "\tRunning Cycles : " << profile.layers[i].running_cycles << "\n";
- cerr << "\tDram Read (KB) : " << (profile.layers[i].dram_read_bytes >> 10) << "\n";
- cerr << "\tDram Write (KB) : " << (profile.layers[i].dram_write_bytes >> 10) << "\n";
- cerr << "\tSRAM Read (KB) : " << (profile.layers[i].sram_read_bytes >> 10) << "\n";
- cerr << "\tSRAM Write (KB) : " << (profile.layers[i].sram_write_bytes >> 10) << "\n";
+ if (profile.layers[i].running_cycles > 0)
+ cerr << "\tRunning Cycles : " << profile.layers[i].running_cycles << "\n";
+ if (profile.layers[i].dram_read_bytes > 0)
+ cerr << "\tDram Read (KB) : " << (profile.layers[i].dram_read_bytes >> 10) << "\n";
+ if (profile.layers[i].dram_write_bytes > 0)
+ cerr << "\tDram Write (KB) : " << (profile.layers[i].dram_write_bytes >> 10) << "\n";
+ if (profile.layers[i].sram_read_bytes > 0)
+ cerr << "\tSRAM Read (KB) : " << (profile.layers[i].sram_read_bytes >> 10) << "\n";
+ if (profile.layers[i].sram_write_bytes > 0)
+ cerr << "\tSRAM Write (KB) : " << (profile.layers[i].sram_write_bytes >> 10) << "\n";
}
}