3 * Copyright (C) 2021 Samsung Electronics
4 * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
9 * @brief Model profiler for NPU Engine (NE) users.
10 * @author Dongju Chae <dongju.chae@samsung.com>
11 * @bug No known bugs except for NYI items
14 #include "ne-profiler.h"
/**
 * @brief Construct a model profiler bound to a driver API instance.
 * @param[in] api driver API pointer; it is only stored in api_ here
 *                (getProfile () later calls api_->getProfile ()).
 */
16 ModelProfiler::ModelProfiler (const DriverAPI *api) : api_ (api) {}
/**
 * @brief Destructor; clears every request record held in profile_map_.
 * @note NOTE(review): appendRequest () stores ProfileData objects created with
 *       'new'. Whether clear () also frees them depends on the map type, which
 *       is not visible in this file - confirm.
 */
18 ModelProfiler::~ModelProfiler () {
19 profile_map_.clear ();
/**
 * @brief Register a request so that getProfile () can find it by its ID.
 * @param[in] req_id request ID, used as the key in profile_map_
 * @param[in] model model associated with the request
 * @return the value returned by profile_map_.insert ()
 */
23 ModelProfiler::appendRequest (int req_id, const Model *model) {
/** heap-allocated record pairing the request ID with its model */
24 ProfileData *data = new ProfileData (req_id, model);
/**
 * NOTE(review): if insert () can fail (e.g., for an already-registered req_id),
 * 'data' leaks unless the map takes ownership even on failure - confirm.
 */
25 return profile_map_.insert (req_id, data);
/**
 * @brief Fetch the profile of a registered request and optionally rework it
 *        with the model's extended metadata.
 * @param[in] req_id request ID previously passed to appendRequest ()
 * @param[in] opt profiling options; only opt.level is read here
 * @param[out] profile profile filled by api_->getProfile () and, for the
 *                     EXT_META/LAYER levels, rewritten by manipulateProfile ()
 * @note The handling of a missing record, a missing model and a failing
 *       driver call is not visible in this excerpt.
 */
29 ModelProfiler::getProfile (int req_id, const npu_profile_opt &opt,
30 npu_profile *profile) {
/** look up the record registered by appendRequest () */
31 ProfileData *data = profile_map_.find (req_id);
35 const Model *model = data->getModel ();
/** obtain the profile for this request from the driver API */
39 int status = api_->getProfile (req_id, profile);
/** only these two levels are post-processed with the extended metadata */
43 if (opt.level == PROFILE_LEVEL_EXT_META || opt.level == PROFILE_LEVEL_LAYER) {
44 HWmem *extended = model->getExtendedMetadata ();
45 if (extended != nullptr)
46 manipulateProfile (extended, profile);
/**
 * metadata is missing but PROFILE_LEVEL_LAYER was requested; the statement
 * executed in this case is not visible in this excerpt
 */
47 else if (opt.level == PROFILE_LEVEL_LAYER)
/** drop the record of this request from the map */
51 profile_map_.remove (req_id);
/**
 * @brief Rebuild profile->layers from the node table and the visa table that
 *        are stored in the model's extended metadata.
 * @param[in] extended memory object whose data is read as npubin_meta_profile
 * @param[in,out] profile profile whose layers array and num_layers are
 *                        replaced when the node table is not empty
 * @details A new array with one slot per node entry plus one trailing
 *          "Unclassified" slot is allocated. Values of profile->layers[i]
 *          (i = visa entry index) are accumulated into the slots of the node
 *          IDs listed by that visa entry.
 * @note Several statements of this function (declarations of pos/id/length,
 *       branch headers and closing braces) are not visible in this excerpt.
 */
56 ModelProfiler::manipulateProfile (HWmem *extended, npu_profile *profile) {
57 npubin_meta_profile *meta_profile =
58 reinterpret_cast<npubin_meta_profile *> (extended->getData ());
/**
 * node_entry_num slots for the nodes + 1 slot for "Unclassified".
 * NOTE(review): raw new[]; ownership moves to profile->layers below only when
 * the node table is not empty - the release on other paths is not visible.
 */
59 npu_profile_layer *new_layers =
60 new npu_profile_layer[meta_profile->node_entry_num + 1];
/** the extra (last) slot of the array */
62 npu_profile_layer *unclassified = &new_layers[meta_profile->node_entry_num];
/**
 * the size argument is NPU_OPNAME_MAX - 1, so snprintf already terminates the
 * string within that range; the last byte of the buffer is set separately
 */
64 snprintf (unclassified->name, NPU_OPNAME_MAX - 1, "%s", "Unclassified");
65 unclassified->name[NPU_OPNAME_MAX - 1] = '\x00';
66 unclassified->node_id = -1;
/** counters start from zero and are accumulated while parsing the visa table */
68 unclassified->running_cycles = 0;
69 unclassified->start_cycles = 0;
70 unclassified->end_cycles = 0;
71 unclassified->dram_read_bytes = 0;
72 unclassified->dram_write_bytes = 0;
73 unclassified->sram_read_bytes = 0;
74 unclassified->sram_write_bytes = 0;
76 /** 1) parsing node table */
/** maps a node ID to its slot in new_layers */
77 std::unordered_map<uint32_t, npu_profile_layer *> node_table;
80 node_table.reserve (meta_profile->node_entry_num);
82 for (uint32_t i = 0; i < meta_profile->node_entry_num; i++) {
/** each entry starts with two uint32_t fields (id, length) copied out of entry_data */
85 memcpy (&id, meta_profile->entry_data + pos, sizeof (uint32_t));
86 pos += sizeof (uint32_t);
88 memcpy (&length, meta_profile->entry_data + pos, sizeof (uint32_t));
89 pos += sizeof (uint32_t);
/** diagnostic for an entry with length 0; the follow-up action is not visible here */
92 std::cerr << "Zero length detected at ";
93 std::cerr << id << "th node" << std::endl;
/**
 * the name is read as a C string starting at entry_data + pos.
 * NOTE(review): this constructor stops at the first NUL byte and does not use
 * 'length' - presumably the stored name is NUL-terminated; confirm.
 */
99 std::string name (meta_profile->entry_data + pos);
/** the i-th node entry uses the i-th slot */
102 npu_profile_layer *layer = &new_layers[i];
104 snprintf (layer->name, NPU_OPNAME_MAX - 1, "%s", name.c_str ());
105 layer->name[NPU_OPNAME_MAX - 1] = '\x00';
108 layer->running_cycles = 0;
109 layer->start_cycles = 0;
110 layer->end_cycles = 0;
111 layer->dram_read_bytes = 0;
112 layer->dram_write_bytes = 0;
113 layer->sram_read_bytes = 0;
114 layer->sram_write_bytes = 0;
/** insert () keeps the existing mapping when the same ID was already inserted */
116 node_table.insert (std::make_pair (id, layer));
119 /** 2) parsing visa table */
/** the visa table begins at offset node_table_size inside entry_data */
120 pos = meta_profile->node_table_size;
121 for (uint32_t i = 0; i < meta_profile->visa_entry_num; i++) {
122 uint32_t id, node_num;
/** each visa entry: uint32_t id, uint32_t node_num, then node_num node IDs */
124 memcpy (&id, meta_profile->entry_data + pos, sizeof (uint32_t));
125 pos += sizeof (uint32_t);
127 memcpy (&node_num, meta_profile->entry_data + pos, sizeof (uint32_t));
128 pos += sizeof (uint32_t);
/**
 * NOTE(review): unlike the fields above, the ID list is accessed through a
 * casted pointer instead of memcpy; this relies on entry_data + pos being
 * suitably aligned for uint32_t - confirm.
 */
131 uint32_t *node_ids = (uint32_t *) (meta_profile->entry_data + pos);
133 for (uint32_t j = 0; j < node_num; j++) {
134 uint32_t node_id = node_ids[j];
135 auto it = node_table.find (node_id);
/**
 * NOTE(review): profile->layers is indexed with the visa entry index i and no
 * check against profile->num_layers is visible - confirm that the profile has
 * at least visa_entry_num layers.
 */
137 if (it != node_table.end ()) {
138 npu_profile_layer *layer = it->second;
140 /** TODO: evenly divided to fused layers */
/** each listed node receives the visa entry's value divided by node_num */
141 layer->running_cycles += profile->layers[i].running_cycles / node_num;
/** start_cycles == 0 is treated as "not set yet": the first value is kept */
142 if (layer->start_cycles == 0)
143 layer->start_cycles = profile->layers[i].start_cycles;
/** keep the largest end_cycles seen for this node */
144 if (layer->end_cycles < profile->layers[i].end_cycles)
145 layer->end_cycles = profile->layers[i].end_cycles;
146 layer->dram_read_bytes +=
147 profile->layers[i].dram_read_bytes / node_num;
148 layer->dram_write_bytes +=
149 profile->layers[i].dram_write_bytes / node_num;
150 layer->sram_read_bytes +=
151 profile->layers[i].sram_read_bytes / node_num;
152 layer->sram_write_bytes +=
153 profile->layers[i].sram_write_bytes / node_num;
154 layer->visa_exec_seq = -1;
/** reported when a listed node ID has no entry in node_table */
156 std::cerr << "Unable to find the node ID " << node_id << std::endl;
/**
 * the undivided values of this visa entry are added to the "Unclassified"
 * slot; the condition guarding these statements is not visible in this excerpt
 */
160 unclassified->running_cycles += profile->layers[i].running_cycles;
161 unclassified->dram_read_bytes += profile->layers[i].dram_read_bytes;
162 unclassified->dram_write_bytes += profile->layers[i].dram_write_bytes;
163 unclassified->sram_read_bytes += profile->layers[i].sram_read_bytes;
164 unclassified->sram_write_bytes += profile->layers[i].sram_write_bytes;
165 unclassified->visa_exec_seq = -1;
/** skip the node ID list of this visa entry */
168 pos += sizeof (uint32_t) * node_num;
171 /** 3) profile data mapping */
/**
 * number of distinct node IDs stored in the table.
 * NOTE(review): the "Unclassified" slot sits at index node_entry_num; if IDs
 * repeat, node_table.size () is smaller and num_layers + 1 would not cover
 * that slot - confirm that node IDs are unique.
 */
172 size_t num_layers = node_table.size ();
173 if (num_layers > 0) {
/** NOTE(review): assumes the previous array was allocated with new[] - confirm */
174 delete[] profile->layers;
/** hand the new array over to the profile; + 1 accounts for the extra slot */
176 profile->layers = new_layers;
177 profile->num_layers = num_layers + 1;