3 * Copyright (C) 2021 Samsung Electronics
4 * Copyright (C) 2021 Dongju Chae <dongju.chae@samsung.com>
9 * @brief Model profiler for NPU Engine (NE) users.
10 * @author Dongju Chae <dongju.chae@samsung.com>
11 * @bug No known bugs except for NYI items
14 #include "ne-profiler.h"
/**
 * @brief Construct a model profiler that keeps the given driver API pointer.
 * @param[in] api driver API pointer stored in api_; getProfile () later calls
 *                api_->getProfile () through it
 */
16 ModelProfiler::ModelProfiler (const DriverAPI *api) : api_ (api) {}
/**
 * @brief Destructor; clears all entries tracked in profile_map_.
 * @note NOTE(review): whether clear () also releases the ProfileData objects
 *       allocated in appendRequest () depends on the map type - confirm.
 */
18 ModelProfiler::~ModelProfiler () {
19 profile_map_.clear ();
/**
 * @brief Register a request so that its profile can be looked up later.
 * @param[in] req_id request id used as the key in profile_map_
 * @param[in] model model associated with the request
 * @return the value returned by profile_map_.insert ()
 */
23 ModelProfiler::appendRequest (int req_id, const Model *model) {
/* NOTE(review): presumably owned by profile_map_ once inserted; if insert ()
 * can fail (e.g. duplicate req_id) this allocation may leak - confirm. */
24 ProfileData *data = new ProfileData (req_id, model);
25 return profile_map_.insert (req_id, data);
/**
 * @brief Fetch the profile of a request from the driver and, when the model
 *        carries extended metadata, remap it with manipulateProfile ().
 * @param[in] req_id request id to look up in profile_map_
 * @param[out] profile profile structure passed to api_->getProfile () and,
 *                     optionally, rewritten by manipulateProfile ()
 * @return NOTE(review): presumably the status obtained from
 *         api_->getProfile () or an error code - confirm.
 */
29 ModelProfiler::getProfile (int req_id, npu_profile *profile) {
/* look up the record stored for this request id */
30 ProfileData *data = profile_map_.find (req_id);
34 const Model *model = data->getModel ();
/* obtain the raw profile from the driver API */
38 int status = api_->getProfile (req_id, profile);
/* extended metadata is optional; the profile is remapped only when present */
42 HWmem *extended = model->getExtendedMetadata ();
43 if (extended != nullptr)
44 manipulateProfile (extended, profile);
/* drop the entry of this request from the map */
46 profile_map_.remove (req_id);
/**
 * @brief Re-aggregate the profile entries in profile->layers into the nodes
 *        listed in the extended metadata, plus one "Unclassified" layer.
 * @details Works in three steps: 1) build a node-id -> layer table from the
 *          node table in the metadata, 2) walk the visa table and accumulate
 *          profile->layers[i] into the nodes each visa entry refers to,
 *          3) replace profile->layers with the newly built array.
 * @param[in] extended memory whose data is read as npubin_meta_profile
 * @param[in,out] profile profile whose layers/num_layers may be replaced
 */
51 ModelProfiler::manipulateProfile (HWmem *extended, npu_profile *profile) {
52 npubin_meta_profile *meta_profile =
53 reinterpret_cast<npubin_meta_profile *> (extended->getData ());
/* one slot per node entry and one extra trailing slot */
54 npu_profile_layer *new_layers =
55 new npu_profile_layer[meta_profile->node_entry_num + 1];
/* the extra slot (last index) collects the "Unclassified" numbers */
57 npu_profile_layer *unclassified = &new_layers[meta_profile->node_entry_num];
/* bounded copy of the name, then an explicit terminator in the last byte */
59 snprintf (unclassified->name, NPU_OPNAME_MAX - 1, "%s", "Unclassified");
60 unclassified->name[NPU_OPNAME_MAX - 1] = '\x00';
61 unclassified->node_id = -1;
/* start every counter from zero; they are accumulated in step 2 */
63 unclassified->running_cycles = 0;
64 unclassified->start_cycles = 0;
65 unclassified->end_cycles = 0;
66 unclassified->dram_read_bytes = 0;
67 unclassified->dram_write_bytes = 0;
68 unclassified->sram_read_bytes = 0;
69 unclassified->sram_write_bytes = 0;
71 /** 1) parsing node table */
/* maps a node id to its slot in new_layers */
72 std::unordered_map<uint32_t, npu_profile_layer *> node_table;
75 node_table.reserve (meta_profile->node_entry_num);
77 for (uint32_t i = 0; i < meta_profile->node_entry_num; i++) {
/* each node entry begins with two uint32_t fields: id, then length */
80 memcpy (&id, meta_profile->entry_data + pos, sizeof (uint32_t));
81 pos += sizeof (uint32_t);
83 memcpy (&length, meta_profile->entry_data + pos, sizeof (uint32_t));
84 pos += sizeof (uint32_t);
/* a zero length is reported as an error on stderr */
87 std::cerr << "Zero length detected at ";
88 std::cerr << id << "th node" << std::endl;
/* NOTE(review): assumes the name stored at this offset is NUL-terminated */
94 std::string name (meta_profile->entry_data + pos);
/* the i-th node entry uses the i-th slot of the new array */
97 npu_profile_layer *layer = &new_layers[i];
99 snprintf (layer->name, NPU_OPNAME_MAX - 1, "%s", name.c_str ());
100 layer->name[NPU_OPNAME_MAX - 1] = '\x00';
103 layer->running_cycles = 0;
104 layer->start_cycles = 0;
105 layer->end_cycles = 0;
106 layer->dram_read_bytes = 0;
107 layer->dram_write_bytes = 0;
108 layer->sram_read_bytes = 0;
109 layer->sram_write_bytes = 0;
/* NOTE(review): insert () keeps the first mapping if an id repeats - confirm
 * that node ids are unique in the metadata */
111 node_table.insert (std::make_pair (id, layer));
114 /** 2) parsing visa table */
/* the visa table starts right after the node table inside entry_data */
115 pos = meta_profile->node_table_size;
/* NOTE(review): profile->layers[i] is indexed by the visa entry index below;
 * no check against profile->num_layers is visible here - confirm */
116 for (uint32_t i = 0; i < meta_profile->visa_entry_num; i++) {
117 uint32_t id, node_num;
/* each visa entry begins with two uint32_t fields: id, then node_num */
119 memcpy (&id, meta_profile->entry_data + pos, sizeof (uint32_t));
120 pos += sizeof (uint32_t);
122 memcpy (&node_num, meta_profile->entry_data + pos, sizeof (uint32_t));
123 pos += sizeof (uint32_t);
/* node_num node ids follow the header; NOTE(review): read through a cast
 * pointer rather than memcpy as above, so this presumably relies on the
 * offset being suitably aligned - confirm */
126 uint32_t *node_ids = (uint32_t *) (meta_profile->entry_data + pos);
128 for (uint32_t j = 0; j < node_num; j++) {
129 uint32_t node_id = node_ids[j];
130 auto it = node_table.find (node_id);
132 if (it != node_table.end ()) {
133 npu_profile_layer *layer = it->second;
135 /** TODO: evenly divided to fused layers */
/* each referenced node receives a 1/node_num share of the entry's numbers;
 * NOTE(review): if these fields are integers the remainder is dropped */
136 layer->running_cycles += profile->layers[i].running_cycles / node_num;
/* start_cycles == 0 is treated as "not set yet": first contribution wins */
137 if (layer->start_cycles == 0)
138 layer->start_cycles = profile->layers[i].start_cycles;
/* end_cycles keeps the largest value seen so far */
139 if (layer->end_cycles < profile->layers[i].end_cycles)
140 layer->end_cycles = profile->layers[i].end_cycles;
141 layer->dram_read_bytes +=
142 profile->layers[i].dram_read_bytes / node_num;
143 layer->dram_write_bytes +=
144 profile->layers[i].dram_write_bytes / node_num;
145 layer->sram_read_bytes +=
146 profile->layers[i].sram_read_bytes / node_num;
147 layer->sram_write_bytes +=
148 profile->layers[i].sram_write_bytes / node_num;
149 layer->visa_exec_seq = -1;
/* a node id missing from the node table is reported on stderr */
151 std::cerr << "Unable to find the node ID " << node_id << std::endl;
/* the whole (undivided) entry is accumulated into "Unclassified";
 * NOTE(review): presumably only when no node of this entry was resolved -
 * confirm the guarding condition */
155 unclassified->running_cycles += profile->layers[i].running_cycles;
156 unclassified->dram_read_bytes += profile->layers[i].dram_read_bytes;
157 unclassified->dram_write_bytes += profile->layers[i].dram_write_bytes;
158 unclassified->sram_read_bytes += profile->layers[i].sram_read_bytes;
159 unclassified->sram_write_bytes += profile->layers[i].sram_write_bytes;
160 unclassified->visa_exec_seq = -1;
/* skip over this entry's node id list */
163 pos += sizeof (uint32_t) * node_num;
166 /** 3) profile data mapping */
167 size_t num_layers = node_table.size ();
/* replace the old array only when at least one node was parsed */
168 if (num_layers > 0) {
169 delete[] profile->layers;
171 profile->layers = new_layers;
/* +1 accounts for the "Unclassified" slot; NOTE(review): that slot lives at
 * index node_entry_num, so this count only covers it when node_table.size ()
 * equals node_entry_num (i.e. no repeated node ids) - confirm */
172 profile->num_layers = num_layers + 1;