/*
 * Copyright (c) 2022 Samsung Electronics Co., Ltd. All Rights Reserved
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "DevContext.h"

#include <chrono>
#include <future>
#include <stdexcept>
#include <thread>

namespace backend
{

// Everything related to the npu device handle is gathered in this class. However, when the npu
// daemon is implemented, everything except the context roles should be separated out of it.
DevContext::DevContext() : _dev_handles{}, _model_ids{}, _meta_map{}
{
  auto dev_count = getnumNPUdeviceByType(NPUCOND_TRIV2_CONN_SOCIP);
  if (dev_count <= 0)
  {
    throw std::runtime_error("Unable to find TRIX NPU device");
  }

  // Get NPU device handles
  for (int i = 0; i < dev_count; ++i)
  {
    npudev_h handle;
    if (getNPUdeviceByType(&handle, NPUCOND_TRIV2_CONN_SOCIP, i) < 0)
    {
      throw std::runtime_error("Failed to get TRIX NPU device handle");
    }
    _dev_handles.emplace_back(handle);
  }

  // NOTE Do not change the number of threads as long as the jobs running on them call
  //      synchronous APIs such as submitNPU_request()
  _batch_thread_pool = std::make_unique<BatchThreadPool>(_dev_handles.size());
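  // The pool is sized to one worker thread per device handle, so each queued batch job is pinned
  // to a single device; requestRun() below relies on this when it treats dev_num as the worker
  // thread number.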
  // We need to be careful not to create multiple `BatchThreadPool`s. With multiple models, having
  // multiple `BatchThreadPool`s may be a problem in the current implementation. But if creating
  // the thread pool is moved into the npu daemon, this problem should be solved naturally.
}

DevContext::~DevContext()
{
  // NOTE Must release _batch_thread_pool before releasing _dev_handles to wait for all threads
  //      to be terminated
  _batch_thread_pool.reset(nullptr);

  for (const auto &dev_handle : _dev_handles)
  {
    unregisterNPUmodel_all(dev_handle);
    putNPUdevice(dev_handle);
  }
}

ModelID DevContext::registerModel(const std::string &model_file_path)
{
  auto meta = getNPUmodel_metadata(model_file_path.c_str(), false);
  if (meta == nullptr)
  {
    throw std::runtime_error("Unable to extract the model metadata");
  }

  generic_buffer file_info;
  file_info.type = BUFFER_FILE;
  file_info.filepath = model_file_path.c_str();
  file_info.size = meta->size;

  ModelID model_id = 0;

  for (uint32_t dev_num = 0; dev_num < _dev_handles.size(); ++dev_num)
  {
    // Register the model for each device
    uint32_t model_id_at_device;
    if (registerNPUmodel(_dev_handles.at(dev_num), &file_info, &model_id_at_device) < 0)
    {
      throw std::runtime_error("Failed to register npu model");
    }

    if (dev_num == 0)
    {
      model_id = model_id_at_device;
      _meta_map[model_id_at_device] = std::shared_ptr<npubin_meta>(meta);
    }
    else
    {
      // All devices share the same metadata, so reuse the instance stored for device 0
      _meta_map[model_id_at_device] = _meta_map[model_id];
    }

    _model_ids[model_id].resize(dev_num + 1);
    _model_ids[model_id].at(dev_num) = model_id_at_device;
  }
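
  // Illustration (hypothetical ids): with two devices, _model_ids could map the public model
  // id 3 (the id issued by device 0) to {3, 7}, where 7 is the id the same model received on
  // device 1.
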
  // Return the model id for device 0 only
  return model_id;
}

void DevContext::unRegisterModel(ModelID model_id)
{
  for (uint32_t dev_num = 0; dev_num < _dev_handles.size(); ++dev_num)
  {
    const auto model_id_at_device = _model_ids.at(model_id).at(dev_num);
    const auto &dev_handle = _dev_handles.at(dev_num);

    // Remove the metadata entry for this device
    _meta_map.erase(model_id_at_device);

    // Unregister the model from each device
    unregisterNPUmodel(dev_handle, model_id_at_device);
  }

  _model_ids.erase(model_id);
}

void DevContext::requestRun(ModelID model_id, input_buffers *input_bufs, tensors_data_info *in_info,
                            output_buffers *output_bufs, tensors_data_info *out_info,
                            size_t batch_size)
{
  if (batch_size > 1)
  {
    if (in_info->num_info != 1)
    {
      throw std::runtime_error("Only a single (batched) input is supported now");
    }
    if (out_info->num_info != 1)
    {
      throw std::runtime_error("Only a single output is supported now");
    }
    if (input_bufs->bufs[0].size % batch_size != 0)
    {
      throw std::runtime_error("Invalid batch size. batch size : " + std::to_string(batch_size) +
                               ", input buffer size : " + std::to_string(input_bufs->bufs[0].size));
    }
    if (output_bufs->bufs[0].size % batch_size != 0)
    {
      throw std::runtime_error(
        "Invalid batch size. batch size : " + std::to_string(batch_size) +
        ", output tensor size : " + std::to_string(output_bufs->bufs[0].size));
    }

    // inputs/outputs for each batch
    std::vector<input_buffers> in_buffers_vec(batch_size);
    std::vector<output_buffers> out_buffers_vec(batch_size);

    // Run on the thread pool
    std::vector<std::future<int32_t>> batch_futures;
    for (uint32_t batch_num = 0; batch_num < batch_size; ++batch_num)
    {
      // The in_info and out_info stay the same even though the buffers are divided per batch,
      // so they are captured and used as they are.
      auto future = _batch_thread_pool->enqueueJob(
        [batch_size, in_info, out_info,
         this](uint32_t dev_num, ModelID model_id, const input_buffers *input_bufs,
               const output_buffers *output_bufs, uint32_t batch_num) -> int32_t {
          // Set buffers of inputs/outputs for each batch
          // TODO Support multiple inputs/outputs
          input_buffers in_batch_buffers;
          in_batch_buffers.num_buffers = input_bufs->num_buffers;
          const uint64_t in_batch_offset = input_bufs->bufs[0].size / batch_size;
          setBufferByBatch(input_bufs->bufs[0], batch_num, in_batch_offset,
                           &in_batch_buffers.bufs[0]);

          output_buffers out_batch_buffers;
          out_batch_buffers.num_buffers = output_bufs->num_buffers;
          const uint64_t out_batch_offset = output_bufs->bufs[0].size / batch_size;
          setBufferByBatch(output_bufs->bufs[0], batch_num, out_batch_offset,
                           &out_batch_buffers.bufs[0]);

          try
          {
            // dev_num is the same as the thread number in _batch_thread_pool
            this->runOneBatch(dev_num, model_id, &in_batch_buffers, in_info, &out_batch_buffers,
                              out_info);
          }
          catch (...)
          {
            // Capture the exception so it can be rethrown on the caller's thread
            _eptr = std::current_exception();
          }

          return batch_num;
        },
        model_id, input_bufs, output_bufs, batch_num);
      batch_futures.emplace_back(std::move(future));
    }
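
    // NOTE enqueueJob() supplies dev_num itself (the index of the worker thread that runs the
    //      job); only the trailing arguments are bound per job. Because the pool has one thread
    //      per device, each batch job effectively runs on its own device.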

    // Wait for all jobs to finish
    for (auto &&future : batch_futures)
    {
      future.get();
    }

    // If any job captured an exception, move it out of _eptr and rethrow it here
    if (_eptr)
    {
      std::exception_ptr eptr(nullptr);
      _eptr.swap(eptr);
      std::rethrow_exception(eptr);
    }
  }
  else
  {
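    // Single batch: run synchronously on device 0 without going through the thread pool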
    runOneBatch(0, model_id, input_bufs, in_info, output_bufs, out_info);
  }
}

void DevContext::runOneBatch(uint32_t dev_num, ModelID model_id, input_buffers *input_bufs,
                             tensors_data_info *in_info, output_buffers *output_bufs,
                             tensors_data_info *out_info)
{
  const auto &model_id_at_device = _model_ids.at(model_id).at(dev_num);

  const auto meta = _meta_map.at(model_id_at_device);
  if (meta->input_seg_num != in_info->num_info)
  {
    throw std::runtime_error("The number of inputs does not match the model's input seg num");
  }
  if (meta->output_seg_num != out_info->num_info)
  {
    throw std::runtime_error("The number of outputs does not match the model's output seg num");
  }

  const auto &dev_handle = _dev_handles.at(dev_num);
  int req_id;

  if (auto error_code = createNPU_request(dev_handle, model_id_at_device, &req_id))
  {
    throw std::runtime_error("Unable to create NPU request with model id (" +
                             std::to_string(model_id_at_device) + ")" +
                             " error code : " + std::to_string(error_code));
  }

  if (auto error_code =
        setNPU_requestData(dev_handle, req_id, input_bufs, in_info, output_bufs, out_info))
  {
    removeNPU_request(dev_handle, req_id);
    throw std::runtime_error("Unable to set request data for model id (" +
                             std::to_string(model_id_at_device) + ")" +
                             " error code : " + std::to_string(error_code));
  }

  // NOTE submitNPU_request does not seem to be thread-safe: it rarely hangs (becomes
  //      unresponsive). Ultimately, we have to either use another thread-safe API or make
  //      submitNPU_request thread-safe, but both take time. As a workaround, allow a hanging
  //      thread for now.
  // TODO Make submitNPU_request thread-safe or replace it with another thread-safe API
  std::packaged_task<int(npudev_h, int)> task(submitNPU_request);
  auto f = task.get_future();
  std::thread thread_submit_request(std::move(task), dev_handle, req_id);
  auto status = f.wait_until(std::chrono::system_clock::now() + std::chrono::seconds(60));
  if (status == std::future_status::timeout)
  {
    // There is no way to terminate a hanging submitNPU_request from the outside. If the hanging
    // thread is detached, it simply remains a hanging thread; even so, that is better than
    // having the main thread hang.
    thread_submit_request.detach();

    // TODO Enable removeNPU_request after resolving the hang.
    // removeNPU_request(dev_handle, req_id);
    throw std::runtime_error("The npu API \"submitNPU_request\" timed out");
  }

  auto error_code = f.get();
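  // f being ready means the submitted task has completed, so join() should return promptly here.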
  thread_submit_request.join();
  if (error_code != 0)
  {
    removeNPU_request(dev_handle, req_id);
    throw std::runtime_error("Unable to submit NPU request with req id (" + std::to_string(req_id) +
                             ")" + " error code : " + std::to_string(error_code));
  }

  if (auto error_code = removeNPU_request(dev_handle, req_id))
  {
    throw std::runtime_error("Unable to remove NPU request with req id (" + std::to_string(req_id) +
                             ")" + " error code : " + std::to_string(error_code));
  }
}
void DevContext::setBufferByBatch(const generic_buffer &origin_buf, uint32_t batch_num,
                                  uint64_t batch_offset, generic_buffer *batch_buf)
{
  batch_buf->addr = reinterpret_cast<uint8_t *>(origin_buf.addr) + batch_num * batch_offset;
  batch_buf->size = batch_offset;
  batch_buf->type = BUFFER_MAPPED;
}

} // namespace backend