3 * Copyright (C) 2019 Samsung Electronics
4 * Copyright (C) 2019 MyungJoo Ham <myungjoo.ham@samsung.com>
5 * Copyright (C) 2019 Dongju Chae <dongju.chae@samsung.com>
6 * Copyright (C) 2019 Wook Song <wook16.song@samsung.com>
7 * Copyright (C) 2019 Parichay Kapoor <pk.kapoor@samsung.com>
12 * @brief API to access NPU from Host Computer
13 * @see https://code.sec.samsung.net/confluence/display/ODLC/2020+Overall+Software+Stack
14 * @author MyungJoo Ham <myungjoo.ham@samsung.com>
15 * Dongju Chae <dongju.chae@samsung.com>
16 * Wook Song <wook16.song@samsung.com>
17 * Parichay Kapoor <pk.kapoor@samsung.com>
18 * @bug No known bugs except for NYI items
19 * @note libnpuhost.h is the entry point to access NPU Engine (by host handler).
20 * Also, the term 'NPU' now includes all variants such as traditional NPU, ASR,
21 * and new device types (i.e., TRIV, TRIV2, and TRIA).
24 #ifndef __NPU_HOST_LIBNPUHOST_H__
25 #define __NPU_HOST_LIBNPUHOST_H__
32 #include <npubinfmt.h>
34 #if defined(__cplusplus)
39 * @brief Get npu-engine library version
40 * @param[out] major major version
41 * @param[out] minor minor version
42 * @param[out] extra extra version
44 void getVersion (uint32_t *major, uint32_t *minor, uint32_t *extra);
47 * @brief Returns the number of available NPU devices.
48 * @param[in] type the device type
49 * @return The number of available NPU devices.
50 * @note this number indicates the range of device IDs in getNPUdeviceByType ().
52 int getnumNPUdeviceByType (dev_type type);
55 * @brief Returns the handle of the chosen NPU device.
56 * @param[out] dev The NPU device handle
57 * @param[in] type the NPU device type
58 * @param[in] id The NPU id to get the handle. 0 <= id < getnumNPUdeviceByType().
59 * @return @c 0 if no error. otherwise a negative error value
60 * @note the caller should call putNPUdevice() to release the device handle
62 int getNPUdeviceByType (npudev_h *dev, dev_type type, uint32_t id);
65 * @brief Returns the handle of any available device with the given type and tops.
66 * @param[out] dev The NPU device handle
67 * @param[in] type the NPU device type
68 * @param[in] tops the device's computing power (Tera Operations Per Sec)
69 * @return @c 0 if no error. otherwise a negative error value
70 * @note the caller should call putNPUdevice() to release the device handle
72 int getNPUdeviceByTypeAny (npudev_h *dev, dev_type type, uint32_t tops);
75 * @brief release the NPU device instance obtained by getNPUdeviceByType () or getNPUdeviceByTypeAny ()
76 * @param[in] dev the NPU device handle
78 void putNPUdevice (npudev_h dev);
81 * @brief Get the driver API level that npu-engine library assumes
82 * @param[out] level driver API level
83 * @note when this API level is lower than the actual driver's one,
84 * some operations might not work.
86 void getDriverAPILevel (uint32_t *level);
89 * @brief Get the driver API level of opened NPU device
90 * @param[in] dev the NPU device handle
91 * @param[out] level driver API level
92 * @return 0 if no error, otherwise a negative errno
94 int getNPU_driverAPILevel (npudev_h dev, uint32_t *level);
97 * @brief Get the TOPS of the opened NPU device
98 * @param[in] dev the NPU device handle
99 * @param[out] tops npu tops
100 * @return 0 if no error, otherwise a negative errno
101 * @note this is not supported for emulated devices
103 int getNPU_tops (npudev_h dev, uint32_t *tops);
106 * @brief Get the DSP DSPM size of the opened NPU device
107 * @param[in] dev the NPU device handle
108 * @param[out] dspm dspm size
109 * @return 0 if no error, otherwise a negative errno
110 * @note this is not supported for emulated devices
112 int getNPU_dspmSize (npudev_h dev, uint32_t *dspm);
115 * @brief Get metadata for NPU model
116 * @param[in] model The path of model binary file
117 * @param[in] need_extra whether you want to extract the extra data in metadata
118 * @return the metadata structure to be filled if no error, otherwise NULL
120 * @note For most npu-engine users, the extra data is not useful because it will be
121 * used for second-party users (e.g., compiler, simulator).
122 * Also, the caller needs to free the metadata.
124 * @note the caller needs to free the metadata
126 npubin_meta *getNPUmodel_metadata (const char *model, bool need_extra);
129 * @brief Send the NN model to NPU.
130 * @param[in] dev The NPU device handle
131 * @param[in] modelfile The filepath to the compiled NPU NN model in any buffer_type
132 * @param[out] model_id The modelid allocated for this instance of NN model.
133 * @return @c 0 if no error. otherwise a negative error value
135 * @detail For ASR devices, which do not accept models, but have models
136 * embedded in devices, you do not need to call this function;
137 * register calls for ASR are ignored.
139 int registerNPUmodel (npudev_h dev, generic_buffer *modelfile,
143 * @brief Remove the NN model from NPU
144 * @param[in] dev The NPU device handle
145 * @param[in] model_id The model to be removed from the NPU.
146 * @return @c 0 if no error. otherwise a negative error value
147 * @detail This may incur some latency with memory compaction.
149 int unregisterNPUmodel (npudev_h dev, uint32_t model_id);
152 * @brief Remove all NN models from NPU
153 * @param[in] dev The NPU device handle
154 * @return @c 0 if no error. otherwise a negative error value
156 int unregisterNPUmodel_all (npudev_h dev);
159 * @brief Get tensor size that the target model assumes
160 * @param[in] dev The NPU device handle
161 * @param[in] model_id The target model id
162 * @param[in] input true if it's input tensor
163 * @param[in] index tensor index
164 * @param[out] size tensor size
165 * @return 0 if no error. otherwise a negative error value
167 int getNPUmodel_tensorSize (npudev_h dev, uint32_t model_id, bool input,
168 uint32_t index, uint32_t *size);
171 * @brief [OPTIONAL] Set the data layout for input/output tensors
172 * @param[in] dev The NPU device handle
173 * @param[in] model_id The ID of model whose layouts are set
174 * @param[in] info_in the layout/type info for input tensors
175 * @param[in] info_out the layout/type info for output tensors
176 * @return @c 0 if no error. otherwise a negative error value
177 * @note if this function is not called, default layout/type will be used.
179 int setNPU_dataInfo (npudev_h dev, uint32_t model_id,
180 tensors_data_info *info_in, tensors_data_info *info_out);
183 * @brief [OPTIONAL] Set the inference constraint for next NPU inferences
184 * @param[in] dev The NPU device handle
185 * @param[in] model_id The target model id
186 * @param[in] constraint inference constraint (e.g., timeout, priority)
187 * @return @c 0 if no error. otherwise a negative error value
188 * @note If this function is not called, default values are used.
190 int setNPU_constraint (npudev_h dev, uint32_t model_id,
191 npu_constraint constraint);
194 * @brief Execute inference.
195 * @param[in] dev The NPU device handle
196 * @param[in] model_id The model id to be inferred
197 * @param[in] mode Configures how this inference works.
198 * @param[in] input The input data to be inferred.
199 * @param[in,out] [nullable] output The output data to be filled in.
200 * @param[in] [nullable] cb The output callback handler
201 * @param[in] [nullable] data The data to pass to callback handler
202 * @return @c positive id if no error. otherwise a negative error value
203 * @note This API allows users to use pre-allocated (dmabuf) input/output buffers
204 * to avoid unnecessary memcpy. Make sure that they have 'BUFFER_DMABUF' types.
206 int runNPU_model (npudev_h dev, uint32_t model_id, npu_infer_mode mode,
207 const input_buffers *input, output_buffers *output,
208 npuOutputNotify cb, void *data);
211 * @brief Execute inference. Blocking call (wait until output is available).
212 * @param[in] dev The NPU device handle
213 * @param[in] model_id The model id to be inferred
214 * @param[in] input The input data to be inferred.
215 * @param[out] output The output result to be filled.
216 * @return @c positive id if no error. otherwise a negative error value
218 * @note This is syntactic sugar for runNPU_model(), but it is deprecated.
219 * Please use runNPU_model().
220 * @detail There is a memcpy for the output buffer.
222 int runNPU_sync (npudev_h dev, uint32_t model_id, const input_buffers *input,
223 output_buffers *output);
226 * @brief Invoke NPU inference. Non-blocking call.
227 * @param[in] dev The NPU device handle
228 * @param[in] model_id The model id to be inferred
229 * @param[in] input The input data to be inferred.
230 * @param[in] [nullable] cb The output callback handler
231 * @param[out] [nullable] sequence The sequence number (deprecated).
232 * @param[in] [nullable] data The data to pass to callback handler
233 * @param[in] mode Configures how this operation works (deprecated).
234 * @return @c positive id if no error. otherwise a negative error value
236 * @note This is syntactic sugar for runNPU_model(), but it is deprecated.
237 * Please use runNPU_model().
238 * @detail There is a memcpy for the output buffer.
240 int runNPU_async (npudev_h dev, uint32_t model_id, const input_buffers *input,
241 npuOutputNotify cb, uint64_t *sequence, void *data,
242 npu_async_mode mode);
245 * @brief get the current memory status for the given device
246 * @param[in] dev The NPU device handle
247 * @param[out] alloc_total The size of allocated memory until now
248 * @param[out] free_total The size of freed memory until now
249 * @return @c 0 if no error. otherwise a negative error value
251 int getNPU_memoryStatus (npudev_h dev, size_t *alloc_total, size_t *free_total);
254 * @brief Get the current device status to be used
255 * @param[in] dev The NPU device handle
256 * @param[out] status the device status
257 * @param[out] num_requests the number of running requests (or pending)
258 * @return 0 if no error, otherwise a negative errno.
260 int getNPU_deviceStatus (npudev_h dev, npu_status *status,
261 uint32_t *num_requests);
264 * [IMPORTANT] Descriptions for buffer allocation APIs.
266 * NPU Engine provides some APIs to allocate model and input buffers for users.
267 * Each buffer may have one of three types.
268 * - BUFFER_FILE: buffer with the content of filepath (virtual mapping)
269 * - BUFFER_MAPPED: buffer with the requested memory size (virtual mapping)
270 * - BUFFER_DMABUF: buffer with dmabuf-fd sharing (physically-contiguous)
272 * Each allocation API has several pre-/post-conditions
273 * Pre-conditions: users must specify some variables of buffers
274 * - BUFFER_FILE: buffer->type, buffer->size, and buffer->filepath
275 * - BUFFER_MAPPED: buffer->type and buffer->size
276 * - BUFFER_DMABUF: buffer->type and buffer->size
278 * Post-conditions: one of internal variables is assigned
279 * - BUFFER_FILE: None
280 * - BUFFER_MAPPED: buffer->addr
281 * - BUFFER_DMABUF: buffer->dmabuf
283 * The below provides the usage of buffer allocation APIs.
285 * generic_buffer model, input;
288 * if (getNPUdeviceByType (&dev, type, 0) != 0)
291 * model.type = BUFFER_MAPPED;
293 * if (allocNPU_modelBuffer (dev, &model) != 0) {
297 * input.type = BUFFER_MAPPED;
299 * if (allocNPU_inputBuffer (dev, &input) != 0) {
303 * cleanNPU_modelBuffer (dev, &model);
304 * cleanNPU_inputBuffer (dev, &input);
309 * @brief Allocate a buffer for NPU model with the requested buffer type.
310 * @param[in] dev The NPU device handle
311 * @param[in,out] buffer the buffer pointer where memory is allocated.
312 * @return 0 if no error, otherwise a negative errno.
314 int allocNPU_modelBuffer (npudev_h dev, generic_buffer *buffer);
317 * @brief Free the buffer and remove the address mapping.
318 * @param[in] dev The NPU device handle
319 * @param[in] buffer the model buffer
320 * @return 0 if no error, otherwise a negative errno.
322 int cleanNPU_modelBuffer (npudev_h dev, generic_buffer *buffer);
325 * @brief Allocate a buffer for NPU input with the requested buffer type.
326 * @param[in] dev The NPU device handle
327 * @param[in,out] buffer the buffer pointer where memory is allocated.
328 * @return 0 if no error, otherwise a negative errno.
330 int allocNPU_inputBuffer (npudev_h dev, generic_buffer *buffer);
333 * @brief Free the buffer and remove the address mapping.
334 * @param[in] dev The NPU device handle
335 * @param[in] buffer the input buffer
336 * @return 0 if no error, otherwise a negative errno.
338 int cleanNPU_inputBuffer (npudev_h dev, generic_buffer *buffer);
341 * @brief Allocate input buffers, which have multiple instances of generic_buffer
342 * @param[in] dev The NPU device handle
343 * @param[in,out] input input buffers.
344 * @return 0 if no error, otherwise a negative errno.
345 * @note it reuses allocNPU_inputBuffer().
347 int allocNPU_inputBuffers (npudev_h dev, input_buffers *input);
350 * @brief Free input buffers allocated by allocNPU_inputBuffers().
351 * @param[in] dev The NPU device handle
352 * @param[in,out] input input buffers.
353 * @note it reuses cleanNPU_inputBuffer().
354 * @return 0 if no error, otherwise a negative errno.
356 int cleanNPU_inputBuffers (npudev_h dev, input_buffers *input);
359 * @brief Allocate a generic buffer with the requested buffer type.
360 * @param[in] dev The NPU device handle
361 * @param[in,out] buffer the buffer pointer where memory is allocated.
362 * @return 0 if no error, otherwise a negative errno.
364 int allocNPU_genericBuffer (npudev_h dev, generic_buffer *buffer);
367 * @brief Free the generic buffer and remove the address mapping
368 * @param[in] dev The NPU device handle
369 * @param[in] buffer the generic buffer
370 * @return 0 if no error, otherwise a negative errno.
372 int cleanNPU_genericBuffer (npudev_h dev, generic_buffer *buffer);
375 * @brief Allocate generic buffers with the requested buffer type.
376 * @param[in] dev The NPU device handle
377 * @param[in,out] buffers the generic buffers where memory is allocated.
378 * @return 0 if no error, otherwise a negative errno.
380 int allocNPU_genericBuffers (npudev_h dev, generic_buffers *buffers);
383 * @brief Free generic buffers allocated by allocNPU_genericBuffers().
384 * @param[in] dev The NPU device handle
385 * @param[in,out] buffers the generic buffers to be freed.
386 * @return 0 if no error, otherwise a negative errno.
388 int cleanNPU_genericBuffers (npudev_h dev, generic_buffers *buffers);
391 * @brief Get the profile information from NPU
392 * @param[in] dev NPU device handle
393 * @param[in] req_id Identifier for each inference (obtained by runNPU_*)
394 * @param[out] profile Profile instance
395 * @return 0 if no error, otherwise a negative errno.
396 * @note This is a one-shot API. Don't call it multiple times for the same inference.
397 * @note Internal data of npu_profile is valid until putNPU_profile is called.
398 * @note The existence of model's extended metadata decides its profiling level.
399 * (e.g., if extended metadata does not exist, it performs vISA-level profiling.)
401 int getNPU_profile (npudev_h dev, int req_id, npu_profile *profile);
404 * @brief Get the profile information from NPU with optional requirements
405 * @param[in] dev NPU device handle
406 * @param[in] req_id Identifier for each inference (obtained by runNPU_*)
407 * @param[in] opt Profile options
408 * @param[out] profile Profile instance
409 * @return 0 if no error, otherwise a negative errno.
410 * @note This is a one-shot API. Don't call it multiple times for the same inference.
411 * @note Internal data of npu_profile is valid until putNPU_profile is called.
413 int getNPU_profile_opt (npudev_h dev, int req_id, const npu_profile_opt opt,
414 npu_profile *profile);
417 * @brief Free the profile instance obtained by getNPU_profile() or getNPU_profile_opt().
418 * @param[in] profile Profile instance
420 void putNPU_profile (npu_profile *profile);
422 /** NPU Statistics (only for real-device environment) */
425 * @brief get the stats for the latest apps of the target device
426 * @param[in] dev The NPU device handle
427 * @param[out] stat The list of app stat
428 * @note The caller has the responsibility to free the resources.
429 * This API does not work in the emulated environment.
431 int getNPU_statApps (npudev_h dev, npu_stat_apps *stat);
434 * @brief Free the stat instance obtained by getNPU_statApps().
435 * @param[in] stat Stat instance
437 void putNPU_statApps (npu_stat_apps *stat);
440 * @brief get the stats for the latest requests of the target app
441 * @param[in] dev The NPU device handle
442 * @param[in] app_id The identifier of target app (obtained by getNPU_statApps)
443 * @param[out] stat The list of request stat
444 * @note The caller has the responsibility to free the resources.
445 * This API does not work in the emulated environment.
447 int getNPU_statReqs (npudev_h dev, int app_id, npu_stat_reqs *stat);
450 * @brief Free the stat instance obtained by getNPU_statReqs().
451 * @param[in] stat Stat instance
453 void putNPU_statReqs (npu_stat_reqs *stat);
457 * @param[in] level log severity level
458 * @param[in] tag log tag for users
459 * @param[in] format log format string
460 * @return 0 if no error. Otherwise a negative errno
462 int writeNPU_log (npu_loglevel level, const char *tag, const char *format, ...);
/**
 * Convenience wrappers around writeNPU_log () that fix the severity level to
 * NPU_LOG_INFO, NPU_LOG_WARN, and NPU_LOG_ERROR, respectively.
 * '##__VA_ARGS__' is a GNU extension: the preceding comma is dropped when no
 * variadic argument is given, so a call with only (tag, format) still expands
 * to a valid writeNPU_log () call.
 */
464 #define writeNPU_logInfo(tag, format, ...) \
465 writeNPU_log (NPU_LOG_INFO, tag, format, ##__VA_ARGS__)
466 #define writeNPU_logWarn(tag, format, ...) \
467 writeNPU_log (NPU_LOG_WARN, tag, format, ##__VA_ARGS__)
468 #define writeNPU_logError(tag, format, ...) \
469 writeNPU_log (NPU_LOG_ERROR, tag, format, ##__VA_ARGS__)
471 /** NPU Request/Submit Interface (decoupled version for runNPU_* APIs) */
474 * @brief Create NPU inference request
475 * @param[in] dev The NPU device handle
476 * @param[in] model_id The model to be inferred.
477 * @param[out] req_id The ID of created request
478 * @return 0 if no error. Otherwise a negative errno
479 * @note The created request is not submitted until submitNPU_request() is called.
480 * Also, the request can be submitted multiple times but removeNPU_request()
481 * should be called explicitly when it's no longer used.
483 int createNPU_request (npudev_h dev, uint32_t model_id, int *req_id);
486 * @brief Remove the request instance
487 * @param[in] dev The NPU device handle
488 * @param[in] req_id The request's ID
489 * @return 0 if no error. Otherwise a negative errno
491 int removeNPU_request (npudev_h dev, int req_id);
494 * @brief Get the request's model id
495 * @param[in] dev The NPU device handle
496 * @param[in] req_id The request's ID
497 * @param[out] model_id The request's model ID
498 * @return 0 if no error. Otherwise a negative errno
500 int getNPU_requestModel (npudev_h dev, int req_id, uint32_t *model_id);
503 * @brief Set request's input/output data
504 * @param[in] dev The NPU device handle
505 * @param[in] req_id The request ID
506 * @param[in] input The input data buffers
507 * @param[in] in_info The input data info (format, type)
508 * @param[in] output The output data buffers
509 * @param[in] out_info The output data info (format, type)
510 * @return 0 if no error. Otherwise a negative errno
511 * @note The data and its data info are user-expected ones. When data format/type are
512 * different from the model-assumed ones, npu-engine performs data manipulation
513 * internally (e.g., NHWC <-> TRIV2).
514 * @note it's not necessary if you're going to use submitNPU_requestKernel()
516 int setNPU_requestData (npudev_h dev, int req_id, input_buffers *input,
517 tensors_data_info *in_info, output_buffers *output,
518 tensors_data_info *out_info);
521 * @brief Set output callback of the request
522 * @param[in] dev The NPU device handle
523 * @param[in] req_id The request ID
524 * @param[in] cb The output callback handler
525 * @param[in] [nullable] data The data to pass to callback handler
526 * @return 0 if no error. Otherwise a negative errno
527 * @note it's not necessary if you're going to use submitNPU_requestKernel()
529 int setNPU_requestCallback (npudev_h dev, int req_id, npuOutputNotify cb,
533 * @brief Set the request's inference mode
534 * @param[in] dev The NPU device handle
535 * @param[in] req_id The request ID
536 * @param[in] mode Configures how this inference works.
537 * @return 0 if no error. Otherwise a negative errno
538 * @note it's not necessary if you're going to use submitNPU_requestKernel()
540 int setNPU_requestMode (npudev_h dev, int req_id, npu_infer_mode mode);
543 * @brief [OPTIONAL] Set the request's constraint
544 * @param[in] dev The NPU device handle
545 * @param[in] req_id The request ID
546 * @param[in] constraint inference constraint (e.g., timeout, priority)
547 * @return 0 if no error. Otherwise a negative errno
548 * @note if this is not called, the default values are used (see typedef.h).
550 int setNPU_requestConstraint (npudev_h dev, int req_id,
551 npu_constraint constraint);
554 * @brief [OPTIONAL] Set the request's scheduler
555 * @param[in] dev The NPU device handle
556 * @param[in] req_id The request ID
557 * @param[in] sched npu scheduler
558 * @param[in] [nullable] sched_param npu scheduler param
559 * @return 0 if no error. Otherwise a negative errno
560 * @note if this is not called, the default scheduler is used (see typedef.h).
562 int setNPU_requestScheduler (npudev_h dev, int req_id, npu_scheduler sched,
563 npu_scheduler_param sched_param);
566 * @brief Submit the request to the NPU
567 * @param[in] dev The NPU device handle
568 * @param[in] req_id The request ID
569 * @return 0 if no error. Otherwise a negative errno
571 int submitNPU_request (npudev_h dev, int req_id);
574 * @brief Submit the request to the NPU working with kernel modules
575 * @param[in] dev The NPU device handle
576 * @param[in] req_id The request ID
577 * @return 0 if no error. Otherwise a negative errno
578 * @note this API ignores user-provided input and output data/info because
579 * the reserved kernel modules may provide input and output buffers.
580 * @note any data manipulation such as layout conversion is not supported.
581 * @note only VD NPU Scheduler is supported for now.
583 int submitNPU_requestKernel (npudev_h dev, int req_id);
585 #if defined(__cplusplus)
589 #endif /* __NPU_HOST_LIBNPUHOST_H__ */