2 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 * @brief This file describes runtime API
32 * @brief Session to query with runtime
34 * <p>nnfw_session is started and passed by calling {@link nnfw_create_session}.
35 * Each session has its own inference environment, such as model to inference, backend usage, etc.
37 * <p>Load model by calling {@link nnfw_load_model_from_file}
39 * <p>After loading, prepare inference by calling {@link nnfw_prepare}.
40 * Application can set runtime environment before prepare by calling
41 * {@link nnfw_set_available_backends} and {@link nnfw_set_op_backend}, and it is optional.
43 * <p>Application can inference by calling {@link nnfw_run}.
44 * Before inference, application has responsibility to set input tensor to set input data by calling
45 * {@link nnfw_set_input}, and output tensor to get output by calling {@link nnfw_set_output}
47 * <p>To support input and output setting, application can get
48 * input and output tensor information by calling<ul>
49 * <li>{@link nnfw_input_size}</li>
50 * <li>{@link nnfw_output_size}</li>
51 * <li>{@link nnfw_input_tensorinfo}</li>
52 * <li>{@link nnfw_output_tensorinfo}</li>
55 * <p>Application can inference many times using one session,
56 * but the next inference can only begin after the prior inference has ended
58 * <p>Application cannot use multiple models using one session
60 typedef struct nnfw_session nnfw_session;
65 * The type of tensor represented in {@link nnfw_tensorinfo}
69 /** A tensor of 32 bit floating point */
70 NNFW_TYPE_TENSOR_FLOAT32 = 0,
71 /** A tensor of 32 bit signed integer */
72 NNFW_TYPE_TENSOR_INT32 = 1,
74 * A tensor of 8 bit unsigned integers that represent real numbers.
76 * real_value = (integer_value - zeroPoint) * scale.
78 NNFW_TYPE_TENSOR_QUANT8_ASYMM = 2,
79 /** A tensor of boolean */
80 NNFW_TYPE_TENSOR_BOOL = 3,
82 /** A tensor of 8 bit unsigned integer */
83 NNFW_TYPE_TENSOR_UINT8 = 4,
85 /** A tensor of 64 bit signed integer */
86 NNFW_TYPE_TENSOR_INT64 = 5,
89 * A tensor of 8 bit signed integers that represent real numbers.
91 * real_value = (integer_value - zeroPoint) * scale.
93 NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6,
96 * A tensor of 16 bit signed integers that represent real numbers.
98 * real_value = (integer_value - zeroPoint) * scale.
100 * Forced to have zeroPoint equal to 0.
102 NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED = 7,
107 * @brief Result values returned from a call to an API function
112 NNFW_STATUS_NO_ERROR = 0,
114 * An error code for general use.
115 * Mostly used when there is no specific value for that certain situation.
117 NNFW_STATUS_ERROR = 1,
118 /** Unexpected null argument is given. */
119 NNFW_STATUS_UNEXPECTED_NULL = 2,
120 /** When a function was called but it is not valid for the current session state. */
121 NNFW_STATUS_INVALID_STATE = 3,
122 /** When it is out of memory */
123 NNFW_STATUS_OUT_OF_MEMORY = 4,
124 /** When it was given an insufficient output buffer */
125 NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE = 5,
129 * @brief Data format of a tensor
133 /** Don't care layout */
134 NNFW_LAYOUT_NONE = 0,
136 * Channel last layout
137 * If rank is 4, layout is NHWC
139 NNFW_LAYOUT_CHANNELS_LAST = 1,
141 * Channel first layout
142 * If rank is 4, layout is NCHW
144 NNFW_LAYOUT_CHANNELS_FIRST = 2,
148 * @brief Information ID for retrieving information on nnfw (e.g. version)
152 /** nnfw runtime version
153 * Its value is uint32 in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch.
155 NNFW_INFO_ID_VERSION = 0,
159 * @brief Maximum rank expressible with nnfw
161 #define NNFW_MAX_RANK (6)
164 * @brief tensor info describes the type and shape of tensors
166 * <p>This structure is used to describe input and output tensors.
167 * Application can get input and output tensor type and shape described in model by using
168 * {@link nnfw_input_tensorinfo} and {@link nnfw_output_tensorinfo}
170 * <p>Maximum rank is 6 (NNFW_MAX_RANK). And tensor's dimension value is filled in 'dims' field from
172 * For example, if tensor's rank is 4,
173 * application can get dimension value from dims[0], dims[1], dims[2], and dims[3]
175 typedef struct nnfw_tensorinfo
179 /** The number of dimensions (rank) */
182 * The dimension of tensor.
183 * Maximum rank is 6 (NNFW_MAX_RANK).
185 int32_t dims[NNFW_MAX_RANK];
189 * @brief Create a new session instance.
191 * <p>This only creates a session.
192 * Model is loaded after {@link nnfw_load_model_from_file} is invoked.
193 * And inference is performed after {@link nnfw_run} is invoked.
195 * <p>{@link nnfw_close_session} should be called once
196 * if session is no longer needed
198 * @param[out] session The session to be created
199 * @return NNFW_STATUS_NO_ERROR if successful
201 NNFW_STATUS nnfw_create_session(nnfw_session **session);
204 * @brief Close a session instance
206 * After this is called, any access to the closed session by the application is invalid
208 * @param[in] session The session to be closed
209 * @return @c NNFW_STATUS_NO_ERROR if successful
211 NNFW_STATUS nnfw_close_session(nnfw_session *session);
214 * @brief Load model from nnpackage file or directory
216 * The length of \p package_file_path must not exceed 1024 bytes including zero at the end.
218 * @param[in] session nnfw_session loading the given nnpackage file/dir
219 * @param[in] package_file_path Path to the nnpackage file or unzipped directory to be loaded
221 * @return @c NNFW_STATUS_NO_ERROR if successful
223 NNFW_STATUS nnfw_load_model_from_file(nnfw_session *session, const char *package_file_path);
226 * @brief Apply i-th input's tensor info to resize input tensor
228 * This function should be called before {@link nnfw_prepare} is invoked, and
229 * should be called after {@link nnfw_load_model_from_file} is invoked
230 * See {@link nnfw_prepare} for information applying updated tensor info
231 * If this function is called many times for same index, tensor info is overwritten
233 * @deprecated Deprecated since 1.7.0. Use {@link nnfw_set_input_tensorinfo} instead.
235 * @param[in] session Session to the input tensor info is to be set
236 * @param[in] index Index of input to be applied (0-indexed)
237 * @param[in] tensor_info Tensor info to be applied
238 * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR
240 NNFW_STATUS nnfw_apply_tensorinfo(nnfw_session *session, uint32_t index,
241 nnfw_tensorinfo tensor_info);
244 * @brief Set input model's tensor info for resizing
246 * This function can be called at any time after calling {@link nnfw_load_model_from_file}. Changing
247 * input tensor's shape will cause shape inference for the model. There are two different types of
248 * shape inference - static and dynamic. Which one to use is depend on the current state of the
250 * When it is called after calling {@link nnfw_load_model_from_file} and before calling {@link
251 * nnfw_prepare}, this info will be used when {@link nnfw_prepare}. And it will perform static shape
252 * inference for all tensors.
253 * When it is called after calling {@link nnfw_prepare} or even after {@link nnfw_run}, this info
254 * will be used when {@link nnfw_run}. And the shapes of the tensors are determined on the fly.
255 * If this function is called many times for the same index, it is overwritten.
257 * @param[in] session Session to the input tensor info is to be set
258 * @param[in] index Index of input to be set (0-indexed)
259 * @param[in] tensor_info Tensor info to be set
260 * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR
262 NNFW_STATUS nnfw_set_input_tensorinfo(nnfw_session *session, uint32_t index,
263 const nnfw_tensorinfo *tensor_info);
266 * @brief Prepare session to be ready for inference
268 * This phase may finalize model compilation, scheduling, and additional settings.
269 * If {@link nnfw_apply_tensorinfo} is called to apply input tensor info different from the model
270 * before this function, tries to resize all tensors.
272 * @param[in] session the session to be prepared
273 * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR
275 NNFW_STATUS nnfw_prepare(nnfw_session *session);
278 * @brief Run inference
280 * <p>This function should be called after model is loaded by {@link nnfw_load_model_from_file},
281 * session is prepared for inference by {@link nnfw_prepare}, set input and output buffers
282 * by {@link nnfw_set_input} and {@link nnfw_set_output}.</p>
284 * <p>This function return after inference is finished.</p>
286 * @param[in] session The session to run inference
287 * @return @c NNFW_STATUS_NO_ERROR if successful
289 NNFW_STATUS nnfw_run(nnfw_session *session);
292 * @brief Run inference asynchronously
294 * <p>This function must be called after model is loaded by {@link nnfw_load_model_from_file},
295 * session is prepared for inference by {@link nnfw_prepare}, set input and output buffers
296 * by {@link nnfw_set_input} and {@link nnfw_set_output}.</p>
298 * <p>This function returns immediately after starting a thread to run the inference.
299 * To get the result of it or to do the next inference with {@link nnfw_run} or
300 * {@link nnfw_run_async}, {@link nnfw_await} must be called to ensure the current asynchronous
301 * inference has finished. Only one asynchronous inference is allowed at a time for a session.
302 * If this function is called while the previous one is still running, it returns an error.</p>
304 * @param[in] session The session to run inference
305 * @return @c NNFW_STATUS_NO_ERROR if successful
307 NNFW_STATUS nnfw_run_async(nnfw_session *session);
310 * @brief Wait for asynchronous run to finish
312 * <p>This function must be called after calling {@link nnfw_run_async}, and can be called only once
313 * for a {@link nnfw_run_async} call.
315 * <p>When this function returns, it means that this session has finished the asynchronous run. Then
316 * the user can safely use the output data.</p>
318 * <p>This function returns after the asynchronous inference is finished.</p>
320 * @param[in] session The session to run inference
321 * @return @c NNFW_STATUS_NO_ERROR if successful
323 NNFW_STATUS nnfw_await(nnfw_session *session);
326 * @brief Set input buffer
328 * This function must be called after {@link nnfw_prepare}, \p buffer given to this function can be
329 * reused for many inferences. \p length must be greater or equal than the operand requires. To
330 * specify an optional input, you can either not call this for that input or call this with \p
331 * buffer of NULL and \p length of 0.
333 * @param[in] session Session to the input is to be set
334 * @param[in] index Index of input to be set (0-indexed)
335 * @param[in] type Type of the input
336 * @param[in] buffer Raw buffer for input
337 * @param[in] length Size of bytes of input buffer
339 * @return @c NNFW_STATUS_NO_ERROR if successful
341 NNFW_STATUS nnfw_set_input(nnfw_session *session, uint32_t index, NNFW_TYPE type,
342 const void *buffer, size_t length);
345 * @brief Set output buffer
347 * This function must be called after {@link nnfw_prepare}, \p buffer given to this function can be
348 * reused for many inferences. \p length must be greater or equal than the operand requires. An
349 * output operand can have unspecified shape and deduced dynamically during the execution. You must
350 * provide \p buffer large enough.
352 * @param[in] session Session from inference output is to be extracted
353 * @param[in] index Index of output to be set (0-indexed)
354 * @param[in] type Type of the output
355 * @param[out] buffer Raw buffer for output
356 * @param[in] length Size of bytes of output buffer
358 * @return @c NNFW_STATUS_NO_ERROR if successful
360 NNFW_STATUS nnfw_set_output(nnfw_session *session, uint32_t index, NNFW_TYPE type, void *buffer,
364 * @brief Get the number of inputs
366 * Application can call this function to get number of inputs defined in loaded model.
367 * This function should be called after {@link nnfw_load_model_from_file} is invoked to load model
369 * @param[in] session Session from input information is to be extracted
370 * @param[out] number Variable which the number of inputs is put into
372 * @return @c NNFW_STATUS_NO_ERROR if successful
374 NNFW_STATUS nnfw_input_size(nnfw_session *session, uint32_t *number);
377 * @brief Get the number of outputs
379 * Application can call this function to get number of outputs defined in loaded model.
380 * This function should be called after {@link nnfw_load_model_from_file} is invoked to load model
382 * @param[in] session Session from output information is to be extracted
383 * @param[out] number Variable which the number of outputs is put into
385 * @return @c NNFW_STATUS_NO_ERROR if successful
387 NNFW_STATUS nnfw_output_size(nnfw_session *session, uint32_t *number);
390 * @brief Set the layout of an input
392 * The input that does not call this has NNFW_LAYOUT_CHANNELS_LAST layout (NHWC if rank is 4)
394 * @param[in] session session from inference input is to be extracted
395 * @param[in] index index of input to be set (0-indexed)
396 * @param[in] layout layout to set to target input
398 * @return NNFW_STATUS_NO_ERROR if successful
400 NNFW_STATUS nnfw_set_input_layout(nnfw_session *session, uint32_t index, NNFW_LAYOUT layout);
403 * @brief Set the layout of an output
405 * The output that does not call this has NNFW_LAYOUT_CHANNELS_LAST layout (NHWC if rank is 4)
407 * @param[in] session session from inference output is to be extracted
408 * @param[in] index index of output to be set (0-indexed)
409 * @param[in] layout layout to set to target output
411 * @return NNFW_STATUS_NO_ERROR if successful
413 NNFW_STATUS nnfw_set_output_layout(nnfw_session *session, uint32_t index, NNFW_LAYOUT layout);
416 * @brief Get i-th input tensor info
418 * <p>Before {@link nnfw_prepare} is invoked, this function return tensor info in model,
419 * so updated tensor info by {@link nnfw_apply_tensorinfo} is not returned.</p>
421 * <p>After {@link nnfw_prepare} is invoked, this function return updated tensor info
422 * if tensor info is updated by {@link nnfw_apply_tensorinfo}.</p>
424 * @param[in] session Session from input information is to be extracted
425 * @param[in] index Index of input
426 * @param[out] tensor_info Tensor info (shape, type, etc)
428 * @return @c NNFW_STATUS_NO_ERROR if successful
430 NNFW_STATUS nnfw_input_tensorinfo(nnfw_session *session, uint32_t index,
431 nnfw_tensorinfo *tensor_info);
434 * @brief Get i-th output tensor info
436 * <p>After {@link nnfw_load_model_from_file} and before {@link nnfw_prepare} is invoked, it returns
437 * tensor info in the model.</p>
439 * <p>After {@link nnfw_prepare} and before {@link nnfw_run} is invoked, this function returns
440 * updated tensor info if tensor info is updated by {@link nnfw_set_input_tensorinfo}.</p>
442 * <p>After {@link nnfw_run} is invoked(at least once), it returns the updated tensor info during
443 * the latest execution.</p>
445 * @param[in] session Session from output information is to be extracted
446 * @param[in] index Index of output
447 * @param[out] tensor_info Tensor info (shape, type, etc)
449 * @return @c NNFW_STATUS_NO_ERROR if successful
451 NNFW_STATUS nnfw_output_tensorinfo(nnfw_session *session, uint32_t index,
452 nnfw_tensorinfo *tensor_info);
455 * @brief Set available backends
457 * This function should be called before {@link nnfw_prepare} is invoked.
459 * <p>Supported backends differs on each platforms.
460 * For example, `x86_64` supports "cpu" only.
461 * Multiple backends can be set and they must be separated by a semicolon (ex: "acl_cl;cpu").
462 * For each backend string, `libbackend_{backend}.so` will be dynamically loaded during
463 * {@link nnfw_prepare}.
464 * Among the multiple backends, the 1st element is used as the default backend.</p>
466 * @param[in] session session to which available backends are set
467 * @param[in] backends available backends on which nnfw uses
469 * @return @c NNFW_STATUS_NO_ERROR if successful
471 NNFW_STATUS nnfw_set_available_backends(nnfw_session *session, const char *backends);
474 * @brief Set the operation's backend
476 * This function should be called before {@link nnfw_prepare} is invoked.
478 * <p>The backend for op has higher priority than available backends specified by
479 * {@link nnfw_set_available_backends}.</p>
481 * @deprecated Deprecated since 1.8.0.
483 * @param[in] session session to be modified
484 * @param[in] op operation to be set
485 * @param[in] backend Backend on which the operation runs
487 * @return @c NNFW_STATUS_NO_ERROR if successful
489 NNFW_STATUS nnfw_set_op_backend(nnfw_session *session, const char *op, const char *backend);
492 * @brief Retrieve uint32 type of nnfw information for given information ID.
494 * <p>Retrieves the information of property given by information id </p>
496 * @note The input session could be null for global information (e.g. runtime version).
498 * @param[in] session session to be queried on.
499 * @param[in] id Information ID to be queried
500 * @param[out] val uint32 value to be returned.
502 * @return @c NNFW_STATUS_NO_ERROR if successful
504 NNFW_STATUS nnfw_query_info_u32(nnfw_session *session, NNFW_INFO_ID id, uint32_t *val);