2 * Copyright (c) 2019 Samsung Electronics Co., Ltd. All Rights Reserved
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 * @brief This file describes runtime API
32 * @brief Session to query with runtime
34 * <p>nnfw_session is started and passed by calling {@link nnfw_create_session}.
35 * Each session has its own inference environment, such as model to inference, backend usage, etc.
37 * <p>Load model by calling {@link nnfw_load_model_from_file}
39 * <p>After loading, prepare inference by calling {@link nnfw_prepare}.
40 * Application can set runtime environment before prepare by calling
41 * {@link nnfw_set_available_backends} and {@link nnfw_set_op_backend}, and it is optional.
43 * <p>Application can inference by calling {@link nnfw_run}.
44 * Before inference, application has responsibility to set input tensor to set input data by calling
45 * {@link nnfw_set_input}, and output tensor to get output by calling {@link nnfw_set_output}
47 * <p>To support input and output setting, application can get
48 * input and output tensor information by calling<ul>
49 * <li>{@link nnfw_input_size}</li>
50 * <li>{@link nnfw_output_size}</li>
51 * <li>{@link nnfw_input_tensorinfo}</li>
52 * <li>{@link nnfw_output_tensorinfo}</li>
55 * <p>Application can inference many times using one session,
56 * but the next inference can only begin after the prior inference has ended
58 * <p>Application cannot use multiple models using one session
60 typedef struct nnfw_session nnfw_session;
65 * The type of tensor represented in {@link nnfw_tensorinfo}
69 /** A tensor of 32 bit floating point */
70 NNFW_TYPE_TENSOR_FLOAT32 = 0,
71 /** A tensor of 32 bit signed integer */
72 NNFW_TYPE_TENSOR_INT32 = 1,
74 * A tensor of 8 bit unsigned integers that represent real numbers.
76 * real_value = (integer_value - zeroPoint) * scale.
78 NNFW_TYPE_TENSOR_QUANT8_ASYMM = 2,
79 /** A tensor of boolean */
80 NNFW_TYPE_TENSOR_BOOL = 3,
82 /** A tensor of 8 bit unsigned integer */
83 NNFW_TYPE_TENSOR_UINT8 = 4,
85 /** A tensor of 64 bit signed integer */
86 NNFW_TYPE_TENSOR_INT64 = 5,
89 * A tensor of 8 bit signed integers that represent real numbers.
91 * real_value = (integer_value - zeroPoint) * scale.
93 NNFW_TYPE_TENSOR_QUANT8_ASYMM_SIGNED = 6,
96 * A tensor of 16 bit signed integers that represent real numbers.
98 * real_value = (integer_value - zeroPoint) * scale.
100 * Forced to have zeroPoint equal to 0.
102 NNFW_TYPE_TENSOR_QUANT16_SYMM_SIGNED = 7,
107 * @brief Result values returned from a call to an API function
112 NNFW_STATUS_NO_ERROR = 0,
114 * An error code for general use.
115 * Mostly used when there is no specific value for that certain situation.
117 NNFW_STATUS_ERROR = 1,
118 /** Unexpected null argument is given. */
119 NNFW_STATUS_UNEXPECTED_NULL = 2,
120 /** When a function was called but it is not valid for the current session state. */
121 NNFW_STATUS_INVALID_STATE = 3,
122 /** When it is out of memory */
123 NNFW_STATUS_OUT_OF_MEMORY = 4,
124 /** When it was given an insufficient output buffer */
125 NNFW_STATUS_INSUFFICIENT_OUTPUT_SIZE = 5,
129 * @brief Data format of a tensor
133 /** Don't care layout */
134 NNFW_LAYOUT_NONE = 0,
136 * Channel last layout
137 * If rank is 4, layout is NHWC
139 NNFW_LAYOUT_CHANNELS_LAST = 1,
141 * Channel first layout
142 * If rank is 4, layout is NCHW
144 NNFW_LAYOUT_CHANNELS_FIRST = 2,
148 * @brief Information ID for retrieving information on nnfw (e.g. version)
152 /** nnfw runtime version
153 * Its value is uint32 in 0xMMmmmmPP, where MM = major, mmmm = minor, PP = patch.
155 NNFW_INFO_ID_VERSION = 0,
159 * @brief Maximum rank expressible with nnfw
161 #define NNFW_MAX_RANK (6)
164 * @brief tensor info describes the type and shape of tensors
166 * <p>This structure is used to describe input and output tensors.
167 * Application can get input and output tensor type and shape described in model by using
168 * {@link nnfw_input_tensorinfo} and {@link nnfw_output_tensorinfo}
170 * <p>Maximum rank is 6 (NNFW_MAX_RANK). And tensor's dimension value is filled in 'dims' field from
172 * For example, if tensor's rank is 4,
173 * application can get dimension value from dims[0], dims[1], dims[2], and dims[3]
175 typedef struct nnfw_tensorinfo
179 /** The number of dimensions (rank) */
182 * The dimension of tensor.
183 * Maximum rank is 6 (NNFW_MAX_RANK).
185 int32_t dims[NNFW_MAX_RANK];
189 * @brief Create a new session instance.
191 * <p>This only creates a session.
192 * Model is loaded after {@link nnfw_load_model_from_file} is invoked.
193 * And inference is performed after {@link nnfw_run} is invoked.
195 * <p>{@link nnfw_close_session} should be called once
196 * if session is no longer needed
198 * @param[out] session The session to be created
199 * @return NNFW_STATUS_NO_ERROR if successful
201 NNFW_STATUS nnfw_create_session(nnfw_session **session);
204 * @brief Close a session instance
206 * After this is called, any access to the closed session by the application is invalid
208 * @param[in] session The session to be closed
209 * @return @c NNFW_STATUS_NO_ERROR if successful
211 NNFW_STATUS nnfw_close_session(nnfw_session *session);
214 * @brief Load model from nnpackage file or directory
216 * The length of \p package_file_path must not exceed 1024 bytes including zero at the end.
218 * @param[in] session nnfw_session loading the given nnpackage file/dir
219 * @param[in] package_file_path Path to the nnpackage file or unzipped directory to be loaded
221 * @return @c NNFW_STATUS_NO_ERROR if successful
223 NNFW_STATUS nnfw_load_model_from_file(nnfw_session *session, const char *package_file_path);
226 * @brief Apply i-th input's tensor info to resize input tensor
228 * This function should be called before {@link nnfw_prepare} is invoked, and
229 * should be called after {@link nnfw_load_model_from_file} is invoked
230 * See {@link nnfw_prepare} for information applying updated tensor info
231 * If this function is called many times for same index, tensor info is overwritten
233 * @deprecated Deprecated since 1.7.0. Use {@link nnfw_set_input_tensorinfo} instead.
235 * @param[in] session Session to the input tensor info is to be set
236 * @param[in] index Index of input to be applied (0-indexed)
237 * @param[in] tensor_info Tensor info to be applied
238 * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR
240 NNFW_STATUS nnfw_apply_tensorinfo(nnfw_session *session, uint32_t index,
241 nnfw_tensorinfo tensor_info);
244 * @brief Set input model's tensor info for resizing
246 * This function can be called at any time after calling {@link nnfw_load_model_from_file}. Changing
247 * input tensor's shape will cause shape inference for the model. There are two different types of
248 * shape inference - static and dynamic. Which one to use is depend on the current state of the
250 * When it is called after calling {@link nnfw_load_model_from_file} and before calling {@link
251 * nnfw_prepare}, this info will be used when {@link nnfw_prepare}. And it will perform static shape
252 * inference for all tensors.
253 * When it is called after calling {@link nnfw_prepare} or even after {@link nnfw_run}, this info
254 * will be used when {@link nnfw_run}. And the shapes of the tensors are determined on the fly.
255 * If this function is called many times for the same index, it is overwritten.
257 * @param[in] session Session to the input tensor info is to be set
258 * @param[in] index Index of input to be set (0-indexed)
259 * @param[in] tensor_info Tensor info to be set
260 * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR
262 NNFW_STATUS nnfw_set_input_tensorinfo(nnfw_session *session, uint32_t index,
263 const nnfw_tensorinfo *tensor_info);
266 * @brief Prepare session to be ready for inference
268 * This phase may finalize model compilation, scheduling, and additional settings.
269 * If {@link nnfw_apply_tensorinfo} is called to apply input tensor info different from the model
270 * before this function, tries to resize all tensors.
272 * @param[in] session the session to be prepared
273 * @return @c NNFW_STATUS_NO_ERROR if successful, otherwise return @c NNFW_STATUS_ERROR
275 NNFW_STATUS nnfw_prepare(nnfw_session *session);
278 * @brief Run inference
280 * <p>This function should be called after model is loaded by {@link nnfw_load_model_from_file},
281 * session is prepared for inference by {@link nnfw_prepare}, set input and output buffers
282 * by {@link nnfw_set_input} and {@link nnfw_set_output}.</p>
284 * <p>This function return after inference is finished.</p>
286 * @param[in] session The session to run inference
287 * @return @c NNFW_STATUS_NO_ERROR if successful
289 NNFW_STATUS nnfw_run(nnfw_session *session);
292 * @brief Run inference asynchronously
294 * <p>This function must be called after model is loaded by {@link nnfw_load_model_from_file},
295 * session is prepared for inference by {@link nnfw_prepare}, set input and output buffers
296 * by {@link nnfw_set_input} and {@link nnfw_set_output}.</p>
298 * <p>This function returns immediately after starting a thread to run the inference.
299 * To get the result of it or to do the next inference with {@link nnfw_run} or
300 * {@link nnfw_run_async}, {@link nnfw_await} must be called to ensure the current asynchronous
301 * inference has finished. Only one asynchronous inference is allowed at a time for a session.
302 * If this function is called while the previous one is still running, it returns an error.</p>
304 * @param[in] session The session to run inference
305 * @return @c NNFW_STATUS_NO_ERROR if successful
307 NNFW_STATUS nnfw_run_async(nnfw_session *session);
310 * @brief Wait for asynchronous run to finish
312 * <p>This function must be called after calling {@link nnfw_run_async}, and can be called only once
313 * for a {@link nnfw_run_async} call.
315 * <p>When this function returns, it means that this session has finished the asynchronous run. Then
316 * the user can safely use the output data.</p>
318 * <p>This function returns after the asynchronous inference is finished.</p>
320 * @param[in] session The session to run inference
321 * @return @c NNFW_STATUS_NO_ERROR if successful
323 NNFW_STATUS nnfw_await(nnfw_session *session);
326 * @brief Set input buffer
328 * This function must be called after {@link nnfw_prepare}, \p buffer given to this function can be
329 * reused for many inferences. \p length must be greater or equal than the operand requires. To
330 * specify an optional input, you can either not call this for that input or call this with \p
331 * buffer of NULL and \p length of 0.
333 * @param[in] session Session to the input is to be set
334 * @param[in] index Index of input to be set (0-indexed)
335 * @param[in] type Type of the input
336 * @param[in] buffer Raw buffer for input
337 * @param[in] length Size of bytes of input buffer
339 * @return @c NNFW_STATUS_NO_ERROR if successful
341 NNFW_STATUS nnfw_set_input(nnfw_session *session, uint32_t index, NNFW_TYPE type,
342 const void *buffer, size_t length);
345 * @brief Set output buffer
347 * This function must be called after {@link nnfw_prepare}, \p buffer given to this function can be
348 * reused for many inferences. \p length must be greater or equal than the operand requires. An
349 * output operand can have unspecified shape and deduced dynamically during the execution. You must
350 * provide \p buffer large enough.
352 * @param[in] session Session from inference output is to be extracted
353 * @param[in] index Index of output to be set (0-indexed)
354 * @param[in] type Type of the output
355 * @param[out] buffer Raw buffer for output
356 * @param[in] length Size of bytes of output buffer
358 * @return @c NNFW_STATUS_NO_ERROR if successful
360 NNFW_STATUS nnfw_set_output(nnfw_session *session, uint32_t index, NNFW_TYPE type, void *buffer,
364 * @brief Get the number of inputs
366 * Application can call this function to get number of inputs defined in loaded model.
367 * This function should be called after {@link nnfw_load_model_from_file} is invoked to load model
369 * @param[in] session Session from input information is to be extracted
370 * @param[out] number Variable which the number of inputs is put into
372 * @return @c NNFW_STATUS_NO_ERROR if successful
374 NNFW_STATUS nnfw_input_size(nnfw_session *session, uint32_t *number);
377 * @brief Get the number of outputs
379 * Application can call this function to get number of outputs defined in loaded model.
380 * This function should be called after {@link nnfw_load_model_from_file} is invoked to load model
382 * @param[in] session Session from output information is to be extracted
383 * @param[out] number Variable which the number of outputs is put into
385 * @return @c NNFW_STATUS_NO_ERROR if successful
387 NNFW_STATUS nnfw_output_size(nnfw_session *session, uint32_t *number);
390 * @brief Set the layout of an input
392 * The input that does not call this has NNFW_LAYOUT_CHANNELS_LAST layout (NHWC if rank is 4)
394 * @param[in] session session from inference input is to be extracted
395 * @param[in] index index of input to be set (0-indexed)
396 * @param[in] layout layout to set to target input
398 * @return NNFW_STATUS_NO_ERROR if successful
400 NNFW_STATUS nnfw_set_input_layout(nnfw_session *session, uint32_t index, NNFW_LAYOUT layout);
403 * @brief Set the layout of an output
405 * The output that does not call this has NNFW_LAYOUT_CHANNELS_LAST layout (NHWC if rank is 4)
407 * @param[in] session session from inference output is to be extracted
408 * @param[in] index index of output to be set (0-indexed)
409 * @param[in] layout layout to set to target output
411 * @return NNFW_STATUS_NO_ERROR if successful
413 NNFW_STATUS nnfw_set_output_layout(nnfw_session *session, uint32_t index, NNFW_LAYOUT layout);
416 * @brief Get i-th input tensor info
418 * <p>Before {@link nnfw_prepare} is invoked, this function return tensor info in model,
419 * so updated tensor info by {@link nnfw_apply_tensorinfo} is not returned.</p>
421 * <p>After {@link nnfw_prepare} is invoked, this function return updated tensor info
422 * if tensor info is updated by {@link nnfw_apply_tensorinfo}.</p>
424 * @param[in] session Session from input information is to be extracted
425 * @param[in] index Index of input
426 * @param[out] tensor_info Tensor info (shape, type, etc)
428 * @return @c NNFW_STATUS_NO_ERROR if successful
430 NNFW_STATUS nnfw_input_tensorinfo(nnfw_session *session, uint32_t index,
431 nnfw_tensorinfo *tensor_info);
434 * @brief Get i-th output tensor info
436 * <p>After {@link nnfw_load_model_from_file} and before {@link nnfw_prepare} is invoked, it returns
437 * tensor info in the model.</p>
439 * <p>After {@link nnfw_prepare} and before {@link nnfw_run} is invoked, this function returns
440 * updated tensor info if tensor info is updated by {@link nnfw_set_input_tensorinfo}.</p>
442 * <p>After {@link nnfw_run} is invoked(at least once), it returns the updated tensor info during
443 * the latest execution.</p>
445 * @param[in] session Session from output information is to be extracted
446 * @param[in] index Index of output
447 * @param[out] tensor_info Tensor info (shape, type, etc)
449 * @return @c NNFW_STATUS_NO_ERROR if successful
451 NNFW_STATUS nnfw_output_tensorinfo(nnfw_session *session, uint32_t index,
452 nnfw_tensorinfo *tensor_info);
455 * @brief Set available backends
457 * This function should be called before {@link nnfw_prepare} is invoked.
459 * <p>Supported backends differs on each platforms.
460 * For example, `x86_64` supports "cpu" only.
461 * Multiple backends can be set and they must be separated by a semicolon (ex: "acl_cl;cpu").
462 * For each backend string, `libbackend_{backend}.so` will be dynamically loaded during
463 * {@link nnfw_prepare}.
464 * Among the multiple backends, the 1st element is used as the default backend.</p>
466 * @param[in] session session to which available backends are set
467 * @param[in] backends available backends on which nnfw uses
469 * @return @c NNFW_STATUS_NO_ERROR if successful
471 NNFW_STATUS nnfw_set_available_backends(nnfw_session *session, const char *backends);
474 * @brief Set the operation's backend
476 * This function should be called before {@link nnfw_prepare} is invoked.
478 * <p>The backend for op has higher priority than available backends specified by
479 * {@link nnfw_set_available_backends}.</p>
481 * @deprecated Deprecated since 1.8.0.
483 * @param[in] session session to be modified
484 * @param[in] op operation to be set
485 * @param[in] backend Backend on which the operation runs
487 * @return @c NNFW_STATUS_NO_ERROR if successful
489 NNFW_STATUS nnfw_set_op_backend(nnfw_session *session, const char *op, const char *backend);
492 * @brief Retrieve uint32 type of nnfw information for given information ID.
494 * <p>Retrieves the information of property given by information id </p>
496 * @note The input session could be null for global information (e.g. runtime version).
498 * @param[in] session session to be queried on.
499 * @param[in] id Information ID to be queried
500 * @param[out] val uint32 value to be returned.
502 * @return @c NNFW_STATUS_NO_ERROR if successful
504 NNFW_STATUS nnfw_query_info_u32(nnfw_session *session, NNFW_INFO_ID id, uint32_t *val);