* @param[in] modelid The model to be inferred.
* @param[in] input The input data to be inferred.
* @param[out] output The output result. The caller MUST allocate appropriately before calling this.
- * @return @c 0 if no error. otherwise a negative error value
+ * @return @c 0 or positive id if no error. otherwise a negative error value
*
* @detail This is a syntactic sugar of runNPU_async().
* CAUTION: There is a memcpy for the output buffer.
* @param[out] sequence The sequence number returned with runNPU_async.
* @param[in] data The data given as a parameter to the runNPU_async call.
* @param[in] mode Configures how this operation works.
- * @return @c 0 if no error. otherwise a negative error value
+ * @return @c 0 or positive id if no error. otherwise a negative error value
*/
int runNPU_async(npudev_h dev, uint32_t modelid, const input_buffers *input,
npuOutputNotify cb, uint64_t *sequence, void *data,
*/
int cleanNPU_genericBuffers (npudev_h dev, generic_buffers * buffers);
+/** NPU Profiling */
+
+#define NPU_OPNAME_MAX (32)
+
+/** Profiling statistics for a single NPU operation in one inference run */
+typedef struct {
+ char name[NPU_OPNAME_MAX]; /**< operation name (bounded by NPU_OPNAME_MAX) */
+
+ uint32_t latency_ms; /**< op latency, milliseconds (per field suffix) */
+ uint64_t latency_cycles; /**< op latency, device cycles (per field suffix) */
+
+ uint64_t mem_read_bytes; /**< bytes read from memory by this op */
+ uint64_t mem_write_bytes; /**< bytes written to memory by this op */
+ /* TBD: additional counters may be appended here */
+} npu_profile_op;
+
+/** Per-inference profile: an array of per-op entries.
+ *  Obtained via getNPU_profile() and released via putNPU_profile(). */
+typedef struct {
+ uint32_t num_ops; /**< number of valid entries in profile[] */
+ npu_profile_op *profile; /**< array of num_ops per-op profile entries */
+} npu_profile;
+
+/**
+ * @brief Get the profile information from NPU
+ * @param[in] dev NPU device handle
+ * @param[in] run_id Identifier for each inference (obtained by runNPU_*)
+ * @param[out] profile Profile instance
+ * @return 0 if no error, otherwise a negative errno.
+ */
+int getNPU_profile (npudev_h dev, int run_id, npu_profile *profile);
+
+/**
+ * @brief Free the profile instance obtained by getNPU_profile().
+ * @param[in] profile Profile instance
+ */
+void putNPU_profile (npu_profile *profile);
+
#if defined(__cplusplus)
}
#endif
}
/**
+ * @brief Get the profile information from NPU
+ * @param[in] dev The NPU device handle
+ * @param[in] run_id The identifier for each inference
+ * @param[out] profile The profile instance
+ * @return 0 if no error, otherwise a negative errno.
+ */
+int getNPU_profile (npudev_h dev, int run_id, npu_profile *profile)
+{
+ /* resolve the per-device host handler from the device handle */
+ INIT_HOST_HANDLER (host_handler, dev);
+
+ /* delegate; parameter validation happens in HostHandler::getProfile */
+ return host_handler->getProfile (run_id, profile);
+}
+
+/**
+ * @brief Free the profile instance obtained by getNPU_profile().
+ * @param[in] profile The profile instance
+ */
+void putNPU_profile (npu_profile *profile)
+{
+ if (profile != nullptr) {
+   /* Release the per-op entry array before the container; freeing only
+    * the npu_profile struct would leak the array getNPU_profile() filled.
+    * free(nullptr) is a no-op, so this is safe even if it was never set. */
+   free (profile->profile);
+   free (profile);
+ }
+}
+
+/**
* @brief Send the NN model to NPU.
* @param[in] dev The NPU device handle
* @param[in] modelfile The filepath to the compiled NPU NN model in any buffer_type
}
/**
+ * @brief Get the profile information from NPU
+ * @param[in] run_id The identifier for each inference
+ * @param[out] profile The profile instance
+ * @return 0 if no error, otherwise a negative errno.
+ */
+int
+HostHandler::getProfile (int run_id, npu_profile *profile)
+{
+ /* reject negative run ids and null output before touching the driver */
+ if (run_id < 0 || profile == nullptr) {
+   logerr (TAG, "Invalid parameter provided\n");
+   return -EINVAL;
+ }
+
+ const DriverAPI * api = device_->getDriverAPI ();
+ assert (api != nullptr);
+
+ /* initialize so a driver that fails without writing leaves no wild pointer */
+ void *profile_buffer = nullptr;
+ int status = api->getProfile (run_id, &profile_buffer);
+ if (status == 0) {
+   /* parse only on success; the original inverted this check (status != 0)
+    * and would have attempted parsing on driver failure */
+   // TODO: Perform parsing of profile_buffer into *profile
+ }
+
+ return status;
+}
+
+/**
* @brief Set the data layout for input/output tensors
* @param[in] modelid The ID of model whose layouts are set
* @param[in] in the layout/type info for input tensors
int unregisterModel (uint32_t modelid);
int unregisterModels ();
+ int getProfile (int run_id, npu_profile *profile);
+
int setDataInfo (uint32_t modelid, tensors_data_info *in, tensors_data_info *out);
int setConstraint (uint32_t modelid, npuConstraint constraint);
void *addr, size_t size) const { return -EPERM; }
#endif
+ virtual int getProfile (int run_id, void **profile) const { return -EPERM; }
+
protected:
int dev_id_; /**< device id. assume that 0 <= id < getNUmDevices() */
int dev_fd_; /**< devide fd. opened in constructor and closed in destructor */
void *addr, size_t size) const;
#endif
+ int getProfile (int run_id, void **profile) const;
+
private:
int getDrvVersion () const;
static const std::string dev_node_base;
int registerModel (model_config_t *model) const;
int deregisterModel (unsigned long long id) const;
+ int getProfile (int run_id, void **profile) const;
+
private:
static std::atomic<int> global_fd_;
/**< global api fd */
return 0;
}
+
+int
+TrinityEmulAPI::getProfile (int run_id, void **profile) const
+{
+ /* Not implemented yet: clear the out-param so a caller that trusts the
+  * 0 (success) return never reads an uninitialized pointer. */
+ if (profile != nullptr)
+   *profile = nullptr;
+ // TODO: allocate the buffer and call APIs from simulator
+ return 0;
+}
return 0;
}
#endif
+
+int
+TrinityVision2API::getProfile (int run_id, void **profile) const
+{
+ /* Not implemented yet: clear the out-param so a caller that trusts the
+  * 0 (success) return never reads an uninitialized pointer. */
+ if (profile != nullptr)
+   *profile = nullptr;
+ // TODO: allocate the buffer and call ioctl() to the kernel driver
+ return 0;
+}